⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fastmcd.m

📁 SVDD的工具箱
💻 M
📖 第 1 页 / 共 5 页
字号:
end         data=(data-repmat(med,n,1))./repmat(mad,n,1);% The standardized classical estimates are now computed.clmean=mean(data);clcov=cov(data);% The univariate non-classical case is now handled.if p==1 & h~=n   if ~lts      res.method=sprintf('\nUnivariate location and scale estimation.');   end      [raw.center,raw.cov]=mcduni(data,n,h,n-h+1,alpha);   scale=raw.cov./sqrt(rawconsfactor(h,n,p)*rawcorfactor(p,n,alpha));   sor=sort((data-raw.center).^2);   raw.objective=1/(h-1)*sum(sor(1:h))*prod(mad)^2;   %m=2*n/asvardiag(h,n,p);   %quantile=qf(0.975,p,m-p+1);   quantile=chi2q(p);
   %weights=((data-raw.center)/scale).^2*(m-p+1)/(m*p)<quantile;   weights=((data-raw.center)/scale).^2<quantile;
   raw.wt=weights;   [res.center,res.cov]=weightmecov(data,weights,n,p);   factor=rewconsfactor(weights,n,p);   factor=factor*rewcorfactor(p,n,alpha);   res.cov=factor*res.cov;   mah=(data-res.center).^2/res.cov;   mah_raw=(data-raw.center).^2/raw.cov;   res.flag= mah <= chi2q(1);   [raw.cov,raw.center]=trafo(raw.cov,raw.center,med,mad,p);   [res.cov,res.center]=trafo(res.cov,res.center,med,mad,p);      res.mahalanobis=abs(data'-clmean)/sqrt(clcov);   raw.robdist=sqrt(mah_raw');   res.robdist=sqrt(mah');   if ~lts      disp(res.method);   end      spec.ask=1;   if ~lts      plotmcd(res,spec);   end   returnendif det(clascov) < exp(-50*p)   % all observations lie on a hyperplane.   [z, eigvl]=eigs(clcov,1,0,struct('disp',0));   res.plane=z;   weights(1:n)=1;   if cor      correl=clcov./(sqrt(diag(clcov))*sqrt(diag(clcov))');   end   [clcov,clmean]=trafo(clcov,clmean,med,mad,p);   [raw.center,raw.cov,res.center,res.cov,raw.objective,raw.wt,res.flag,...   res.method]=displ(1,n,weights,n,p,clmean,clcov,res.method,z./mad',ok,...                     raw.wt,res.flag,file,fid,cor,correl);      if cor      [res.cor,raw.cor]=deal(correl);   end                                    returnend% The classical case is now handled.if h==n   if ~lts      msg=sprintf('The MCD estimates based on %g observations are equal to the classical estimates.\n',h);      res.method=strvcat(res.method,msg);   end      raw.center=clmean;   raw.cov=clcov;   raw.objective=det(clcov);   mah=mahalanobis(data,clmean,clcov,n,p);   res.mahalanobis=sqrt(mah);   raw.robdist=res.mahalanobis;   weights=mah <= chi2q(p);   raw.wt=weights;   [res.center,res.cov]=weightmecov(data,weights,n,p)   if cor      raw.cor=raw.cov./(sqrt(diag(raw.cov))*sqrt(diag(raw.cov))');      res.cor=res.cov./(sqrt(diag(res.cov))*sqrt(diag(res.cov))');   end   if det(res.cov) < exp(-50*p)      [center,covar,z,correl,plane,count]=fit(data,NaN,med,mad,p,z,cor,res.center,res.cov,n);        res.plane=z;      if cor         correl=covar./(sqrt(diag(covar))*sqrt(diag(covar))');      end       res.method=displrw(count,n,p,center,covar,res.method,file,z,fid,cor,correl);         [raw.cov,raw.center]=trafo(raw.cov,raw.center,med,mad,p);      [res.cov,res.center]=trafo(res.cov,res.center,med,mad,p);      res.robdist=raw.robdist;   else      mah=mahalanobis(data,res.center,res.cov,n,p);      weights=mah <= chi2q(p);      [raw.cov,raw.center]=trafo(raw.cov,raw.center,med,mad,p);      [res.cov,res.center]=trafo(res.cov,res.center,med,mad,p);         res.robdist=sqrt(mah);   end      raw.objective=raw.objective*prod(mad)^2;   res.flag=weights;   if ~lts      disp(res.method)   end         spec.ask=1;   if ~lts      plotmcd(res,spec);   end   returnendpercent=h/n;%  If n >= 2*nmini the dataset will be divided into subdatasets.  For n < 2*nmini the set %  will be treated as a whole. if n >= 2*nmini      maxobs=maxgroup*nmini;   if n >= maxobs      ngroup=maxgroup;      group(1:maxgroup)=nmini;   else      ngroup=floor(n/nmini);      minquan=floor(n/ngroup);      group(1)=minquan;      for s=2:ngroup         group(s)=minquan+double(rem(n,ngroup)>=s-1);      end   end   part=1;     adjh=floor(group(1)*percent);   nsamp=floor(ntrial/ngroup);   minigr=sum(group);      obsingroup=fillgroup(n,group,ngroup,seed,fid);   % obsingroup : i-th row contains the observations of the i-th group.     % The last row (ngroup+1-th) contains the observations for the 2nd stage    % of the algorithm.else       [part,group,ngroup,adjh,minigr,obsingroup]=deal(0,n,1,h,n,n);   replow=[50,22,17,15,14,zeros(1,45)];   if n < replow(p)      % All (p+1)-subsets will be considered.       allsubset=1;      perm=[1:p,p];      nsamp=nchoosek(n,p+1);   else      allsubset=0;      nsamp=ntrial;   end   end   % some further initialisations. csteps=csteps1;inplane=NaN;% tottimes : the total number of iteration steps.% fine     : becomes 1 when the subdatasets are merged. % final    : becomes 1 for the final stage of the algorithm.     [tottimes,fine,final,prevdet]=deal(0);if part   % bmean1 : contains, for the first stage of the algorithm, the means of the ngroup*10    %          best estimates.    % bcov1  : analogous to bmean1, but now for the covariance matrices.    % bobj1  : analogous to bmean1, but now for the objective values.   % coeff1 : if in the k-th subdataset there are at least adjh observations that lie on     %          a hyperplane then the coefficients of this plane will be stored in the    %          k-th column of coeff1.   coeff1=repmat(NaN,p,ngroup);   bobj1=repmat(inf,ngroup,10);     bmean1=cell(ngroup,10);	bcov1=cell(ngroup,10);	[bmean1{:}]=deal(NaN);	[bcov1{:}]=deal(NaN);end% bmean : contains the means of the ten best estimates obtained in the second stage of the%         algorithm. % bcov  : analogous to bmean, but now for the covariance matrices.% bobj  : analogous to bmean, but now for the objective values.% coeff : analogous to coeff1, but now for the merged subdataset. % If the data is not split up, the 10 best estimates obtained after csteps1 iterations % will be stored in bmean, bcov and bobj.coeff=repmat(NaN,p,1);bobj=repmat(inf,1,10);bmean=cell(1,10);bcov=cell(1,10);[bmean{:}]=deal(NaN);[bcov{:}]=deal(NaN);      seed=0;while final~=2         if fine | (~part & final)            nsamp=10;             	if final                                      adjh=h;         ngroup=1;                        if n*p <= 1e+5            csteps=csteps3;         elseif n*p <=1e+6         	csteps=10-(ceil(n*p/1e+5)-2);      	else         	csteps=1;      	end                  if n > 5000         	nsamp=1;         end                         else                               adjh=floor(minigr*percent);         csteps=csteps2;               end         end      % found : becomes 1 if we have a singular intermediate MCD estimate.    found=0;      for k=1:ngroup            if ~fine         found=0;      end      		for i=1:nsamp                  tottimes=tottimes+1;                  % ns becomes 1 if we have a singular trial subsample and if there are at          % least adjh observations in the subdataset that lie on the concerning hyperplane.           % In that case we don't have to take C-steps. The determinant is zero which is           % already the lowest possible value. If ns=1, no C-steps will be taken and we          % start with the next sample. If we, for the considered subdataset, haven't          % already found a singular MCD estimate, then the results must be first stored in          % bmean, bcov, bobj or in bmean1, bcov1 and bobj1.  If we, however, already found          % a singular result for that subdataset, then the results won't be stored          % (the hyperplane we just found is probably the same as the one we found earlier.          % We then let adj be zero. This will guarantee us that the results won't be          % stored) and we start immediately with the next sample.          adj=1;         ns=0;                  % For the second and final stage of the algorithm the array sortdist(1:adjh)          % contains the indices of the observations corresponding to the adjh observations          % with minimal relative distances with respect to the best estimates of the          % previous stage. An exception to this, is when the estimate of the previous          % stage is singular.  For the second stage we then distinguish two cases :           %          % 1. There aren't adjh observations in the merged set that lie on the hyperplane.          %    The observations on the hyperplane are then extended to adjh observations by          %    adding the observations of the merged set with smallest orthogonal distances          %    to that hyperplane.           % 2. There are adjh or more observations in the merged set that lie on the          %    hyperplane. We distinguish two cases. We haven't or have already found such          %    a hyperplane. In the first case we start with a new sample.  But first, we          %    store the results in bmean1, bcov1 and bobj1. In the second case we          %    immediately start with a new sample.         %         % For the final stage we do the same as 1. above (if we had h or more observations          % on the hyperplane we would already have found it).         if final            if ~isinf(bobj(i))            	meanvct=bmean{i};            	covmat=bcov{i};            	if bobj(i)==0                    [dis,sortdist]=sort(abs(sum((data-repmat(meanvct,n,1))'.*repmat(coeff,1,n))));               else                  sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,...                                 minigr,n,p);               end            else               break;            end         elseif fine             if ~isinf(bobj1(k,i))               meanvct=bmean1{k,i};            	covmat=bcov1{k,i};            	if bobj1(k,i)==0                 	[dis,ind]=sort(abs(sum((data(obsingroup{end},:)-repmat(meanvct,minigr,1))'.*repmat(coeff1(:,k),1,minigr))));                  sortdist=obsingroup{end}(ind);                  if dis(adjh) < 1e-8                     if found==0                     	obj=0;                     	coeff=coeff1(:,k);                     	found=1;                  	else                     	adj=0;                  	end                  	ns=1;               	end               else                  sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,...                                 minigr,n,p);               end            else               break;            end            else            % The first stage of the algorithm.            % index : contains trial subsample.            if ~part               if allsubset      				k=p+1;      				perm(k)=perm(k)+1;      				while ~(k==1 |perm(k) <=(n-(p+1-k)))          				k=k-1;         				perm(k)=perm(k)+1;							for j=(k+1):p+1      						perm(j)=perm(j-1)+1;   						end      				end                  index=perm;               else                  [index,seed]=randomset(n,p+1,seed);                                 end            else               [index,seed]=randomset(group(k),p+1,seed);               index=obsingroup{k}(index);            end                           meanvct=mean(data(index,:));            covmat=cov(data(index,:));   		            if det(covmat) < exp(-50*p)                               % The trial subsample is singular.               % We distinguish two cases :               %               % 1. There are adjh or more observations in the subdataset that lie               %    on the hyperplane. If the data is not split up, we have adjh=h and thus               %    an exact fit. If the data is split up we distinguish two cases.                %    We haven't or have already found such a hyperplane.  In the first case               %    we check if there are more than h observations in the entire set                %    that lie on the hyperplane. If so, we have an exact fit situation.                %    If not, we start with a new trial subsample.  But first, the                %    results must be stored bmean1, bcov1 and bobj1.  In the second case               %    we immediately start with a new trial subsample.               %                  % 2. There aren't adjh observations in the subdataset that lie on the                %    hyperplane. We then extend the trial subsample until it isn't singular                %    anymore.                                             % eigvct : contains the coefficients of the hyperplane.               [eigvct, eigvl]=eigs(covmat,1,0,struct('disp',0));                                 if ~part                  dist=abs(sum((data-repmat(meanvct,n,1))'.*repmat(eigvct,1,n)));               else                  dist=abs(sum((data(obsingroup{k},:)-repmat(meanvct,group(k),1))'.*repmat(eigvct,1,group(k))));               end                                 obsinplane=find(dist < 1e-8);               % count : number of observations that lie on the hyperplane.               count=length(obsinplane);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -