📄 fastmcd.m

📁 SVDD的工具箱
💻 M
📖 第 1 页 / 共 5 页
字号:
                              if count >= adjh                                                   if ~part                        [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor);                     res.plane=eigvct;                     weights(obsinplane)=1;                     [raw.center,raw.cov,res.center,res.cov,raw.objective,...                     raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,...                     res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl);                             if cor                        [res.cor,raw.cor]=deal(correl);                     end                     return                  elseif found==0                     dist=abs(sum((data-repmat(meanvct,n,1))'.*repmat(eigvct,1,n)));                     obsinplane=find(dist < 1e-8);                     count2=length(obsinplane);                     if count2>=h                        [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor);                        res.plane=eigvct;                        weights(obsinplane)=1;                        [raw.center,raw.cov,res.center,res.cov,raw.objective,...                        raw.wt,res.flag,res.method]=displ(2,count2,weights,n,p,center,covar,...                        res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl);                                if cor                           [res.cor,raw.cor]=deal(correl);                        end                       return                     end                     obj=0;                     inplane(k)=count;                     coeff1(:,k)=eigvct;                     found=1;                     ns=1;                                            else                      ns=1;                     adj=0;                     end                                    else                                    while det(covmat) < exp(-50*p)                     [index,seed]=addobs(index,n,seed);                     covmat=cov(data(index,:));                           end                  meanvct=mean(data(index,:));                                          end                         end                             if ~ns               sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,...                              minigr,n,p);              end                  end                  if ~ns                            	for j=1:csteps                        tottimes=tottimes+1;                              if j > 1                  % The observations correponding to the adjh smallest mahalanobis                   % distances determine the subset for the next iteration.                  sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,...                     			   minigr,n,p);            	end                                 	obs_in_set=sort(sortdist(1:adjh));               meanvct=mean(data(obs_in_set,:));               covmat=cov(data(obs_in_set,:));               obj=det(covmat);                  	  			if obj < exp(-50*p)                                       % The adjh-subset is singular. If adjh=h we have an exact fit situation.                  % If adjh < h we distinguish two cases :                  %                  % 1. We haven't found earlier a singular adjh-subset. We first check if                   %    in the entire set there are h observations that lie on the hyperplane.                  %    If so, we have an exact fit situation. If not, we stop taking C-steps                  %    (the determinant is zero which is the lowest possible value) and                   %    store the results in the appropriate arrays.  We then begin with                   %    the next trial subsample.                  %                  % 2. We have, for the concerning subdataset, already found a singular                  %    adjh-subset. We then immediately begin with the next trial subsample.                                      if ~part | final | (fine & n==minigr)                     [center,covar,z,correl,obsinplane,count]=fit(data,NaN,med,mad,p,NaN,...                                                           cor,meanvct,covmat,n);                     res.plane=z;                                                        weights(obsinplane)=1;                     [raw.center,raw.cov,res.center,res.cov,raw.objective,...                     raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,...                     res.method,z,ok,raw.wt,res.flag,file,fid,cor,correl);                             if cor                        [res.cor,raw.cor]=deal(correl);                     end                     return                                 	elseif found==0                     [eigvct, eigvl]=eigs(covmat,1,0,struct('disp',0));                                dist=abs(sum((data-repmat(meanvct,n,1))'.*repmat(eigvct,1,n)));                     obsinplane=find(dist<1e-8);                     count=length(obsinplane);                     if count >= h                         [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor);                        res.plane=eigvct;                        weights(obsinplane)=1;                        [raw.center,raw.cov,res.center,res.cov,raw.objective,...                        raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,...                        res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl);                                if cor                           [res.cor,raw.cor]=deal(correl);                        end                       return                  	end                  	obj=0;                     found=1;                     if ~fine                        coeff1(:,k)=eigvct;                        dist=abs(sum((data(obsingroup{k},:)-repmat(meanvct,group(k),1))'.*repmat(eigvct,1,group(k))));                        inplane(k)=length(dist(dist<1e-8));                     else                        coeff=eigvct;                        dist=abs(sum((data(obsingroup{end},:)-repmat(meanvct,minigr,1))'.*repmat(eigvct,1,minigr)));                        inplane=length(dist(dist<1e-8));                     end                     break;                        	else                   	adj=0;                  	break;               	end                                 end                              % We stop taking C-steps when two subsequent determinants become equal.               % We have then reached convergence.               if j >= 2 & obj == prevdet               	break;            	end               prevdet=obj;            	                        end % C-steps                     end                           % After each iteration, it has to be checked whether the new solution         % is better than some previous one.  A distinction is made between the         % different stages of the algorithm:         %         %  - Let us first consider the first (second) stage of the algorithm.          %    We distinguish two cases if the objective value is lower than the largest          %    value in bobj1 (bobj) :          %         %      1. The new objective value did not yet occur in bobj1 (bobj).  We then store         %         this value, the corresponding mean and covariance matrix at the right          %         place in resp. bobj1 (bobj), bmean1 (bmean) and bcov1 (bcov).         %         The objective value is inserted by shifting the greater determinants          %         upwards. We perform the same shifting in bmean1 (bmean) and bcov1 (bcov).          %         %      2. The new objective value already occurs in bobj1 (bobj). A comparison is          %         made between the new mean vector and covariance matrix and those          %         estimates with the same determinant. When for an equal determinant,          %         the mean vector or covariance matrix do not correspond, the new results           %         will be stored in bobj1 (bobj), bmean1 (bmean) and bcov1 (bcov).         %         %    If the objective value is not lower than the largest value in bobj1 (bobj),          %    nothing happens.         %         %  - For the final stage of the algorithm, only the best solution has to be kept.         %    We then check if the objective value is lower than the till then lowest value.          %    If so, we have a new best solution. If not, nothing happens.                                if ~final & adj                        if fine | ~part               if obj < max(bobj)                  [bmean,bcov,bobj]=insertion(bmean,bcov,bobj,meanvct,covmat,obj,1,eps);               end               else               if obj < max(bobj1(k,:))                  [bmean1,bcov1,bobj1]=insertion(bmean1,bcov1,bobj1,meanvct,covmat,obj,k,eps);               end                end                  end                   if final & obj< bestobj             % bestset           : the best subset for the whole data.              % bestobj           : objective value for this set.             % initmean, initcov : resp. the mean and covariance matrix of this set.               bestset=obs_in_set;             bestobj=obj;                initmean=meanvct;             initcov=covmat;         end                   end % nsamp     end % ngroup            if part & ~fine      fine=1;   elseif (part & fine & ~final) | (~part & ~final)      final=1;   else      final=2;   end      end % while loop% factor : if we multiply the raw MCD covariance matrix with factor, we obtain consistency  %          when the data come from a multivariate normal distribution.factor=rawconsfactor(h,n,p);factor=factor*rawcorfactor(p,n,alpha);% initcov=factor*initcov;%%NIEUWraw.cov=factor*initcov;raw.objective=bestobj*prod(mad)^2;[raw.cov,raw.center]=trafo(raw.cov,initmean,med,mad,p);if cor   raw.cor=initcov./(sqrt(diag(initcov))*sqrt(diag(initcov))');end% We express the results in the original units.%[raw.cov,raw.center]=trafo(initcov,initmean,med,mad,p);%raw.cov=factor*raw.cov;%raw.objective=bestobj*prod(mad)^2;%The reweighted robust estimates are now computed.%mah=mahalanobis(data,initmean,initcov,n,p);%%%NIEUWmah=mahalanobis(data,initmean,initcov*factor,n,p);raw.robdist=sqrt(mah);%m=2*n/asvardiag(h,n,p);%quantile=qf(0.975,p,m-p+1);quantile=chi2q(p);
%weights=mah*(m-p+1)/(m*p)<quantile;weights=mah<quantile;
raw.wt=weights;[res.center,res.cov]=weightmecov(data,weights,n,p);factor=rewconsfactor(weights,n,p);factor=factor*rewcorfactor(p,n,alpha);res.cov=factor*res.cov;[trcov,trcenter]=trafo(res.cov,res.center,med,mad,p);  if cor   res.cor=res.cov./(sqrt(diag(res.cov))*sqrt(diag(res.cov))');endif det(trcov) < exp(-50*p)   [center,covar,z,correl,plane,count]=fit(data,NaN,med,mad,p,z,cor,res.center,res.cov,n);     res.plane=z;   if cor      correl=covar./(sqrt(diag(covar))*sqrt(diag(covar))');   end        res.method=displrw(count,n,p,center,covar,res.method,file,z,fid,cor,correl);      res.flag=weights;   res.robdist=raw.robdist;else   mah=mahalanobis(data,res.center,res.cov,n,p);   res.flag=(mah <= chi2q(p));   res.robdist=sqrt(mah);endres.mahalanobis=sqrt(mahalanobis(data,clmean,clcov,n,p));res.cov=trcov;res.center=trcenter;if ~lts   disp(res.method)endspec.ask=1;if ~lts   plotmcd(res,spec);end%-----------------------------------------------------------------------------------------function [raw_center,raw_cov,center,covar,raw_objective,raw_wt,mcd_wt,method]=displ(exactfit,...          count,weights,n,p,center,covar,method,z,ok,raw_wt,mcd_wt,file,fid,cor,correl,varargin)      % Determines some fields of the output argument RES for the exact fit situation.  It also % displays and writes the messages concerning the exact fit situation.  If the raw MCD % covariance matrix is not singular but the reweighted is, then the function displrw is % called instead of this function.[raw_center,center]=deal(center);[raw_cov,cov]=deal(covar);raw_objective=0;mcd_wt=weights;raw_wt=weights;switch exactfitcase 1   msg='The covariance matrix of the data is singular.';case 2   msg='The covariance matrix has become singular during the iterations of the MCD algorithm.';case 3   msg=sprintf('The %g-th order statistic of the absolute deviation of variable %g is zero. ',varargin{1},varargin{2});   end      msg=sprintf([msg '\nThere are %g observations in the entire dataset of %g observations that lie on the \n'],count,n);switch pcase 2   msg=sprintf([msg 'line with equation %g(x_i1-m_1)%+g(x_i2-m_2)=0 \n'],z);          msg=sprintf([msg 'where the mean (m_1,m_2) of these observations is the MCD location']);case 3   msg=sprintf([msg 'plane with equation %g(x_i1-m_1)%+g(x_i2-m_2)%+g(x_i3-m_3)=0 \n'],z);   msg=sprintf([msg 'where the mean (m_1,m_2,m_3) of these observations is the MCD location']);otherwise   msg=sprintf([msg 'hyperplane with equation a_1 (x_i1-m_1) + ... + a_p (x_ip-m_p) = 0 \n']);   msg=sprintf([msg 'with coefficients a_i equal to : \n\n']);   msg=sprintf([msg sprintf('%g  ',z)]);   msg=sprintf([msg '\n\nand where the mean (m_1,...,m_p) of these observations is the MCD location']); endmethod=strvcat(method,[msg '.']);            disp(method)%-----------------------------------------------------------------------------------------function method=displrw(count,n,p,center,covar,method,file,z,fid,cor,correl)                               % Displays and writes messages in the case the reweighted robust covariance matrix % is singular.msg=sprintf('The reweighted MCD scatter matrix is singular. \n');msg=sprintf([msg 'There are %g observations in the entire dataset of %g observations that lie on the\n'],count,n);switch pcase 2   msg=sprintf([msg 'line with equation %g(x_i1-m_1)%+g(x_i2-m_2)=0 \n\n'],z);          msg=sprintf([msg 'where the mean (m_1,m_2) of these observations is : \n\n']);case 3   msg=sprintf([msg 'plane with equation %g(x_i1-m_1)%+g(x_i2-m_2)%+g(x_i3-m_3)=0 \n\n'],z);   msg=sprintf([msg 'where the mean (m_1,m_2,m_3) of these observations is : \n\n']);otherwise   msg=sprintf([msg 'hyperplane with equation a_1 (x_i1-m_1) + ... + a_p (x_ip-m_p) = 0 \n']);   msg=sprintf([msg 'with coefficients a_i equal to : \n\n']);   msg=sprintf([msg sprintf('%g  ',z)]);   msg=sprintf([msg '\n\nand where the mean (m_1,...,m_p) of these observations is : \n\n']);endmsg=sprintf([msg sprintf('%g  ',center)]);msg=sprintf([msg '\n\nTheir covariance matrix equals : \n\n']);msg=sprintf([msg sprintf([repmat('% 13.5g ',1,p) '\n'],covar)]);if cor   msg=sprintf([msg '\n\nand their correlation matrix equals : \n\n']);   msg=sprintf([msg sprintf([repmat('% 13.5g ',1,p) '\n'],correl)]);end   method=strvcat(method,msg);%------------------------------------------------------------------------------------------   function [wmean,wcov]=weightmecov(dat,weights,n,nvar)% Computes the reweighted estimates.if size(weights,1)==1   weights=weights';endwmean=sum(dat.*repmat(weights,1,nvar))/sum(weights);wcov=zeros(nvar,nvar);for obs=1:n   hlp=dat(obs,:)-wmean;   wcov=wcov+weights(obs)*hlp'*hlp;endwcov=wcov/(sum(weights)-1);
💿 文件大小 440 K
👤 上传用户 wyly
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#SVDD #工具箱
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -