📄 fastmcd.m
字号:
if count >= adjh if ~part [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor); res.plane=eigvct; weights(obsinplane)=1; [raw.center,raw.cov,res.center,res.cov,raw.objective,... raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,... res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl); if cor [res.cor,raw.cor]=deal(correl); end return elseif found==0 dist=abs(sum((data-repmat(meanvct,n,1))'.*repmat(eigvct,1,n))); obsinplane=find(dist < 1e-8); count2=length(obsinplane); if count2>=h [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor); res.plane=eigvct; weights(obsinplane)=1; [raw.center,raw.cov,res.center,res.cov,raw.objective,... raw.wt,res.flag,res.method]=displ(2,count2,weights,n,p,center,covar,... res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl); if cor [res.cor,raw.cor]=deal(correl); end return end obj=0; inplane(k)=count; coeff1(:,k)=eigvct; found=1; ns=1; else ns=1; adj=0; end else while det(covmat) < exp(-50*p) [index,seed]=addobs(index,n,seed); covmat=cov(data(index,:)); end meanvct=mean(data(index,:)); end end if ~ns sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,... minigr,n,p); end end if ~ns for j=1:csteps tottimes=tottimes+1; if j > 1 % The observations correponding to the adjh smallest mahalanobis % distances determine the subset for the next iteration. sortdist=mahal(data,meanvct,covmat,part,fine,final,k,obsingroup,group,... minigr,n,p); end obs_in_set=sort(sortdist(1:adjh)); meanvct=mean(data(obs_in_set,:)); covmat=cov(data(obs_in_set,:)); obj=det(covmat); if obj < exp(-50*p) % The adjh-subset is singular. If adjh=h we have an exact fit situation. % If adjh < h we distinguish two cases : % % 1. We haven't found earlier a singular adjh-subset. We first check if % in the entire set there are h observations that lie on the hyperplane. % If so, we have an exact fit situation. If not, we stop taking C-steps % (the determinant is zero which is the lowest possible value) and % store the results in the appropriate arrays. We then begin with % the next trial subsample. % % 2. We have, for the concerning subdataset, already found a singular % adjh-subset. We then immediately begin with the next trial subsample. if ~part | final | (fine & n==minigr) [center,covar,z,correl,obsinplane,count]=fit(data,NaN,med,mad,p,NaN,... cor,meanvct,covmat,n); res.plane=z; weights(obsinplane)=1; [raw.center,raw.cov,res.center,res.cov,raw.objective,... raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,... res.method,z,ok,raw.wt,res.flag,file,fid,cor,correl); if cor [res.cor,raw.cor]=deal(correl); end return elseif found==0 [eigvct, eigvl]=eigs(covmat,1,0,struct('disp',0)); dist=abs(sum((data-repmat(meanvct,n,1))'.*repmat(eigvct,1,n))); obsinplane=find(dist<1e-8); count=length(obsinplane); if count >= h [center,covar,eigvct,correl]=fit(data,obsinplane,med,mad,p,eigvct,cor); res.plane=eigvct; weights(obsinplane)=1; [raw.center,raw.cov,res.center,res.cov,raw.objective,... raw.wt,res.flag,res.method]=displ(2,count,weights,n,p,center,covar,... res.method,eigvct,ok,raw.wt,res.flag,file,fid,cor,correl); if cor [res.cor,raw.cor]=deal(correl); end return end obj=0; found=1; if ~fine coeff1(:,k)=eigvct; dist=abs(sum((data(obsingroup{k},:)-repmat(meanvct,group(k),1))'.*repmat(eigvct,1,group(k)))); inplane(k)=length(dist(dist<1e-8)); else coeff=eigvct; dist=abs(sum((data(obsingroup{end},:)-repmat(meanvct,minigr,1))'.*repmat(eigvct,1,minigr))); inplane=length(dist(dist<1e-8)); end break; else adj=0; break; end end % We stop taking C-steps when two subsequent determinants become equal. % We have then reached convergence. if j >= 2 & obj == prevdet break; end prevdet=obj; end % C-steps end % After each iteration, it has to be checked whether the new solution % is better than some previous one. A distinction is made between the % different stages of the algorithm: % % - Let us first consider the first (second) stage of the algorithm. % We distinguish two cases if the objective value is lower than the largest % value in bobj1 (bobj) : % % 1. The new objective value did not yet occur in bobj1 (bobj). We then store % this value, the corresponding mean and covariance matrix at the right % place in resp. bobj1 (bobj), bmean1 (bmean) and bcov1 (bcov). % The objective value is inserted by shifting the greater determinants % upwards. We perform the same shifting in bmean1 (bmean) and bcov1 (bcov). % % 2. The new objective value already occurs in bobj1 (bobj). A comparison is % made between the new mean vector and covariance matrix and those % estimates with the same determinant. When for an equal determinant, % the mean vector or covariance matrix do not correspond, the new results % will be stored in bobj1 (bobj), bmean1 (bmean) and bcov1 (bcov). % % If the objective value is not lower than the largest value in bobj1 (bobj), % nothing happens. % % - For the final stage of the algorithm, only the best solution has to be kept. % We then check if the objective value is lower than the till then lowest value. % If so, we have a new best solution. If not, nothing happens. if ~final & adj if fine | ~part if obj < max(bobj) [bmean,bcov,bobj]=insertion(bmean,bcov,bobj,meanvct,covmat,obj,1,eps); end else if obj < max(bobj1(k,:)) [bmean1,bcov1,bobj1]=insertion(bmean1,bcov1,bobj1,meanvct,covmat,obj,k,eps); end end end if final & obj< bestobj % bestset : the best subset for the whole data. % bestobj : objective value for this set. % initmean, initcov : resp. the mean and covariance matrix of this set. bestset=obs_in_set; bestobj=obj; initmean=meanvct; initcov=covmat; end end % nsamp end % ngroup if part & ~fine fine=1; elseif (part & fine & ~final) | (~part & ~final) final=1; else final=2; end end % while loop% factor : if we multiply the raw MCD covariance matrix with factor, we obtain consistency % when the data come from a multivariate normal distribution.factor=rawconsfactor(h,n,p);factor=factor*rawcorfactor(p,n,alpha);% initcov=factor*initcov;%%NIEUWraw.cov=factor*initcov;raw.objective=bestobj*prod(mad)^2;[raw.cov,raw.center]=trafo(raw.cov,initmean,med,mad,p);if cor raw.cor=initcov./(sqrt(diag(initcov))*sqrt(diag(initcov))');end% We express the results in the original units.%[raw.cov,raw.center]=trafo(initcov,initmean,med,mad,p);%raw.cov=factor*raw.cov;%raw.objective=bestobj*prod(mad)^2;%The reweighted robust estimates are now computed.%mah=mahalanobis(data,initmean,initcov,n,p);%%%NIEUWmah=mahalanobis(data,initmean,initcov*factor,n,p);raw.robdist=sqrt(mah);%m=2*n/asvardiag(h,n,p);%quantile=qf(0.975,p,m-p+1);quantile=chi2q(p);
%weights=mah*(m-p+1)/(m*p)<quantile;weights=mah<quantile;
raw.wt=weights;[res.center,res.cov]=weightmecov(data,weights,n,p);factor=rewconsfactor(weights,n,p);factor=factor*rewcorfactor(p,n,alpha);res.cov=factor*res.cov;[trcov,trcenter]=trafo(res.cov,res.center,med,mad,p); if cor res.cor=res.cov./(sqrt(diag(res.cov))*sqrt(diag(res.cov))');endif det(trcov) < exp(-50*p) [center,covar,z,correl,plane,count]=fit(data,NaN,med,mad,p,z,cor,res.center,res.cov,n); res.plane=z; if cor correl=covar./(sqrt(diag(covar))*sqrt(diag(covar))'); end res.method=displrw(count,n,p,center,covar,res.method,file,z,fid,cor,correl); res.flag=weights; res.robdist=raw.robdist;else mah=mahalanobis(data,res.center,res.cov,n,p); res.flag=(mah <= chi2q(p)); res.robdist=sqrt(mah);endres.mahalanobis=sqrt(mahalanobis(data,clmean,clcov,n,p));res.cov=trcov;res.center=trcenter;if ~lts disp(res.method)endspec.ask=1;if ~lts plotmcd(res,spec);end%-----------------------------------------------------------------------------------------function [raw_center,raw_cov,center,covar,raw_objective,raw_wt,mcd_wt,method]=displ(exactfit,... count,weights,n,p,center,covar,method,z,ok,raw_wt,mcd_wt,file,fid,cor,correl,varargin) % Determines some fields of the output argument RES for the exact fit situation. It also % displays and writes the messages concerning the exact fit situation. If the raw MCD % covariance matrix is not singular but the reweighted is, then the function displrw is % called instead of this function.[raw_center,center]=deal(center);[raw_cov,cov]=deal(covar);raw_objective=0;mcd_wt=weights;raw_wt=weights;switch exactfitcase 1 msg='The covariance matrix of the data is singular.';case 2 msg='The covariance matrix has become singular during the iterations of the MCD algorithm.';case 3 msg=sprintf('The %g-th order statistic of the absolute deviation of variable %g is zero. ',varargin{1},varargin{2}); end msg=sprintf([msg '\nThere are %g observations in the entire dataset of %g observations that lie on the \n'],count,n);switch pcase 2 msg=sprintf([msg 'line with equation %g(x_i1-m_1)%+g(x_i2-m_2)=0 \n'],z); msg=sprintf([msg 'where the mean (m_1,m_2) of these observations is the MCD location']);case 3 msg=sprintf([msg 'plane with equation %g(x_i1-m_1)%+g(x_i2-m_2)%+g(x_i3-m_3)=0 \n'],z); msg=sprintf([msg 'where the mean (m_1,m_2,m_3) of these observations is the MCD location']);otherwise msg=sprintf([msg 'hyperplane with equation a_1 (x_i1-m_1) + ... + a_p (x_ip-m_p) = 0 \n']); msg=sprintf([msg 'with coefficients a_i equal to : \n\n']); msg=sprintf([msg sprintf('%g ',z)]); msg=sprintf([msg '\n\nand where the mean (m_1,...,m_p) of these observations is the MCD location']); endmethod=strvcat(method,[msg '.']); disp(method)%-----------------------------------------------------------------------------------------function method=displrw(count,n,p,center,covar,method,file,z,fid,cor,correl) % Displays and writes messages in the case the reweighted robust covariance matrix % is singular.msg=sprintf('The reweighted MCD scatter matrix is singular. \n');msg=sprintf([msg 'There are %g observations in the entire dataset of %g observations that lie on the\n'],count,n);switch pcase 2 msg=sprintf([msg 'line with equation %g(x_i1-m_1)%+g(x_i2-m_2)=0 \n\n'],z); msg=sprintf([msg 'where the mean (m_1,m_2) of these observations is : \n\n']);case 3 msg=sprintf([msg 'plane with equation %g(x_i1-m_1)%+g(x_i2-m_2)%+g(x_i3-m_3)=0 \n\n'],z); msg=sprintf([msg 'where the mean (m_1,m_2,m_3) of these observations is : \n\n']);otherwise msg=sprintf([msg 'hyperplane with equation a_1 (x_i1-m_1) + ... + a_p (x_ip-m_p) = 0 \n']); msg=sprintf([msg 'with coefficients a_i equal to : \n\n']); msg=sprintf([msg sprintf('%g ',z)]); msg=sprintf([msg '\n\nand where the mean (m_1,...,m_p) of these observations is : \n\n']);endmsg=sprintf([msg sprintf('%g ',center)]);msg=sprintf([msg '\n\nTheir covariance matrix equals : \n\n']);msg=sprintf([msg sprintf([repmat('% 13.5g ',1,p) '\n'],covar)]);if cor msg=sprintf([msg '\n\nand their correlation matrix equals : \n\n']); msg=sprintf([msg sprintf([repmat('% 13.5g ',1,p) '\n'],correl)]);end method=strvcat(method,msg);%------------------------------------------------------------------------------------------ function [wmean,wcov]=weightmecov(dat,weights,n,nvar)% Computes the reweighted estimates.if size(weights,1)==1 weights=weights';endwmean=sum(dat.*repmat(weights,1,nvar))/sum(weights);wcov=zeros(nvar,nvar);for obs=1:n hlp=dat(obs,:)-wmean; wcov=wcov+weights(obs)*hlp'*hlp;endwcov=wcov/(sum(weights)-1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -