📄 fastmcd.m
字号:
%--------------------------------------------------------------------------------------function [initmean,initcov]=mcduni(y,ncas,h,len,alpha)% The exact MCD algorithm for the univariate case. y=sort(y);ay(1)=sum(y(1:h));for samp=2:len ay(samp)=ay(samp-1)-y(samp-1)+y(samp+h-1);enday2=ay.^2/h;sq(1)=sum(y(1:h).^2)-ay2(1);for samp=2:len sq(samp)=sq(samp-1)-y(samp-1)^2+y(samp+h-1)^2-ay2(samp)+ay2(samp-1);endsqmin=min(sq);ii=find(sq==sqmin);ndup=length(ii);slutn(1:ndup)=ay(ii);initmean=slutn(floor((ndup+1)/2))/h;factor=rawcorfactor(1,ncas,alpha);factor=factor*rawconsfactor(h,ncas,1);initcov=factor*sqmin/h;%-----------------------------------------------------------------------------------------function [initmean,initcov,z,correl,varargout]=fit(dat,plane,med,mad,p,z,cor,varargin)% This function is called in the case of an exact fit. It computes the correlation% matrix and transforms the coefficients of the hyperplane, the mean, the covariance% and the correlation matrix to the original units.if isnan(plane) [meanvct,covmat,n]=deal(varargin{:}); [z, eigvl]=eigs(covmat,1,0,struct('disp',0)); dist=abs(sum((dat-repmat(meanvct,n,1))'.*repmat(z,1,n))); plane=find(dist < 1e-8); varargout{1}=plane; varargout{2}=length(plane); endz=z./mad';[initcov,initmean]=trafo(cov(dat(plane,:)),mean(dat(plane,:)),med,mad,p);if cor correl=initcov./(sqrt(diag(initcov))*sqrt(diag(initcov))');else correl=NaN;end%------------------------------------------------------------------------------------------function obsingroup=fillgroup(n,group,ngroup,seed,fid)% Creates the subdatasets.obsingroup=cell(1,ngroup+1); jndex=0;for k=1:ngroup for m=1:group(k) [random,seed]=uniran(seed); ran=floor(random*(n-jndex)+1); jndex=jndex+1; if jndex==1 index(1,jndex)=ran; index(2,jndex)=k; else index(1,jndex)=ran+jndex-1; index(2,jndex)=k; ii=min(find(index(1,1:jndex-1) > ran-1+[1:jndex-1])); if length(ii) index(1,jndex:-1:ii+1)=index(1,jndex-1:-1:ii); index(2,jndex:-1:ii+1)=index(2,jndex-1:-1:ii); index(1,ii)=ran+ii-1; index(2,ii)=k; end end end obsingroup{k}=index(1,index(2,:)==k); obsingroup{ngroup+1}=[obsingroup{ngroup+1},obsingroup{k}]; end%-----------------------------------------------------------------------------------------function [ranset,seed]=randomset(tot,nel,seed)% This function is called if not all (p+1)-subsets out of n will be considered. % It randomly draws a subsample of nel cases out of tot. for j=1:nel [random,seed]=uniran(seed); num=floor(random*tot)+1; if j > 1 while any(ranset==num) [random,seed]=uniran(seed); num=floor(random*tot)+1; end end ranset(j)=num;end %-----------------------------------------------------------------------------------------function [index,seed]=addobs(index,n,seed)% Extends a trial subsample with one observation.jndex=length(index);[random,seed]=uniran(seed);ran=floor(random*(n-jndex)+1);jndex=jndex+1;index(jndex)=ran+jndex-1;ii=min(find(index(1:jndex-1) > ran-1+[1:jndex-1]));if length(ii)~=0 index(jndex:-1:ii+1)=index(jndex-1:-1:ii); index(ii)=ran+ii-1;end%-----------------------------------------------------------------------------------------function mahsort=mahal(dat,meanvct,covmat,part,fine,final,k,obsingroup,group,minigr,n,nvar)% Orders the observations according to the mahalanobis distances.if ~part | final [dis,ind]=sort(mahalanobis(dat,meanvct,covmat,n,nvar)); mahsort=ind;elseif fine [dis,ind]=sort(mahalanobis(dat(obsingroup{end},:),meanvct,covmat,minigr,nvar)); mahsort=obsingroup{end}(ind); else [dis,ind]=sort(mahalanobis(dat(obsingroup{k},:),meanvct,covmat,group(k),nvar)); mahsort=obsingroup{k}(ind);end %-----------------------------------------------------------------------------------------function [covmat,meanvct]=trafo(covmat,meanvct,med,mad,nvar)% Transforms a mean vector and a covariance matrix to the original units.covmat=covmat.*repmat(mad,nvar,1).*repmat(mad',1,nvar);meanvct=meanvct.*mad+med;%-----------------------------------------------------------------------------------------function [bestmean,bestcov,bobj]=insertion(bestmean,bestcov,bobj,meanvct,covmat,obj,row,eps)% Stores, for the first and second stage of the algorithm, the results in the appropriate % arrays if it belongs to the 10 best results.insert=1;equ=find(obj==bobj(row,:)); for j=equ if (meanvct==bestmean{row,j}) & all(covmat==bestcov{row,j}) insert=0; endend if insert ins=min(find(obj < bobj(row,:))); if ins==10 bestmean{row,ins}=meanvct; bestcov{row,ins}=covmat; bobj(row,ins)=obj; else [bestmean{row,ins+1:10}]=deal(bestmean{row,ins:9}); bestmean{row,ins}=meanvct; [bestcov{row,ins+1:10}]=deal(bestcov{row,ins:9}); bestcov{row,ins}=covmat; bobj(row,ins+1:10)=bobj(row,ins:9); bobj(row,ins)=obj; end end%-----------------------------------------------------------------------------------------function mah=mahalanobis(dat,meanvct,covmat,n,p)% Computes the mahalanobis distances.for k=1:p d=covmat(k,k); covmat(k,:)=covmat(k,:)/d; rows=setdiff(1:p,k); b=covmat(rows,k); covmat(rows,:)=covmat(rows,:)-b*covmat(k,:); covmat(rows,k)=-b/d; covmat(k,k)=1/d;endhlp=dat-repmat(meanvct,n,1);mah=sum(hlp*covmat.*hlp,2)';%------------------------------------------------------------------------------------------function [random,seed]=uniran(seed)% The random generator.seed=floor(seed*5761)+999;quot=floor(seed/65536);seed=floor(seed)-floor(quot*65536);random=seed/65536.D0; %------------------------------------------------------------------------------------------ function plotmcd(mcdres,options)% The 0.975 quantile of the chi-squared distribution:chi2q=[5.02389,7.37776,9.34840,11.1433,12.8325,... 14.4494,16.0128,17.5346,19.0228,20.4831,21.920,23.337,... 24.736,26.119,27.488,28.845,30.191,31.526,32.852,34.170,... 35.479,36.781,38.076,39.364,40.646,41.923,43.194,44.461,... 45.722,46.979,48.232,49.481,50.725,51.966,53.203,54.437,... 55.668,56.896,58.120,59.342,60.561,61.777,62.990,64.201,... 65.410,66.617,67.821,69.022,70.222,71.420]; p=size(mcdres.X,2);if det(mcdres.cov) < exp(-50*p) error('The MCD covariance matrix is singular ') end% The value of the fields of the input argument OPTIONS are now determined.% If the user hasn't given a value to one of the fields, the default value % is assigned to it.if nargin==1 ask=0; nid=3; xlab='X1'; ylab='X2';elseif isstruct(options) names=fieldnames(options); if strmatch('ask',names,'exact') ask=options.ask; else ask=0; end if strmatch('nid',names,'exact') nid=options.nid; else nid=3; end if strmatch('xlab',names,'exact') xlab=options.xlab; else xlab='X1'; end if strmatch('ylab',names,'exact') ylab=options.ylab; else ylab='X2'; endelse error('The second input argument is not a structure')enddata=mcdres.X;choice=1;n=size(data,1);ellip=[];if ask allsubset=0;else allsubset=1;endcloseplot=0;% md and rd contain resp. the classical and robust distances.md=sqrt(mahalanobis(data,mean(data),cov(data),n,p));%rd=sqrt(mahalanobis(data,mcdres.center,mcdres.cov,n,p));
rd=sqrt(mahalanobis(data,mcdres.center,mcdres.cov,n,p));while choice ~=7 if ask choice=menu('Make a plot selection :','All','Robust Distances',... 'Mahalanobis Distances','QQ plot of Robust Distances',... 'Robust versus Mahalanobis Distances',... 'MCD Tolerance Ellipse','Exit'); if closeplot==1 & choice~=7 & ~(choice==6 & p~=2) % Close previous plots. for i=1:5 close end closeplot=0; end if choice==1 allsubset=2; end end if choice==1 choice=2; end if allsubset & ~(choice==6 & p~=2 | choice==2) % Create a new figure window. figure end switch choice case 2 x=1:n; y=rd; ymax=max([max(y),sqrt(chi2q(p)),2.5])*1.05;% beg('Index','Robust Distance',rd,x,y,nid,n,-0.025*n,n*1.05,-0.025*ymax,ymax); beg('Production Sequence','Robust Distance',rd,x,y,nid,n,-0.025*n,n*1.05,-0.025*ymax,ymax); line([-0.025*n,n*1.05],repmat(max([sqrt(chi2q(p)),2.5]),1,2),'Color','r'); case 3 x=1:n; y=md; ymax=max([max(y),sqrt(chi2q(p)),2.5])*1.05;% beg('Index','Mahalanobis Distance',md,x,y,nid,n,-0.025*n,n*1.05,-0.025*ymax,ymax);
beg('Production Sequence','Mahalanobis Distance',md,x,y,nid,n,-0.025*n,n*1.05,-0.025*ymax,ymax); line([-0.025*n,n*1.05],repmat(max([sqrt(chi2q(p)),2.5]),1,2),'Color','r'); case 4 chisqquantile=repmat(0,1,n); for i=1:n chisqquantile(i)=qchisq((i-1/3)/(n+1/3),p); end normqqplot(sqrt(chisqquantile),rd); box; xlabel('Square root of the quantiles of the chi-squared distribution'); ylabel('Robust distances'); case 5 x=md; y=rd; ymax=max([max(y),sqrt(chi2q(p)),2.5])*1.05; xmax=max([max(x),sqrt(chi2q(p)),2.5])*1.05; beg('Mahalanobis Distance','Robust Distance',rd,x,y,nid,n,-0.01*xmax,xmax,-0.01*ymax,ymax); line(repmat(max([sqrt(chi2q(p)),2.5]),1,2),[-0.01*ymax,ymax],'Color','r'); line([-0.01*xmax,xmax],repmat(max([sqrt(chi2q(p)),2.5]),1,2),'Color','r'); hold on plot([-0.01*xmax,min([xmax,ymax])],[-0.01*ymax,min([xmax,ymax])],':','Color','g'); hold off case 6 if p~=2 disp('MCD Tolerance Ellips is only drawn for two-dimensional datasets') else if isempty(ellip) ellip=ellipse(mcdres.center,mcdres.cov); end xmin=min([data(:,1);ellip(:,1)]); xmax=max([data(:,1);ellip(:,1)]); ymin=min([data(:,2);ellip(:,2)]); ymax=max([data(:,2);ellip(:,2)]); xmarg=0.05*abs(xmax-xmin); ymarg=0.05*abs(ymax-ymin); xmin=xmin-xmarg; xmax=xmax+xmarg; ymin=ymin-ymarg; ymax=ymax+ymarg; beg(xlab,ylab,rd,data(:,1)',data(:,2)',nid,n,xmin,xmax,ymin,ymax); title('Tolerance ellipse ( 97.5 % )');
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -