📄 crossval.m
字号:
function [press,cumpress,rmsecv,rmsec] = crossval(x,y,rm,cvm,lv,split,iter,mc,out,osc);
%CROSSVAL Cross-validation for PCA, PLS and PCR
% This function does cross-validation for regression
% and principal components analysis models by several
% different methods including:
% leave-one-out ('loo')
% venetian blind ('vet')
% continuous blocks ('con')
% repeated random test sets ('rnd')
%
% The inputs are the scaled predictor variable matrix (x),
% predicted variable (y, not needed for PCA models), regression method
% or PCA (rm) which can be:
% PLS via NIPALS algorithm ('nip') (Old, slow way of doing PLS)
% PLS via SIMPLS algorithm ('sim') (New, fast way of doing PLS)
% PCR ('pcr')
% MLR ('mlr')
% PCA ('pca')
%
% cross-validation method (cvm, as described above), number of latent
% variables or principal components to calcluate (lv), number of
% sections to split the data into (split, needed for 'vet', 'con'
% and 'rnd' cross validations), number of iterations (iter, needed
% for 'rnd'), an optional variable which suppresses mean centering
% of the subsets when set to 0 (mc), an optional variable which
% supresses the output when set to 0 (out), and an optional variable
% which specifies the number of orthogonal signal correction components
% to use (osc), which can be a three element vector which specifies
% number of components, iterations and tolerance (osc = [nocomp,iter,tol]).
%
% The outputs are the predictive residual error
% sum of squares (press) for each subset and the cumulative
% PRESS (cumpress). Note that for multivariate y the press
% output is grouped by output variable, i.e. all of the PRESS values
% for the first variable are followed by all of the PRESS values
% for the second variable, etc. Optional outputs are the root mean
% square error of cross-validation (rmsecv) and root mean square
% error of (rmsec).
%
%I/O: [press,cumpress,rmsecv,rmsec] = crossval(x,y,rm,cvm,lv,split,iter,mc,out,osc);
%
% Some examples of how you might call CROSSVAL are:
% [press,cumpress] = crossval(x,y,'mlr','loo');
% [press,cumpress] = crossval(x,y,'nip','loo',10);
% [press,cumpress] = crossval(x,y,'pcr','vet',10,3);
% [press,cumpress] = crossval(x,y,'nip','con',10,5);
% [press,cumpress] = crossval(x,y,'sim','rnd',10,3,20);
%
% To suppress mean centering in the first two cases:
% [press,cumpress] = crossval(x,y,'mlr','loo',[],[],[],0);
% [press,cumpress] = crossval(x,y,'nip','loo',10,[],[],0);
%
% To add orthogonal signal correction with 2 components:
% [press,cumpress] = crossval(x,y,'sim','vet',10,3,[],[],[],2);
% [press,cumpress] = crossval(x,y,'pcr','con',10,3,[],[],[],2);
%
% For use with PCA, you might call CROSSVAL like:
% [press,cumpress] = crossval(x,[],'pca','loo',10);
% [press,cumpress] = crossval(x,[],'pca','vet',10,3);
% [press,cumpress] = crossval(x,[],'pca','con',10,5);
%
%See also: CROSSVUS, OSCCALC
%Copyright Eigenvector Research, Inc. 1997-8
%Modified by BMW 7/17/97, 4/6/97
%NBG 7/98, 9/98
%BMW 12/98 -added OSC
%BMW 1/99 -improved OSC
if strcmp(rm,'mlr')
lv = 1;
end
if isempty(x)
error('xblock matrix is empty')
elseif isempty(rm)
error('regression type not specified')
elseif isempty(cvm)
error('cross-validation method not specified')
elseif lv > min(size(x))
error('number of LVs must be <= min(size(x))')
end
[mx,nx] = size(x);
if strcmp(rm,'pca')
y = zeros(mx,1);
reg = 0;
else
reg = 1;
end
[my,ny] = size(y);
cumpress = zeros(ny,lv);
if (nargin < 8 | isempty(mc))
mc = 1;
end
if (nargin < 9 | isempty(out))
out = 1;
end
if (nargin < 10 | isempty(osc))
osc = 0; oiter = []; otol = [];
else
if length(osc) > 1
oiter = osc(2);
else
oiter = [];
end
if length(osc) > 2
otol = osc(3)
else
otol = [];
end
osc = osc(1);
end
if strcmp(cvm,'loo')
% Do leave one out
press = zeros(mx*ny,lv);
for i = 1:mx
if mc ~= 0
[calx,mnsx] = mncn([x(1:i-1,:);[x(i+1:mx,:)]]);
testx = scale(x(i,:),mnsx);
[caly,mnsy] = mncn([y(1:i-1,:);[y(i+1:mx,:)]]);
testy = scale(y(i,:),mnsy);
else
calx = [x(1:i-1,:);[x(i+1:mx,:)]];
testx = x(i,:);
caly = [y(1:i-1,:);[y(i+1:mx,:)]];
testy = y(i,:);
end
if osc ~= 0
[calx,nw,np] = osccalc(calx,caly,osc,oiter,otol);
testx = testx - testx*nw*inv(np'*nw)*np';
end
if strcmp(rm,'sim')
bbr = simpls(calx,caly,lv);
elseif strcmp(rm,'pcr')
bbr = pcr(calx,caly,lv,0);
elseif strcmp(rm,'nip')
bbr = pls(calx,caly,lv,0);
elseif strcmp(rm,'mlr')
bbr = (calx\caly)';
elseif strcmp(rm,'pca')
[u,s,v] = svd(calx,0);
rpca = eye(nx)-v(:,1)*v(:,1)';
repmat = zeros(nx);
else
error('Regression method not of known type')
end
for j = 1:lv
if reg == 1
ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
press((i-1)*ny+1:i*ny,j) = ((ypred-testy).^2)';
else
for kkk = 1:nx
repmat(:,kkk) = -(1/rpca(kkk,kkk))*rpca(:,kkk);
repmat(kkk,kkk) = -1;
end
press(i,j) = sum(sum((testx*repmat).^2));
if j ~= lv
rpca = rpca - v(:,j+1)*v(:,j+1)';
end
end
end
end
for i = 1:ny
cumpress(i,:) = sum(press(i:ny:mx*ny,:),1);
end
elseif strcmp(cvm,'vet')
% Do venetian blinds
press = zeros(split*ny,lv);
for i = 1:split
ind = zeros(mx,1);
count = 0;
for j = 1:mx
test = round((j+i-1)/split) - ((j+i-1)/split);
if test == 0
ind(j,1) = 1;
count = count + 1;
end
end
[a,b] = sort(ind);
newx = x(b,:);
newy = y(b,:);
if mc ~= 0
[calx,mnsx] = mncn(newx(1:mx-count,:));
testx = scale(newx(mx-count+1:mx,:),mnsx);
[caly,mnsy] = mncn(newy(1:mx-count,:));
testy = scale(newy(mx-count+1:mx,:),mnsy);
else
calx = newx(1:mx-count,:);
testx = newx(mx-count+1:mx,:);
caly = newy(1:mx-count,:);
testy = newy(mx-count+1:mx,:);
end
if osc ~= 0
[calx,nw,np] = osccalc(calx,caly,osc,oiter,otol);
testx = testx - testx*nw*inv(np'*nw)*np';
end
if strcmp(rm,'sim')
bbr = simpls(calx,caly,lv);
elseif strcmp(rm,'pcr')
bbr = pcr(calx,caly,lv,0);
elseif strcmp(rm,'nip')
bbr = pls(calx,caly,lv,0);
elseif strcmp(rm,'mlr')
bbr = (calx\caly)';
elseif strcmp(rm,'pca')
[u,s,v] = svd(calx,0);
rpca = eye(nx)-v(:,1)*v(:,1)';
repmat = zeros(nx);
else
error('Regression method not of known type')
end
for j = 1:lv
if reg == 1
ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
press((i-1)*ny+1:i*ny,j) = sum((ypred-testy).^2)';
else
for kkk = 1:nx
repmat(:,kkk) = -(1/rpca(kkk,kkk))*rpca(:,kkk);
repmat(kkk,kkk) = -1;
end
press(i,j) = sum(sum((testx*repmat).^2));
if j ~= lv
rpca = rpca - v(:,j+1)*v(:,j+1)';
end
end
end
end
for i = 1:ny
cumpress(i,:) = sum(press(i:ny:split*ny,:),1);
end
elseif strcmp(cvm,'con')
% Use contiguous subsets
press = zeros(split*ny,lv);
ind = ones(split,2);
for i = 1:split
ind(i,2) = round(i*mx/split);
end
for i = 1:split-1
ind(i+1,1) = ind(i,2) +1;
end
for i = 1:split
if mc ~= 0
[calx,mnsx] = mncn([x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)]);
testx = scale(x(ind(i,1):ind(i,2),:),mnsx);
[caly,mnsy] = mncn([y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)]);
testy = scale(y(ind(i,1):ind(i,2),:),mnsy);
else
calx = [x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)];
testx = x(ind(i,1):ind(i,2),:);
caly = [y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)];
testy = y(ind(i,1):ind(i,2),:);
end
if osc ~= 0
[calx,nw,np] = osccalc(calx,caly,osc,oiter,otol);
testx = testx - testx*nw*inv(np'*nw)*np';
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -