📄 pcrcv.m
字号:
function [press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv(x,y,split,pc,np,mc)
%PCRCV Cross validation for PCR models
% Inputs are the matrix of predictor variables (x), matrix
% of predicted variables (y), number of divisions of the data
% (split), maximum number of principal components to calculate (pc),
% an optional variable (np) which can be used to supress
% the user override prompt to select optimum numbers of principal
% components and an optional variable (mc) which can be used to
% set the routine so mean centering is not performed on each
% cross validation. Outputs are the prediction residual error
% sum of squares for each division (press), cumulative PRESS
% (cumpress), number of principal components at minimum PRESS (minpc),
% the final regression vector or matrix (b) at minimum PRESS, the
% x-block loadings (p), the x-block Q statistic limit (qlim),
% T^2 statistic limit (t2lim), and variance of the scores on the PCs
% retained in the model (tvar).
%
% Note: This cross validation routine uses the "venetian blind"
% method of forming test sets, e.g. leaving out every other
% or every third, etc. sample. See PCRCV1, PCRCVRND and PCRCVBLK
% for other methods of forming the test sets.
%
% I/O format is:
% [press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv(x,y,split,pc,np,mc);
% An input of 0 for np will suppress the prompt.
% An input of 0 for mc will not mean center the subsets.
% Copyright
% Barry M. Wise
% 1991
% Modified February 1994
% Modified May 1994
[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
error('Number of samples must be the same in both blocks')
end
if nargin < 5
np = 1;
end
if nargin < 6
mc = 1;
end
press = zeros(split*ny,pc);
clc
for i = 1:split
home
s = sprintf('Now working on test set %g out of %g',i,split);
disp(s)
ind = zeros(mx,1);
count = 0;
for j = 1:mx
test = round((j+i-1)/split) - ((j+i-1)/split);
if test == 0
ind(j,1) = 1;
count = count + 1;
end
end
[a,b] = sort(ind);
newx = x(b,:);
newy = y(b,:);
if mc ~= 0
[calx,mnsx] = mncn(newx(1:mx-count,:));
testx = scale(newx(mx-count+1:mx,:),mnsx);
[caly,mnsy] = mncn(newy(1:mx-count,:));
testy = scale(newy(mx-count+1:mx,:),mnsy);
else
calx = newx(1:mx-count,:);
testx = newx(mx-count+1:mx,:);
caly = newy(1:mx-count,:);
testy = newy(mx-count+1:mx,:);
end
[t,p,bbr] = pcr(calx,caly,pc);
for j = 1:pc
ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
press((i-1)*ny+1:i*ny,j) = sum((ypred-testy).^2)';
end
plot(press((i-1)*ny+1:i*ny,:)')
txt = sprintf('PRESS for Test Set Number %g out of %g',i,split);
title(txt)
xlabel('Number of Principal Components')
ylabel('PRESS')
drawnow
end
pause(2)
cumpress = sum(press);
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Principal Components')
xlabel('Number of Principal Components')
ylabel('PRESS')
drawnow
[a,minpc] = min(cumpress);
t = sprintf('Minimum Cumulative PRESS is at %g pcs',minpc);
disp(t)
if nargout > 3
if np ~= 0
answ = input('Would you like to choose a different number of pcs? (Yes = 1) ');
if answ == 1
txt = sprintf('How many Principal Components would you like? (Max = %g) ',minpc);
minpc = input(txt);
end
end
disp(' ')
disp('Now working on final PCR model')
[t,p,b,ssq,eigs] = pcr(x,y,minpc);
b = b((minpc-1)*ny+1:minpc*ny,:)';
plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
s = sprintf('Regression Coefficients in Final Model with %g pcs',minpc);
title(s)
xlabel('Variable Number')
ylabel('Regression Coefficient')
pause
end
if nargout > 5
% Calculate qlim
res = sum(((x - t*p').^2)');
th1 = sum(eigs(minpc+1:min([mx nx])));
th2 = sum((eigs(minpc+1:min([mx nx]))).^2);
th3 = sum((eigs(minpc+1:min([mx nx]))).^3);
h0 = 1 - ((2*th1*th3)/(3*th2^2));
if h0 <= 0, h0 = 0.0001, end
qlim = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
disp(' ')
s = sprintf('The 95 Percent Q limit is %g',qlim);
disp(s)
plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
s = sprintf('Value of Q with 95 Percent Limit Based on %g PC Model',minpc);
title(s)
xlabel('Sample Number')
ylabel('Value of Q')
pause
end
if nargout > 6
% Calculate t2lim
if minpc > 1
if mx > 300
t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,300);
else
t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,mx-minpc);
end
disp(' ')
s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
disp(s)
disp(' ')
tvar = std(t);
tsqvals = sum((auto(t)').^2);
plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
s = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',minpc);
title(s)
xlabel('Sample Number')
ylabel('Value of T^2')
else
tvar = std(t);
disp('T^2 not calculated when number of latent variables = 1')
t2lim = 1.96^2;
end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -