📄 plscvrnd.m
字号:
function [press,cumpress,minlv,b,r,w,p,qlim,t2lim,tvar] = plscvrnd(x,y,split,iter,lv,np,mc)
%PLSCVRND PLS Cross Validation using random re-ordering of test data
% Inputs are the matrix of predictor variables (x), matrix
% of predicted variables (y), number of divisions of the data
% (split), number of cross-validation iterations (iter),
% maximum number of latent variables to calculate (lv),
% an optional variable (np) which can be used to supress
% the user override prompt to select optimum numbers of latent
% variables and an optional variable (mc) which can be used to
% set the routine so mean centering is not performed on each
% cross validation. Outputs are the prediction residual error
% sum of squares for each division (press), cumulative PRESS
% (cumpress), number of latent variables at minimum PRESS (minlv),
% the final regression vector (b) at minimum PRESS, the PLS x-matrix
% inverse (r), the x-block weights (w), the x-block loadings,
% the x-block Q statistic limit (qlim), T^2 statistic limit (t2lim)
% and the variance of the scores on the LVs retained in the
% PLS model (tvar).
%
% Note: This routine uses the "SDEP" method for cross validation. For
% each iteration the data is "shuffled" and cross validated.
% See PLSCV, PLSCVBLK and PLSCV1 for other methods of cross validation.
%
% I/O format is:
% [press,cumpress,minlv,b,w,p,qlim,t2lim,tvar] = plscvrnd(x,y,split,iter,lv,np,mc);
% An input of 0 for np will suppress the prompt.
% An input of 0 for mc will not mean center the subsets.
% Copyright
% Barry M. Wise
% 1991
% Modified February 1994
% Modified May 1994
[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
error('Number of samples must be the same in both blocks')
end
if nargin < 6
np = 1;
end
if nargin < 7
mc = 1;
end
press = zeros(split*iter*ny,lv);
clc
kk = 0;
ind = ones(split,2);
for i = 1:split
ind(i,2) = round(i*mx/split);
end
for i = 1:split-1
ind(i+1,1) = ind(i,2) +1;
end
for k = 1:iter
home
s = sprintf('Now working on iteration %g out of %g',k,iter);
disp(s)
[z,inds] = sort(randn(mx,1));
xx = x(inds,:);
yy = y(inds,:);
for i = 1:split
kk = kk + 1;
if mc ~= 0
[calx,mnsx] = mncn([xx(1:ind(i,1)-1,:); xx(ind(i,2)+1:mx,:)]);
testx = scale(xx(ind(i,1):ind(i,2),:),mnsx);
[caly,mnsy] = mncn([yy(1:ind(i,1)-1,:); yy(ind(i,2)+1:mx,:)]);
testy = scale(yy(ind(i,1):ind(i,2),:),mnsy);
else
calx = [xx(1:ind(i,1)-1,:); xx(ind(i,2)+1:mx,:)];
testx = xx(ind(i,1):ind(i,2),:);
caly = [yy(1:ind(i,1)-1,:); yy(ind(i,2)+1:mx,:)];
testy = yy(ind(i,1):ind(i,2),:);
end
if ny > 1
[p,q,w,t,u,b,ssqdif] = pls(calx,caly,lv);
bbr = conpred(b,w,p,q,lv);
for jj = 2:lv
i1 = (jj-1)*ny+1; i0 = i1-ny;
bbr(i1:jj*ny,:) = bbr(i1:jj*ny,:) + bbr(i0:(jj-1)*ny,:);
end
else
bbr = simpls1(calx,caly,lv);
end
for j = 1:lv
ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
press((kk-1)*ny+1:kk*ny,j) = sum((ypred-testy).^2)';
end
end
plot(sum(press(kk*ny-split*ny+1:kk*ny,:)))
txt = sprintf('Cumulative PRESS for Iteration Number %g out of %g',k,iter);
title(txt)
xlabel('Number of Latent Variables')
ylabel('PRESS')
drawnow
end
pause(2)
cumpress = sum(press)/iter;
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Latent Variables')
xlabel('Number of Latent Variables')
ylabel('PRESS')
drawnow
[a,minlv] = min(cumpress);
t = sprintf('Minimum Cumulative PRESS is at %g LVs',minlv);
disp(t)
if nargout > 3
if np ~= 0
answ = input('Would you like to choose a different number of LVs? (Yes = 1) ');
if answ == 1
txt = sprintf('How many Latent Variables would you like? (Max = %g) ',minlv);
minlv = input(txt);
end
end
disp(' ')
disp('Now working on final PLS model')
[p,q,w,t,u,bb,ssqdif] = pls(x,y,minlv);
b = conpred1(bb,w,p,q,minlv);
plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
s = sprintf('Regression Coefficients in Final Model with %g LVs',minlv);
title(s)
xlabel('Variable Number')
ylabel('Regression Coefficient')
pause
end
if nargout > 4
r = w*inv(p'*w)*inv(t'*t)*t';
end
if nargout > 7
% Calculate qlim
res = sum(((x - t*p').^2)');
th1 = sum(res)/(mx - 1);
th2 = (min([mx nx])-minlv)*((th1/(min([mx nx])-minlv))^2);
th3 = (min([mx nx])-minlv)*((th1/(min([mx nx])-minlv))^3);
h0 = 1 - ((2*th1*th3)/(3*th2^2));
qlim = th1*(((2.33*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
disp(' ')
s = sprintf('The Approximate 95 Percent Q limit is %g',qlim);
disp(s)
plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
s = sprintf('Value of Q with Approximate 95 Percent Limit Based on %g LV Model',minlv);
title(s)
xlabel('Sample Number')
ylabel('Value of Q')
pause
end
if nargout > 8
% Calculate t2lim
if minlv > 1
if mx > 300
t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,300);
else
t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,mx-minlv);
end
disp(' ')
s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
disp(s)
disp(' ')
tvar = std(t);
tsqvals = sum((auto(t)').^2);
plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
s = sprintf('Value of T^2 with 95 Percent Limit Based on %g LV Model',minlv);
title(s)
xlabel('Sample Number')
ylabel('Value of T^2')
else
tvar = std(t);
disp('T^2 not calculated when number of latent variables = 1')
t2lim = 1.96^2;
end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -