📄 plscvblk.m
字号:
function [press,cumpress,minlv,b,r,w,p,qlim,t2lim,tvar] = plscvblk(x,y,split,lv,np,mc)
%PLSCVBLK Cross validation for PLS using contiguous data blocks
% Inputs are the matrix of predictor variables (x), matrix
% of predicted variables (y), number of divisions of the data
% (split), maximum number of latent variables to calculate (lv),
% an optional variable (np) which can be used to supress
% the user override prompt to select optimum numbers of latent
% variables and an optional variable (mc) which can be used to
% set the routine so mean centering is not performed on each
% cross validation. Outputs are the prediction residual error
% sum of squares for each division (press), cumulative PRESS
% (cumpress), number of latent variables at minimum PRESS (minlv),
% the final regression vector (b) at minimum PRESS, the PLS x-matrix
% inverse (r), the x-block weights (w), the x-block loadings,
% the x-block Q statistic limit (qlim), T^2 statistic limit (t2lim)
% and the variance of the scores on the LVs retained in
% the PLS model (tvar).
%
% Note: This cross validation routine forms the test sets out of
% contiguous blocks of data and is the best choice when time
% series data are being modelled. See PLSCV, PLSCV1 and PLSCVRND
% for other methods of forming the cross validation test sets.
%
% I/O format is:
% [press,cumpress,minlv,b,w,p,qlim,t2lim,tvar]=plscvblk(x,y,split,lv,np,mc);
% An input of 0 for np will suppress the prompt.
% An input of 0 for mc will not mean center the subsets.
% Copyright
% Barry M. Wise
% 1991
% Modified February 1994
% Modified May 1994
[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
error('Number of samples must be the same in both blocks')
end
if nargin < 5
np = 1;
end
if nargin < 6
mc = 1;
end
press = zeros(split*ny,lv);
ind = ones(split,2);
for i = 1:split
ind(i,2) = round(i*mx/split);
end
for i = 1:split-1
ind(i+1,1) = ind(i,2) +1;
end
clc
for i = 1:split
home
s = sprintf('Now working on test set %g out of %g',i,split);
disp(s)
if mc ~= 0
[calx,mnsx] = mncn([x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)]);
testx = scale(x(ind(i,1):ind(i,2),:),mnsx);
[caly,mnsy] = mncn([y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)]);
testy = scale(y(ind(i,1):ind(i,2),:),mnsy);
else
calx = [x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)];
testx = x(ind(i,1):ind(i,2),:);
caly = [y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)];
testy = y(ind(i,1):ind(i,2),:);
end
if ny > 1
[p,q,w,t,u,b,ssqdif] = pls(calx,caly,lv);
bbr = conpred(b,w,p,q,lv);
for k = 2:lv
i1 = (k-1)*ny+1; i0 = i1-ny;
bbr(i1:k*ny,:) = bbr(i1:k*ny,:) + bbr(i0:(k-1)*ny,:);
end
else
bbr = simpls1(calx,caly,lv);
end
for j = 1:lv
ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
press((i-1)*ny+1:i*ny,j) = sum((ypred-testy).^2)';
end
plot(press((i-1)*ny+1:i*ny,:)')
txt = sprintf('PRESS for Test Set Number %g out of %g',i,split);
title(txt)
xlabel('Number of Latent Variables')
ylabel('PRESS')
drawnow
end
pause(2)
cumpress = sum(press);
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Latent Variables')
xlabel('Number of Latent Variables')
ylabel('PRESS')
drawnow
[a,minlv] = min(cumpress);
t = sprintf('Minimum Cumulative PRESS is at %g LVs',minlv);
disp(t)
if nargout > 3
if np ~= 0
answ = input('Would you like to choose a different number of LVs? (Yes = 1) ');
if answ == 1
txt = sprintf('How many Latent Variables would you like? (Max = %g) ',minlv);
minlv = input(txt);
end
end
disp(' ')
disp('Now working on final PLS model')
[p,q,w,t,u,bb,ssqdif] = pls(x,y,minlv);
b = conpred1(bb,w,p,q,minlv);
plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
s = sprintf('Regression Coefficients in Final Model with %g LVs',minlv);
title(s)
xlabel('Variable Number')
ylabel('Regression Coefficient')
pause
end
if nargout > 4
r = w*inv(p'*w)*inv(t'*t)*t';
end
if nargout > 7
% Calculate qlim
res = sum(((x - t*p').^2)');
th1 = sum(res)/(mx - 1);
th2 = (min([mx nx])-minlv)*((th1/(min([mx nx])-minlv))^2);
th3 = (min([mx nx])-minlv)*((th1/(min([mx nx])-minlv))^3);
h0 = 1 - ((2*th1*th3)/(3*th2^2));
qlim = th1*(((2.33*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
disp(' ')
s = sprintf('The Approximate 95 Percent Q limit is %g',qlim);
disp(s)
plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
s = sprintf('Value of Q with Approximate 95 Percent Limit Based on %g LV Model',minlv);
title(s)
xlabel('Sample Number')
ylabel('Value of Q')
pause
end
if nargout > 8
% Calculate t2lim
if minlv > 1
if mx > 300
t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,300);
else
t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,mx-minlv);
end
disp(' ')
s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
disp(s)
disp(' ')
tvar = std(t);
tsqvals = sum((auto(t)').^2);
plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
s = sprintf('Value of T^2 with 95 Percent Limit Based on %g LV Model',minlv);
title(s)
xlabel('Sample Number')
ylabel('Value of T^2')
else
tvar = std(t);
disp('T^2 not calculated when number of latent variables = 1')
t2lim = 1.96^2;
end
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -