⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 plscvblk.m

📁 偏最小二乘算法在MATLAB中的实现
💻 M
字号:
function [press,cumpress,minlv,b,r,w,p,qlim,t2lim,tvar] = plscvblk(x,y,split,lv,np,mc)
%PLSCVBLK Cross validation for PLS using contiguous data blocks
%  Inputs are the matrix of predictor variables (x), matrix
%  of predicted variables (y), number of divisions of the data
%  (split), maximum number of latent variables to calculate (lv),
%  an optional variable (np) which can be used to suppress
%  the user override prompt to select optimum numbers of latent
%  variables and an optional variable (mc) which can be used to
%  set the routine so mean centering is not performed on each
%  cross validation. Outputs are the prediction residual error
%  sum of squares for each division (press), cumulative PRESS
%  (cumpress), number of latent variables at minimum PRESS (minlv),
%  the final regression vector (b) at minimum PRESS, the PLS x-matrix
%  inverse (r), the x-block weights (w), the x-block loadings (p),
%  the x-block Q statistic limit (qlim), T^2 statistic limit (t2lim)
%  and the spread of the scores on the LVs retained in
%  the PLS model (tvar), computed as std(t).
%
%  press has split*ny rows (ny = number of columns of y): rows
%  (i-1)*ny+1 to i*ny hold the PRESS of test set i for each y
%  variable; column j corresponds to a model with j latent variables.
%
%  Note: This cross validation routine forms the test sets out of
%  contiguous blocks of data and is the best choice when time
%  series data are being modelled. See PLSCV, PLSCV1 and PLSCVRND
%  for other methods of forming the cross validation test sets.
%
%  Only the outputs that are requested are computed: the final model
%  (and its plots/prompts) is built only when four or more outputs
%  are requested, r for five or more, qlim for eight or more and
%  t2lim/tvar for nine or more.
%
%  I/O format is:
%  [press,cumpress,minlv,b,r,w,p,qlim,t2lim,tvar]=plscvblk(x,y,split,lv,np,mc);
%  An input of 0 for np will suppress the prompt.
%  An input of 0 for mc will not mean center the subsets.

%  Copyright
%  Barry M. Wise
%  1991
%  Modified February 1994
%  Modified May 1994

[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
  error('Number of samples must be the same in both blocks')
end
if split < 2
  % With a single division the calibration set would be empty.
  error('Number of divisions (split) must be at least 2')
end
if nargin < 5
  np = 1;
end
if nargin < 6
  mc = 1;
end

% Row limits of each contiguous test block: ind(i,1) is the first row
% and ind(i,2) the last row of test set i.
press = zeros(split*ny,lv);
ind = ones(split,2);
for i = 1:split
  ind(i,2) = round(i*mx/split);
end
for i = 1:split-1
  ind(i+1,1) = ind(i,2) + 1;
end

clc
for i = 1:split
  home
  s = sprintf('Now working on test set %g out of %g',i,split);
  disp(s)

  % Rows left out for testing and rows kept for calibration.
  tstrows = ind(i,1):ind(i,2);
  calrows = [1:ind(i,1)-1, ind(i,2)+1:mx];

  if mc ~= 0
    % Center the calibration data and apply the same means to the test data.
    [calx,mnsx] = mncn(x(calrows,:));
    testx = scale(x(tstrows,:),mnsx);
    [caly,mnsy] = mncn(y(calrows,:));
    testy = scale(y(tstrows,:),mnsy);
  else
    calx = x(calrows,:);
    testx = x(tstrows,:);
    caly = y(calrows,:);
    testy = y(tstrows,:);
  end

  if ny > 1
    [p,q,w,t,u,b,ssqdif] = pls(calx,caly,lv);
    bbr = conpred(b,w,p,q,lv);
    % Accumulate the ny-row blocks of bbr so that block k is the sum of
    % blocks 1..k, i.e. the regression matrix of the k-LV model.
    for k = 2:lv
      i1 = (k-1)*ny+1; i0 = i1-ny;
      bbr(i1:k*ny,:) = bbr(i1:k*ny,:) + bbr(i0:(k-1)*ny,:);
    end
  else
    bbr = simpls1(calx,caly,lv);
  end

  prows = (i-1)*ny+1:i*ny;
  for j = 1:lv
    ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
    % Sum explicitly down the samples (dimension 1). Without the dimension
    % argument a one-sample test block with ny > 1 is a row vector and
    % sum() would collapse it across the y variables instead.
    press(prows,j) = sum((ypred-testy).^2,1)';
  end

  plot(press(prows,:)')
  txt = sprintf('PRESS for Test Set Number %g out of %g',i,split);
  title(txt)
  xlabel('Number of Latent Variables')
  ylabel('PRESS')
  drawnow
end
pause(2)

% Total PRESS over all test sets and y variables for each number of LVs.
cumpress = sum(press,1);
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Latent Variables')
xlabel('Number of Latent Variables')
ylabel('PRESS')
drawnow
[a,minlv] = min(cumpress);
s = sprintf('Minimum Cumulative PRESS is at %g LVs',minlv);
disp(s)

if nargout > 3
  if np ~= 0
    answ = input('Would you like to choose a different number of LVs?  (Yes = 1) ');
    if answ == 1
      % The largest model that was cross validated has lv latent variables.
      txt = sprintf('How many Latent Variables would you like? (Max = %g) ',lv);
      minlv = input(txt);
    end
  end
  disp('  ')
  disp('Now working on final PLS model')
  % The final model is fitted to x and y exactly as supplied by the caller.
  [p,q,w,t,u,bb,ssqdif] = pls(x,y,minlv);
  b = conpred1(bb,w,p,q,minlv);
  plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
  s = sprintf('Regression Coefficients in Final Model with %g LVs',minlv);
  title(s)
  xlabel('Variable Number')
  ylabel('Regression Coefficient')
  pause
end

if nargout > 4
  r = w*inv(p'*w)*inv(t'*t)*t';
end

if nargout > 7
  % Calculate qlim
  % res(k) is the squared x-residual of sample k; summing along dimension 2
  % keeps one value per sample even when x has a single column.
  res = sum((x - t*p').^2,2)';
  nfree = min([mx nx]) - minlv;
  th1 = sum(res)/(mx - 1);
  th2 = nfree*((th1/nfree)^2);
  th3 = nfree*((th1/nfree)^3);
  h0 = 1 - ((2*th1*th3)/(3*th2^2));
  % NOTE(review): the constant 2.33 is used here while the messages below
  % say 95 percent - confirm which confidence level is intended.
  qlim = th1*(((2.33*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
  disp('  ')
  s = sprintf('The Approximate 95 Percent Q limit is %g',qlim);
  disp(s)
  plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
  s = sprintf('Value of Q with Approximate 95 Percent Limit Based on %g LV Model',minlv);
  title(s)
  xlabel('Sample Number')
  ylabel('Value of Q')
  pause
end

if nargout > 8
  % Calculate t2lim
  % NOTE(review): the help text calls tvar a variance but std(t) is what
  % is returned - confirm against callers before changing either.
  tvar = std(t);
  if minlv > 1
    % The denominator degrees of freedom passed to ftest are capped at 300.
    if mx > 300
      t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,300);
    else
      t2lim = (minlv*(mx-1)/(mx-minlv))*ftest(.05,minlv,mx-minlv);
    end
    disp('  ')
    s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
    disp(s)
    disp('  ')
    tsqvals = sum((auto(t)').^2);
    plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
    s = sprintf('Value of T^2 with 95 Percent Limit Based on %g LV Model',minlv);
    title(s)
    xlabel('Sample Number')
    ylabel('Value of T^2')
  else
    disp('T^2 not calculated when number of latent variables = 1')
    t2lim = 1.96^2;
  end
end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -