⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcrcv.m

📁 偏最小二乘算法在MATLAB中的实现
💻 M
字号:
function [press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv(x,y,split,pc,np,mc)
%PCRCV Cross validation for PCR models
%  Inputs are the matrix of predictor variables (x), matrix
%  of predicted variables (y), number of divisions of the data
%  (split), maximum number of principal components to calculate (pc),
%  an optional variable (np) which can be used to supress
%  the user override prompt to select optimum numbers of principal
%  components and an optional variable (mc) which can be used to
%  set the routine so mean centering is not performed on each
%  cross validation. Outputs are the prediction residual error
%  sum of squares for each division (press), cumulative PRESS 
%  (cumpress), number of principal components at minimum PRESS (minpc),
%  the final regression vector or matrix (b) at minimum PRESS, the 
%  x-block loadings (p), the x-block Q statistic limit (qlim), 
%  T^2 statistic limit (t2lim), and variance of the scores on the PCs
%  retained in the model (tvar). 
%  
%  Note: This cross validation routine uses the "venetian blind"
%  method of forming test sets, e.g. leaving out every other
%  or every third, etc. sample. See PCRCV1, PCRCVRND and PCRCVBLK
%  for other methods of forming the test sets. 
%
%  I/O format is: 
%  [press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcv(x,y,split,pc,np,mc);
%  An input of 0 for np will suppress the prompt.
%  An input of 0 for mc will not mean center the subsets.

%  Copyright
%  Barry M. Wise
%  1991
%  Modified February 1994
%  Modified May 1994

[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
  error('Number of samples must be the same in both blocks')
end
if nargin < 5
  np = 1;
end
if nargin < 6
  mc = 1;
end
press = zeros(split*ny,pc);
clc
for i = 1:split
  home
  s = sprintf('Now working on test set %g out of %g',i,split);
  disp(s)
  ind = zeros(mx,1);
  count = 0;
  for j = 1:mx
    test = round((j+i-1)/split) - ((j+i-1)/split);
    if test == 0
      ind(j,1) = 1;
      count = count + 1;
    end
  end
  [a,b] = sort(ind);
  newx = x(b,:);
  newy = y(b,:);
  if mc ~= 0
    [calx,mnsx] = mncn(newx(1:mx-count,:));
    testx = scale(newx(mx-count+1:mx,:),mnsx);
    [caly,mnsy] = mncn(newy(1:mx-count,:));
    testy = scale(newy(mx-count+1:mx,:),mnsy);
  else
    calx = newx(1:mx-count,:);
    testx = newx(mx-count+1:mx,:);
    caly = newy(1:mx-count,:);
    testy = newy(mx-count+1:mx,:);
  end
  [t,p,bbr] = pcr(calx,caly,pc);
  for j = 1:pc
    ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
	press((i-1)*ny+1:i*ny,j) = sum((ypred-testy).^2)';
  end
  plot(press((i-1)*ny+1:i*ny,:)')
  txt = sprintf('PRESS for Test Set Number %g out of %g',i,split);
  title(txt)
  xlabel('Number of Principal Components')
  ylabel('PRESS')
  drawnow
end
pause(2)
cumpress = sum(press);
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Principal Components')
xlabel('Number of Principal Components')
ylabel('PRESS')
drawnow
[a,minpc] = min(cumpress);
t = sprintf('Minimum Cumulative PRESS is at %g pcs',minpc);
disp(t)
if nargout > 3
  if np ~= 0
answ = input('Would you like to choose a different number of pcs?  (Yes = 1) ');
    if answ == 1
txt = sprintf('How many Principal Components would you like? (Max = %g) ',minpc);
      minpc = input(txt);
    end
  end
  disp('  ')
  disp('Now working on final PCR model')
  [t,p,b,ssq,eigs] = pcr(x,y,minpc);
  b = b((minpc-1)*ny+1:minpc*ny,:)';
  plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
  s = sprintf('Regression Coefficients in Final Model with %g pcs',minpc);
  title(s)
  xlabel('Variable Number')
  ylabel('Regression Coefficient')
  pause
end
if nargout > 5
  % Calculate qlim
  res = sum(((x - t*p').^2)');
  th1 = sum(eigs(minpc+1:min([mx nx])));
  th2 = sum((eigs(minpc+1:min([mx nx]))).^2);
  th3 = sum((eigs(minpc+1:min([mx nx]))).^3);
  h0 = 1 - ((2*th1*th3)/(3*th2^2));
  if h0 <= 0, h0 = 0.0001, end
  qlim = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
  disp('  ')
  s = sprintf('The 95 Percent Q limit is %g',qlim);
  disp(s)
  plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
  s = sprintf('Value of Q with 95 Percent Limit Based on %g PC Model',minpc);
  title(s)
  xlabel('Sample Number')
  ylabel('Value of Q')
  pause
end
if nargout > 6
  % Calculate t2lim
  if minpc > 1
    if mx > 300
      t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,300);
    else
      t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,mx-minpc);
    end
    disp('  ')
    s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
    disp(s)
	disp('  ')
	tvar = std(t);
	tsqvals = sum((auto(t)').^2);
	plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
	s = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',minpc);
    title(s)
    xlabel('Sample Number')
    ylabel('Value of T^2')
  else
    tvar = std(t);
    disp('T^2 not calculated when number of latent variables = 1')
    t2lim = 1.96^2;
  end 
end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -