⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcrcvblk.m

📁 偏最小二乘算法在MATLAB中的实现
💻 M
字号:
function [press,cumpress,minpc,b,p,qlim,t2lim,tvar] = pcrcvblk(x,y,split,pc,np,mc)
%PCRCVBLK Cross validation for PCR models using contiguous blocks
%  Inputs are the matrix of predictor variables (x), matrix
%  of predicted variables (y), number of divisions of the data
%  (split), maximum number of principal components to calculate (pc),
%  an optional variable (np) which can be used to supress
%  the user override prompt to select optimum numbers of principal
%  components and an optional variable (mc) which can be used to
%  set the routine so mean centering is not performed on each
%  cross validation. Outputs are the prediction residual error
%  sum of squares for each division (press), cumulative PRESS 
%  (cumpress), number of principal components at minimum PRESS (minpc),
%  the final regression vector or matrix (b) at minimum PRESS, the 
%  x-block loadings (p), the x-block Q statistic limit (qlim), 
%  T^2 statistic limit (t2lim) and variance of the scores on the
%  PCs retained in the model (tvar).
%  
%  Note: This cross validation routine forms the test sets out of 
%  contiguous blocks of data and is the best choice when time
%  series data are being modelled.  See PCRCV, PCRCV1 and PCRCVRND
%  for other methods of forming the test sets. 
%
%  I/O format is: 
%  [press,cumpress,minpc,b,p,qlim,t2lim] = pcrcvblk(x,y,split,pc,np,mc);
%  An input of 0 for np will suppress the prompt.
%  An input of 0 for mc will not mean center the subsets.

%  Copyright
%  Barry M. Wise
%  1991
%  Modified February 1994
%  Modified May 1994

[mx,nx] = size(x);
[my,ny] = size(y);
if mx ~= my
  error('Number of samples must be the same in both blocks')
end
if nargin < 5
  np = 1;
end
if nargin < 6
  mc = 1;
end
press = zeros(split*ny,pc);
ind = ones(split,2);
for i = 1:split
  ind(i,2) = round(i*mx/split);
end 
for i = 1:split-1
  ind(i+1,1) = ind(i,2) +1;
end
clc
for i = 1:split
  home
  s = sprintf('Now working on test set %g out of %g',i,split);
  disp(s)
  if mc ~= 0
    [calx,mnsx] = mncn([x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)]);
    testx = scale(x(ind(i,1):ind(i,2),:),mnsx);
    [caly,mnsy] = mncn([y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)]);
    testy = scale(y(ind(i,1):ind(i,2),:),mnsy);
  else
    calx = [x(1:ind(i,1)-1,:); x(ind(i,2)+1:mx,:)];
    testx = x(ind(i,1):ind(i,2),:);
    caly = [y(1:ind(i,1)-1,:); y(ind(i,2)+1:mx,:)];
    testy = y(ind(i,1):ind(i,2),:);
  end
  [t,p,bbr] = pcr(calx,caly,pc);
  for j = 1:pc
    ypred = testx*bbr((j-1)*ny+1:j*ny,:)';
	press((i-1)*ny+1:i*ny,j) = sum((ypred-testy).^2)';
  end
  plot(press((i-1)*ny+1:i*ny,:)')
  txt = sprintf('PRESS for Test Set Number %g out of %g',i,split);
  title(txt)
  xlabel('Number of Principal Components')
  ylabel('PRESS')
  drawnow
end
pause(2)
cumpress = sum(press);
plot(cumpress)
title('Cumulative PRESS as a Function of Number of Principal Components')
xlabel('Number of Principal Components')
ylabel('PRESS')
drawnow
[a,minpc] = min(cumpress);
t = sprintf('Minimum Cumulative PRESS is at %g pcs',minpc);
disp(t)
if nargout > 3
  if np ~= 0
answ = input('Would you like to choose a different number of pcs?  (Yes = 1) ');
    if answ == 1
txt = sprintf('How many Principal Components would you like? (Max = %g) ',minpc);
      minpc = input(txt);
    end
  end
  disp('  ')
  disp('Now working on final PCR model')
  [t,p,b,ssq,eigs] = pcr(x,y,minpc);
  b = b((minpc-1)*ny+1:minpc*ny,:)';
  plot(b), hold on, plot(b,'o'), plot(zeros(nx,1),'-g'), hold off
  s = sprintf('Regression Coefficients in Final Model with %g pcs',minpc);
  title(s)
  xlabel('Variable Number')
  ylabel('Regression Coefficient')
  pause
end
if nargout > 5
  % Calculate qlim
  res = sum(((x - t*p').^2)');
  th1 = sum(eigs(minpc+1:min([mx nx])));
  th2 = sum((eigs(minpc+1:min([mx nx]))).^2);
  th3 = sum((eigs(minpc+1:min([mx nx]))).^3);
  h0 = 1 - ((2*th1*th3)/(3*th2^2));
  if h0 <= 0, h0 = 0.0001, end
  qlim = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
  disp('  ')
  s = sprintf('The 95 Percent Q limit is %g',qlim);
  disp(s)
  plot(1:mx,res,1:mx,res,'+',[1 mx],[qlim qlim],'--g')
  s = sprintf('Value of Q with 95 Percent Limit Based on %g PC Model',minpc);
  title(s)
  xlabel('Sample Number')
  ylabel('Value of Q')
  pause
end
if nargout > 6
  % Calculate t2lim
  if minpc > 1
    if mx > 300
      t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,300);
    else
      t2lim = (minpc*(mx-1)/(mx-minpc))*ftest(.05,minpc,mx-minpc);
    end
    disp('  ')
    s = sprintf('The 95 Percent T^2 limit is %g',t2lim);
    disp(s)
	disp('  ')
	tvar = std(t);
	tsqvals = sum((auto(t)').^2);
	plot(1:mx,tsqvals,1:mx,tsqvals,'+',[0 mx],[t2lim t2lim],'--g')
	s = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',minpc);
    title(s)
    xlabel('Sample Number')
    ylabel('Value of T^2')
  else
    tvar = std(t);
    disp('T^2 not calculated when number of latent variables = 1')
    t2lim = 1.96^2;
  end 
end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -