⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crossvus.m

📁 PLS_Toolbox是用于故障检测与诊断方面的matlab工具箱
💻 M
字号:
function [press,cumpress,rmsecv,rmsec] = crossvus(x,y,rm,cvi,lv,out,mc);
%CROSSVUS User defined subset cross-validation for PCA, PLS and PCR
%  CROSSVUS performs cross-validation for regression and
%  principal components analysis models using pre-defined
%  data subsets for calibration and test sets.
%
%  Inputs include the scaled predictor variable matrix (x), and
%  predicted variable (y), [not needed for PCA models]. Input (rm)
%  defines the regression method or PCA (rm) which can be:
%   rm = 'nip': PLS via NIPALS algorithm (slower iterative algorithm PLS)
%   rm = 'sim': PLS via SIMPLS algorithm (fast algorithm for PLS)
%   rm = 'pcr': PCR
%   rm = 'mlr': MLR
%   rm = 'pca': PCA
%
%  Input (cvi) is a vector with integer elements defining data subsets:
%    -2 the sample is always in the test set,
%    -1 the sample is always in the calibration set,
%     0 the sample is never used, and
%     1,2,3,... defines each subset.
%  Note that length(cvi) must = size(x,1);
%
%  Input (lv) is the number of latent variables or principal components
%  to calcluate, optional variable (out) is used to suppress output when
%  set to 0 [default = 1], and an optional variable which suppresses
%  mean centering of the subsets when set to 0 (mc).
%  Outputs are (press) the predictive residual error sum of squares PRESS
%  for each subset, the cumulative PRESS (cumpress), root mean square
%  error of cross validation (rmsecv), and the root mean square error of
%  calibration (rmsec). Note that for multivariate y the press
%  output is grouped by output variable, i.e. all of the PRESS values
%  for the first variable are followed by all of the PRESS values
%  for the second variable, etc.
%
%I/O: [press,cumpress,rmsecv,rmsec] = crossvus(x,y,rm,cvi,lv,out,mc);
% 
%See also: CROSSVAL

%Copyright Eigenvector Research, Inc. 1998-2000
%nbg  1/00

if strcmp(rm,'mlr')
  lv   = 1;
end
if isempty(x)
  error('ERROR - xblock matrix is empty')
elseif isempty(rm)
  error('ERROR - regression type not specified')
elseif isempty(cvi)
  error('ERROR - cross-validation subsets not specified')
elseif lv>min(size(x))
  error('ERROR - number of LVs must be <= min(size(x))')
elseif length(cvi)~=size(x,1)
  error('ERROR - length(cvi) must equal size(x,1)')
end
[mx,nx] = size(x);
if strcmp(rm,'pca')
  y     = zeros(mx,1);
  reg   = 0;
else
  reg   = 1;
end
if nargin<6
  out   = 1;
end
if nargin<7
  mc    = 1;
end
cumpress = zeros(size(y,2),lv);

isamp   = [1:mx]';            % eliminate cvi==0 samples
isamp   = delsamps(isamp,find(cvi==0));
x       = x(isamp,:);
y       = y(isamp,:);
cvi     = cvi(isamp,:);
[mx,nx] = size(x);            % identify subsets
ny      = size(y,2);
isamp   = [1:mx]';
itst0   = find(cvi==-2);      % test samples
ical0   = find(cvi==-1);      % cal samples
isamp   = isamp(find(cvi>0)); % subsets
if length(isamp)>0
  [cvi,cvj] = sort(cvi(isamp));
  isamp   = isamp(cvj);
  cvk     = find(diff(cvi));
  lcvk    = length(cvk)+1;
  samps   = cell(lcvk,1);
  for ii=1:lcvk-1
    samps{ii,1} = isamp(find(cvi==cvi(cvk(ii))));
  end
  samps{lcvk,1} = isamp(find(cvi==cvi(cvk(lcvk-1)+1)));
  press   = zeros(lcvk*ny,lv);
  %if nargout>4
  %  stdb    = press;
  %  bcv     = zeros(lcvk*ny,lv,
  %end
end
for ii=1:lcvk
  ical  = [ical0;delsamps([1:mx]',samps{ii})];
  itst  = [itst0;samps{ii}];
  if mc~=0
    [calx,mnsx] = mncn(x(ical,:));
    tstx  = scale(x(itst,:),mnsx);
    [caly,mnsy] = mncn(y(ical,:));
    tsty  = scale(y(itst,:),mnsy);
  else
    calx  = x(ical,:);
    tstx  = x(itst,:);
    caly  = y(ical,:);
    tsty  = y(itst,:);
  end
  switch rm
  case 'sim'
    bbr = simpls(calx,caly,lv,[],0);
  case 'pcr'
    bbr = pcr(calx,caly,lv,0);
  case 'nip'
    bbr = pls(calx,caly,lv,0);
  case 'mlr'
    bbr = (calx\caly)';
  case 'pca'
    [u,s,v] = svd(calx,0);
    rpca = eye(nx)-v(:,1)*v(:,1)';
    repmat = zeros(nx);
  otherwise
    error('ERROR - Regression method not of known type')
  end
  for j1= 1:lv
    if reg == 1
      ypred = tstx*bbr((j1-1)*ny+1:j1*ny,:)';
      press((ii-1)*ny+1:ii*ny,j1) = sum((ypred-tsty).^2)';
    else 
      for kkk = 1:nx
        repmat(:,kkk) = -(1/rpca(kkk,kkk))*rpca(:,kkk);
        repmat(kkk,kkk) = -1;
      end
      press(ii,j1) = sum(sum((tstx*repmat).^2));
      if j1~=lv
        rpca = rpca - v(:,j1+1)*v(:,j1+1)';
      end
    end
  end
end
clear calx tstx caly tsty ical itst ypred repmat
for jj=1:ny
  cumpress(jj,:) = sum(press(jj:ny:lcvk*ny,:),1);
end

if ny > 1
  [mp,np] = size(press);
  ind   = zeros(mp,1);
  blk  = mp/ny;
  for ii=1:ny
    ind((ii-1)*blk+1:blk*ii,1) = (ii:ny:mp)'; 
  end
  press = press(ind,:);
end

rmsecv  = sqrt(cumpress/mx);

switch rm
case 'sim'
  [bbr,ssq] = simpls(x,y,lv,[],0);
case 'pcr'
  [bbr,ssq] = pcr(x,y,lv,0);
case 'nip'
  [bbr,ssq] = pls(x,y,lv,0);
case 'mlr'
  bbr   = (x\y)';
case 'pca'
  %need to put in cov(x)...
  [u,s,v] = svd(x'*x/(mx-1),0);
  rpca  = eye(nx)-v(:,1)*v(:,1)';
  repmat = zeros(nx);
  rmsec = zeros(size(press));
  for j1=1:lv
    for kkk = 1:nx
      repmat(:,kkk) = -(1/rpca(kkk,kkk))*rpca(:,kkk);
      repmat(kkk,kkk) = -1;
    end
    rmsec(ii,j1) = sum(sum((x*repmat).^2));
    if j1~=lv
      rpca = rpca - v(:,j1+1)*v(:,j1+1)';
    end
  end
  rmsec = sqrt(sum(rmsec)/mx);
  if out~=0
    ssq = zeros(lv,4);
    ssq(:,1) = [1:lv]';
    ssq(:,2) = diag(s(1:lv,1:lv));
    ssq(:,3) = ssq(:,2)/sum(diag(s))*100;
    ssq(:,4) = cumsum(ssq(:,3));    
    h = plotyy([1:lv],ssq(1:lv,2),[1:lv],rmsecv);
    set(get(h(1),'children'),'color',[0 0 1],'marker','p')
    set(get(h(1),'ylabel'),'string','Eigenvalue of Cov(x) (p)', ...
      'color',[0 0 0])
    set(h(1),'ycolor',[0 0 0])
    set(get(h(2),'children'),'color',[1 0 0],'marker','s')
    set(get(h(2),'ylabel'),'string','RMSECV (s)')
    set(h(2),'ycolor',[0 0 0])
    axes(h(2))
   % h1 = line('marker','o','color','b','xdata',[1:lv],'ydata',rmsec);
    xlabel('Latent Variable')
    dispssqp(ssq,lv)
  end
end
if reg==1
  rmsec = zeros(size(cumpress));
  for j1= 1:lv
    ypred = x*bbr((j1-1)*ny+1:j1*ny,:)';
    rmsec(:,j1) = sum((ypred-y).^2)';
  end
  rmsec = sqrt(rmsec/mx);
  if (out~=0)&(~strcmp(rm,'mlr'))
    plot([1:lv],rmsecv,'-or',[1:lv],rmsec,'-sb')
   % legend('RMSECV','RMSEC')
    xlabel('Latent Variable')
    ylabel('RMSECV (o), RMSEC (s)')
    dispssqr(ssq,lv)
  end
end

function [] = dispssqr(ssq,lv)
disp('  ')
disp('       Percent Variance Captured by PLS Model   ')
disp('  ')
disp('           -----X-Block-----    -----Y-Block-----')
disp('   LV #    This LV    Total     This LV    Total ')
disp('   ----    -------   -------    -------   -------')
format = '   %3.0f     %6.2f    %6.2f     %6.2f    %6.2f';
for ii=1:lv
  tab = sprintf(format,ssq(ii,:)); disp(tab)
end
disp('  ')

function [] = dispssqp(ssq,lv)
disp('   ')
disp('        Percent Variance Captured by PCA Model')
disp('  ')
disp('Principal     Eigenvalue     % Variance     % Variance')
disp('Component         of          Captured       Captured')
disp(' Number         Cov(X)        This  PC        Total')
disp('---------     ----------     ----------     ----------')
format = '   %3.0f         %3.2e        %6.2f         %6.2f';
for ii=1:lv
  tab = sprintf(format,ssq(ii,:)); disp(tab)
end

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -