⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 npls.m

📁 多维数据分析,有nPLS,PARAFAC,TURKER等
💻 M
📖 第 1 页 / 共 3 页
字号:
function [Xfactors,Yfactors,Core,B,ypred,ssx,ssy,reg] = npls(X,Y,Fac,show);

%NPLS multilinear partial least squares regression
%
% See also:
% 'parafac' 'tucker'
%
%
% MULTILINEAR PLS  -  N-PLS
%
% INPUT
% X        Array of independent variables
% Y        Array of dependent variables
% Fac      Number of factors to compute
% 
% OPTIONAL
% show	   If show = NaN, no outputs are given
%
%
% OUTPUT
% Xfactors Holds the components of the model of X in a cell array.
%          Use fac2let to convert the parameters to scores and
%          weight matrices. I.e., for a three-way array do
%          [T,Wj,Wk]=fac2let(Xfactors);
% Yfactors Similar to Xfactors but for Y
% Core     Core array used for calculating the model of X
% B        The regression coefficients from which the scores in
%          the Y-space are estimated from the scores in the X-
%          space (U = TB);
% ypred    The predicted values of Y for one to Fac components
%          (array with dimension Fac in the last mode)
% ssx      Variation explained in the X-space.
%          ssx(f+1,1) is the sum-squared residual after first f factors.
%          ssx(f+1,2) is the percentage explained by first f factors.
% ssy      As above for the Y-space
% reg      Cell array with regression coefficients for raw (preprocessed) X
%
%
% AUXILIARY
%
% If missing elements occur these must be represented by NaN.
%
%
% [Xfactors,Yfactors,Core,B,ypred,ssx,ssy,reg] = npls(X,y,Fac);
% or short
% [Xfactors,Yfactors,Core,B] = npls(X,y,Fac);
%

% Copyright, 1998 - 
% This M-file and the code in it belongs to the holder of the
% copyrights and is made public under the following constraints:
% It must not be changed or modified and code cannot be added.
% The file must be regarded as read-only. Furthermore, the
% code can not be made part of anything but the 'N-way Toolbox'.
% In case of doubt, contact the holder of the copyrights.
%
% Rasmus Bro
% Chemometrics Group, Food Technology
% Department of Food and Dairy Science
% Royal Veterinary and Agricultutal University
% Rolighedsvej 30, DK-1958 Frederiksberg, Denmark
% Phone  +45 35283296
% Fax    +45 35283245
% E-mail rb@kvl.dk
%


% $ Version 1.02 $ Date July 1998 $ Not compiled $
% $ Version 1.03 $ Date 4. December 1998 $ Not compiled $ Cosmetic changes
% $ Version 1.04 $ Date 4. December 1999 $ Not compiled $ Cosmetic changes
% $ Version 1.05 $ Date July 2000 $ Not compiled $ error caused weights not to be normalized for four-way and higher
% $ Version 1.06 $ Date November 2000 $ Not compiled $ increase max it and decrease conv crit to better handle difficult data
% $ Version 2.00 $ May 2001 $ Changed to array notation $ RB $ Not compiled $
% $ Version 2.01 $ June 2001 $ Changed to handle new core in X $ RB $ Not compiled $
% $ Version 2.02 $ January 2002 $ Outputs all predictions (1 - LV components) $ RB $ Not compiled $

if nargin==0
   disp(' ')
   disp(' ')
   disp(' THE N-PLS REGRESSION MODEL')
   disp(' ')
   disp(' Type <<help npls>> for more info')
   disp('  ')
   disp(' [Xfactors,Yfactors,Core,B,ypred,ssx,ssy] = npls(X,y,Fac);')
   disp(' or short')
   disp(' [Xfactors,Yfactors,Core,B] = npls(X,y,Fac);')
   disp(' ')
   return
elseif nargin<3
   error(' The inputs X, y, and Fac must be given')
end



if ~exist('show')==1|nargin<4
   show=1;
end

maxit=120;

DimX = size(X);
X = reshape(X,DimX(1),prod(DimX(2:end)));
ordX = length(DimX);if ordX==2&size(X,2)==1;ordX = 1;end
DimY = size(Y);
Y = reshape(Y,DimY(1),prod(DimY(2:end)));
ordY = length(DimY);if ordY==2&size(Y,2)==1;ordY = 1;end


[I,Jx]=size(X);
[I,Jy]=size(Y);

missX=0;
missy=0;
MissingX = 0;
MissingY = 0;
if any(isnan(X(:)))|any(isnan(Y(:)))
   if any(isnan(X(:)))
      MissingX=1;
   else
      MissingX=0;
   end
   if any(isnan(Y(:)))
      MissingY=1;
   else
      MissingY=0;
   end
   if show~=0&~isnan(show)
      disp(' ')
      disp(' Don''t worry, missing values will be taken care of')
      disp(' ')
   end
   missX=abs(1-isnan(X));
   missy=abs(1-isnan(Y));
end
crit=1e-10;
B=zeros(Fac,Fac);
T=[];
U=[];
Qkron =[];
if MissingX
   SSX=sum(sum(X(find(missX)).^2));
else
   SSX=sum(sum(X.^2));
end
if MissingY
   SSy=sum(sum(Y(find(missy)).^2));
else
   SSy=sum(sum(Y.^2));
end
ssx=[];
ssy=[];
Xres=X;
Yres=Y;
xmodel=zeros(size(X));
Q=[];
W=[];

for num_lv=1:Fac
   
   %init
   u=rand(DimX(1),1);t=rand(DimX(1),1);tgl=t+2;it=0;
   while (norm(t-tgl)/norm(t))>crit&it<maxit
      tgl=t;
      it=it+1;
      
      % w=X'u
      [wloads,wkron] = Xtu(X,u,MissingX,missX,Jx,DimX,ordX);
      
      % t=Xw
      if MissingX
         for i=1:I,
            m = find(missX(i,:));
            t(i)=X(i,m)*wkron(m)/(wkron(m)'*wkron(m));
         end
      else
         t=X*wkron;
      end
      
      % w=X'u
      [qloads,qkron] = Xtu(Yres,t,MissingY,missy,Jy,DimY,ordY);
      % u=yq
      if MissingY
         for i=1:I
            m = find(missy(i,:));
            u(i)=Yres(i,m)*qkron(m)/(qkron(m)'*qkron(m));
         end
      else
         u=Yres*qkron;
      end
   end
   
   T=[T t];
   for i = 1:ordX-1
      if num_lv == 1
         W{i} = wloads{i};
      else
         W{i} = [W{i} wloads{i}];
      end
   end
   U=[U u];
   for i = 1:max(ordY-1,1)
      if num_lv == 1
         Q{i} = qloads{i};
      else
         Q{i} = [Q{i} qloads{i}];
      end
   end
   Qkron = [Qkron qkron];
  
   % Make core arrays
   if ordX>1
      Xfac{1}=T;Xfac(2:ordX)=W;
      Core{num_lv} = calcore(reshape(X,DimX),Xfac,[],0,1);
   else
      Core{num_lv} = 1;
   end
%   if ordY>1
%      Yfac{1}=U;Yfac(2:ordY)=Q;
%      Ycore{num_lv} = calcore(reshape(Y,DimY),Yfac,[],0,1);
%   else
%      Ycore{num_lv} = 1;
%   end
   
   
   B(1:num_lv,num_lv)=inv(T'*T)*T'*U(:,num_lv);
   
   if Jy > 1
      if show~=0&~isnan(show)
         disp(' ') 
         fprintf('number of iterations: %g',it);
         disp(' ')
      end
   end
   
   % Make X model
   if ordX>2
      Wkron = kron(W{end},W{end-1});
   else
      Wkron = W{end};
   end
   for i = ordX-3:-1:1
      Wkron = kron(Wkron,W{i});
   end
   if num_lv>1
      xmodel=T*reshape(Core{num_lv},num_lv,num_lv^(ordX-1))*Wkron';
   else
      xmodel = T*Core{num_lv}*Wkron';
   end
   
   % Make Y model   
 %  if ordY>2
 %     Qkron = kron(Q{end},Q{end-1});
 %  else
 %     Qkron = Q{end};
 %  end
 %  for i = ordY-3:-1:1
 %     Qkron = kron(Qkron,Q{i});
 %  end
 %  if num_lv>1
 %     ypred=T*B(1:num_lv,1:num_lv)*reshape(Ycore{num_lv},num_lv,num_lv^(ordY-1))*Qkron';
 %  else
 %     ypred = T*B(1:num_lv,1:num_lv)*Ycore{num_lv}*Qkron';
 %  end
 ypred=T*B(1:num_lv,1:num_lv)*Qkron';
 Ypred(:,num_lv) = ypred(:); % Vectorize to avoid problems with different orders and the de-vectorize later on
   
   Xres=X-xmodel; 
   Yres=Y-ypred;
   if MissingX
      ssx=[ssx;sum(sum(Xres(find(missX)).^2))];
   else
      ssx=[ssx;sum(sum(Xres.^2))];
   end
   if MissingY
      ssy=[ssy;sum(sum((Y(find(missy))-ypred(find(missy))).^2))];
   else
      ssy=[ssy;sum(sum((Y-ypred).^2))];
   end
end
ypred = reshape(Ypred',[size(Ypred,2) DimY]);
ypred = permute(ypred,[2:ordY+1 1]);

ssx= [ [SSX(1);ssx] [0;100*(1-ssx/SSX(1))]];
ssy= [ [SSy(1);ssy] [0;100*(1-ssy/SSy(1))]];

if show~=0&~isnan(show)
   disp('  ')
   disp('   Percent Variation Captured by N-PLS Model   ')
   disp('  ')
   disp('   LV      X-Block    Y-Block')
   disp('   ----    -------    -------')
   ssq = [(1:Fac)' ssx(2:Fac+1,2) ssy(2:Fac+1,2)];
   format = '   %3.0f     %6.2f     %6.2f';
   for i = 1:Fac
      tab = sprintf(format,ssq(i,:)); disp(tab)
   end
end

Xfactors{1}=T;
for j = 1:ordX-1
   Xfactors{j+1}=W{j};
end

Yfactors{1}=U;
for j = 1:max(ordY-1,1)
   Yfactors{j+1}=Q{j};
end


% Calculate regression coefficients that apply directly to X
  if nargout>7
    if length(DimY)>2
      error(' Regression coefficients are only calculated for models with vector Y or multivariate Y (not multi-way Y)')
    end
      R = outerm(W,0,1);
    for iy=1:size(Y,2)
      if length(DimX) == 2
        dd = [DimX(2) 1];
      else
        dd = DimX(2:end);
      end
      for i=1:Fac
        sR = R(:,1:i)*B(1:i,1:i)*diag(Q{1}(iy,1:i));
        ssR = sum( sR',1)';
        reg{iy,i} = reshape( ssR ,dd);
      end       
    end
    
  end




function [wloads,wkron] = Xtu(X,u,Missing,miss,J,DimX,ord);


% w=X'u
if Missing
   for i=1:J
      m = find(miss(:,i));
      if (u(m)'*u(m))~=0
        ww=X(m,i)'*u(m)/(u(m)'*u(m));
      else
        ww=X(m,i)'*u(m);
      end
      if length(ww)==0
         w(i)=0;
      else
         w(i)=ww;
      end
   end
else
   w=X'*u;
end

% Reshape to array
if length(DimX)>2
   w_reshaped=reshape(w,DimX(2),prod(DimX(3:length(DimX))));
else
   w_reshaped = w(:);
end


% Find one-comp decomposition
if length(DimX)==2
   wloads{1} = w_reshaped/norm(w_reshaped);
   
elseif length(DimX)==3&~any(isnan(w_reshaped))
   [w1,s,w2]=svd(w_reshaped);
   wloads{1}=w1(:,1);
   wloads{2}=w2(:,1);
else
   wloads=parafac(reshape(w_reshaped,DimX(2:length(DimX))),1,[0 2 0 0 NaN]');
   for j = 1:length(wloads);
      wloads{j} = wloads{j}/norm(wloads{j});
   end
end

% Apply sign convention
for i = 1:length(wloads)
   sq = (wloads{i}.^2).*sign(wloads{i});
   wloads{i} = wloads{i}*sign(sum(sq));
end


% Unfold solution
if length(wloads)==1
   wkron = wloads{1};
else
   wkron = kron(wloads{end},wloads{end-1});
   for o = ord-3:-1:1
      wkron = kron(wkron,wloads{o});
   end
end


function [Factors,it,err,corcondia]=parafac(X,Fac,Options,const,OldLoad,FixMode,Weights);

% PARAFAC multiway parafac model
%
% See also:
% 'npls' 'tucker' 'dtld' 'gram'
%
%
%     ___________________________________________________
%
%                  THE PARAFAC MODEL
%     ___________________________________________________
% 
% [Factors,it,err,corcondia,Weights] = parafac(X,Fac,Options,const,OldLoad,FixMode,Weights);
%
% or skipping optional in/outputs
%
% Factors = parafac(X,Fac);
%
% Algorithm for computing an N-way PARAFAC model. Optionally
% constraints can be put on individual modes for obtaining 
% orthogonal, nonnegative, or unimodal solutions. The algorithm
% also handles missing data. For details of PARAFAC 
% modeling see R. Bro, Chemom. Intell. Lab. Syst., 1997.
%
% Several possibilities exist for speeding up the algorithm. 
% Compressing has been incorporated, so that large arrays can be
% compressed by using Tucker (see Bro & Andersson, Chemom. 
% Intell. Lab. Syst., 1998).
% Another acceleration method incorporated here is to 
% extrapolate the individual loading elements a number of 
% iterations ahead after a specified number of iterations.
%
% A temporary MAT-file called TEMP.mat is saved for every 
% 50 iterations. IF the computer breaks down or the model 
% seems to be good enough, one can break the program and 
% load the last saved estimate. The loadings in TEMP.MAT
% are given a cell array as described below and can be 
% converted to A, B, C etc. by FAC2LET.M
% 
% All loading vectors except in first mode are normalized, 
% so that all variance is kept in the first mode (as is 
% common in two-way PCA). The components are arranged as
% in PCA. After iterating, the most important component is
% made the first component etc.
%
%
%
% ----------------------INPUT---------------------
%
% X          X is the input array, which can be from three- to N-way (also
%            twoway if the third mode is interpreted as a onedimensional
%            mode). 
%
% Fac		    No of factors/components sought.
%
%
% ----------------OPTIONAL INPUT---------------------
%
% Options    Optional parameters. If not given or set to zero or [], 
%            defaults will be used. If you want Options(5) to be 2 and
%            not change others, simply write Options(5)=2. Even if Options
%            hasn't been defined Options will contain zeros except its
%            fifth element.
%
%            Options(1) - Convergence criterion
%            The relative change in fit for which the algorithm stops.
%            Standard is 1e-6, but difficult data might require a lower value.
%  
%            Options(2) - Initialization method
%            This option is ignored if PARAFAC is started with old values.
%            If no default values are given the default Options(2) is 0.
%            The advantage of using DTLD or SVD for initialization is that
%            they often provide good starting values. However, since the 
%            initial values are then fixed, repeating the fitting will give
%            the exact same solution. Therefore it is not possible to substantiate
%            if a local minimum has been reached. To avoid that use an initialization
%            based on random values (2).
%
%            0  = fit using DTLD/GRAM for initialization (default if three-way and no missing)
%            1  = fit using SVD vectors for initialization (default if higher than three-way or missing)
%            2  = fit using random orthogonalized values for initialization
%            10 = fit using the best-fitting models of several models
%            fitted using a few iterations
%
%            Options(3) - Plotting options
%            2=produces several graphical outputs (loadings shown during iterations)
%            1=as above, but graphics only shown after convergence
%            0=no plots
%
%            Options(4) - Not user-accesible
% 
%            Options(5) - How often to show fit
%            Determines how often the deviation between the model and the data
%            is shown. This is helpful for adjusting the output to the number
%            of iterations. Default is 10. If showfit is set to NaN, almost no
%            outputs are given 
%
%            Options(6) - Maximal number of iterations
%            Maximal number of iterations allowed. Default is 2500.
%
% const      A vector telling type of constraints put on the loadings of the
%            different modes. Same size as DimX but the i'th element tells
%            what constraint is on that mode.
%            0 => no constraint,
%            1 => orthogonality
%            2 => nonnegativity
%            3 => unimodality (and nonnegativitiy)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -