📄 npls.m
字号:
function [Xfactors,Yfactors,Core,B,ypred,ssx,ssy,reg] = npls(X,Y,Fac,show);
%NPLS multilinear partial least squares regression
%
% See also:
% 'parafac' 'tucker'
%
%
% MULTILINEAR PLS - N-PLS
%
% INPUT
% X Array of independent variables
% Y Array of dependent variables
% Fac Number of factors to compute
%
% OPTIONAL
% show If show = NaN, no outputs are given
%
%
% OUTPUT
% Xfactors Holds the components of the model of X in a cell array.
% Use fac2let to convert the parameters to scores and
% weight matrices. I.e., for a three-way array do
% [T,Wj,Wk]=fac2let(Xfactors);
% Yfactors Similar to Xfactors but for Y
% Core Core array used for calculating the model of X
% B The regression coefficients from which the scores in
% the Y-space are estimated from the scores in the X-
% space (U = TB);
% ypred The predicted values of Y for one to Fac components
% (array with dimension Fac in the last mode)
% ssx Variation explained in the X-space.
% ssx(f+1,1) is the sum-squared residual after first f factors.
% ssx(f+1,2) is the percentage explained by first f factors.
% ssy As above for the Y-space
% reg Cell array with regression coefficients for raw (preprocessed) X
%
%
% AUXILIARY
%
% If missing elements occur these must be represented by NaN.
%
%
% [Xfactors,Yfactors,Core,B,ypred,ssx,ssy,reg] = npls(X,y,Fac);
% or short
% [Xfactors,Yfactors,Core,B] = npls(X,y,Fac);
%
% Copyright, 1998 -
% This M-file and the code in it belongs to the holder of the
% copyrights and is made public under the following constraints:
% It must not be changed or modified and code cannot be added.
% The file must be regarded as read-only. Furthermore, the
% code can not be made part of anything but the 'N-way Toolbox'.
% In case of doubt, contact the holder of the copyrights.
%
% Rasmus Bro
% Chemometrics Group, Food Technology
% Department of Food and Dairy Science
% Royal Veterinary and Agricultutal University
% Rolighedsvej 30, DK-1958 Frederiksberg, Denmark
% Phone +45 35283296
% Fax +45 35283245
% E-mail rb@kvl.dk
%
% $ Version 1.02 $ Date July 1998 $ Not compiled $
% $ Version 1.03 $ Date 4. December 1998 $ Not compiled $ Cosmetic changes
% $ Version 1.04 $ Date 4. December 1999 $ Not compiled $ Cosmetic changes
% $ Version 1.05 $ Date July 2000 $ Not compiled $ error caused weights not to be normalized for four-way and higher
% $ Version 1.06 $ Date November 2000 $ Not compiled $ increase max it and decrease conv crit to better handle difficult data
% $ Version 2.00 $ May 2001 $ Changed to array notation $ RB $ Not compiled $
% $ Version 2.01 $ June 2001 $ Changed to handle new core in X $ RB $ Not compiled $
% $ Version 2.02 $ January 2002 $ Outputs all predictions (1 - LV components) $ RB $ Not compiled $
if nargin==0
disp(' ')
disp(' ')
disp(' THE N-PLS REGRESSION MODEL')
disp(' ')
disp(' Type <<help npls>> for more info')
disp(' ')
disp(' [Xfactors,Yfactors,Core,B,ypred,ssx,ssy] = npls(X,y,Fac);')
disp(' or short')
disp(' [Xfactors,Yfactors,Core,B] = npls(X,y,Fac);')
disp(' ')
return
elseif nargin<3
error(' The inputs X, y, and Fac must be given')
end
if ~exist('show')==1|nargin<4
show=1;
end
maxit=120;
DimX = size(X);
X = reshape(X,DimX(1),prod(DimX(2:end)));
ordX = length(DimX);if ordX==2&size(X,2)==1;ordX = 1;end
DimY = size(Y);
Y = reshape(Y,DimY(1),prod(DimY(2:end)));
ordY = length(DimY);if ordY==2&size(Y,2)==1;ordY = 1;end
[I,Jx]=size(X);
[I,Jy]=size(Y);
missX=0;
missy=0;
MissingX = 0;
MissingY = 0;
if any(isnan(X(:)))|any(isnan(Y(:)))
if any(isnan(X(:)))
MissingX=1;
else
MissingX=0;
end
if any(isnan(Y(:)))
MissingY=1;
else
MissingY=0;
end
if show~=0&~isnan(show)
disp(' ')
disp(' Don''t worry, missing values will be taken care of')
disp(' ')
end
missX=abs(1-isnan(X));
missy=abs(1-isnan(Y));
end
crit=1e-10;
B=zeros(Fac,Fac);
T=[];
U=[];
Qkron =[];
if MissingX
SSX=sum(sum(X(find(missX)).^2));
else
SSX=sum(sum(X.^2));
end
if MissingY
SSy=sum(sum(Y(find(missy)).^2));
else
SSy=sum(sum(Y.^2));
end
ssx=[];
ssy=[];
Xres=X;
Yres=Y;
xmodel=zeros(size(X));
Q=[];
W=[];
for num_lv=1:Fac
%init
u=rand(DimX(1),1);t=rand(DimX(1),1);tgl=t+2;it=0;
while (norm(t-tgl)/norm(t))>crit&it<maxit
tgl=t;
it=it+1;
% w=X'u
[wloads,wkron] = Xtu(X,u,MissingX,missX,Jx,DimX,ordX);
% t=Xw
if MissingX
for i=1:I,
m = find(missX(i,:));
t(i)=X(i,m)*wkron(m)/(wkron(m)'*wkron(m));
end
else
t=X*wkron;
end
% w=X'u
[qloads,qkron] = Xtu(Yres,t,MissingY,missy,Jy,DimY,ordY);
% u=yq
if MissingY
for i=1:I
m = find(missy(i,:));
u(i)=Yres(i,m)*qkron(m)/(qkron(m)'*qkron(m));
end
else
u=Yres*qkron;
end
end
T=[T t];
for i = 1:ordX-1
if num_lv == 1
W{i} = wloads{i};
else
W{i} = [W{i} wloads{i}];
end
end
U=[U u];
for i = 1:max(ordY-1,1)
if num_lv == 1
Q{i} = qloads{i};
else
Q{i} = [Q{i} qloads{i}];
end
end
Qkron = [Qkron qkron];
% Make core arrays
if ordX>1
Xfac{1}=T;Xfac(2:ordX)=W;
Core{num_lv} = calcore(reshape(X,DimX),Xfac,[],0,1);
else
Core{num_lv} = 1;
end
% if ordY>1
% Yfac{1}=U;Yfac(2:ordY)=Q;
% Ycore{num_lv} = calcore(reshape(Y,DimY),Yfac,[],0,1);
% else
% Ycore{num_lv} = 1;
% end
B(1:num_lv,num_lv)=inv(T'*T)*T'*U(:,num_lv);
if Jy > 1
if show~=0&~isnan(show)
disp(' ')
fprintf('number of iterations: %g',it);
disp(' ')
end
end
% Make X model
if ordX>2
Wkron = kron(W{end},W{end-1});
else
Wkron = W{end};
end
for i = ordX-3:-1:1
Wkron = kron(Wkron,W{i});
end
if num_lv>1
xmodel=T*reshape(Core{num_lv},num_lv,num_lv^(ordX-1))*Wkron';
else
xmodel = T*Core{num_lv}*Wkron';
end
% Make Y model
% if ordY>2
% Qkron = kron(Q{end},Q{end-1});
% else
% Qkron = Q{end};
% end
% for i = ordY-3:-1:1
% Qkron = kron(Qkron,Q{i});
% end
% if num_lv>1
% ypred=T*B(1:num_lv,1:num_lv)*reshape(Ycore{num_lv},num_lv,num_lv^(ordY-1))*Qkron';
% else
% ypred = T*B(1:num_lv,1:num_lv)*Ycore{num_lv}*Qkron';
% end
ypred=T*B(1:num_lv,1:num_lv)*Qkron';
Ypred(:,num_lv) = ypred(:); % Vectorize to avoid problems with different orders and the de-vectorize later on
Xres=X-xmodel;
Yres=Y-ypred;
if MissingX
ssx=[ssx;sum(sum(Xres(find(missX)).^2))];
else
ssx=[ssx;sum(sum(Xres.^2))];
end
if MissingY
ssy=[ssy;sum(sum((Y(find(missy))-ypred(find(missy))).^2))];
else
ssy=[ssy;sum(sum((Y-ypred).^2))];
end
end
ypred = reshape(Ypred',[size(Ypred,2) DimY]);
ypred = permute(ypred,[2:ordY+1 1]);
ssx= [ [SSX(1);ssx] [0;100*(1-ssx/SSX(1))]];
ssy= [ [SSy(1);ssy] [0;100*(1-ssy/SSy(1))]];
if show~=0&~isnan(show)
disp(' ')
disp(' Percent Variation Captured by N-PLS Model ')
disp(' ')
disp(' LV X-Block Y-Block')
disp(' ---- ------- -------')
ssq = [(1:Fac)' ssx(2:Fac+1,2) ssy(2:Fac+1,2)];
format = ' %3.0f %6.2f %6.2f';
for i = 1:Fac
tab = sprintf(format,ssq(i,:)); disp(tab)
end
end
Xfactors{1}=T;
for j = 1:ordX-1
Xfactors{j+1}=W{j};
end
Yfactors{1}=U;
for j = 1:max(ordY-1,1)
Yfactors{j+1}=Q{j};
end
% Calculate regression coefficients that apply directly to X
if nargout>7
if length(DimY)>2
error(' Regression coefficients are only calculated for models with vector Y or multivariate Y (not multi-way Y)')
end
R = outerm(W,0,1);
for iy=1:size(Y,2)
if length(DimX) == 2
dd = [DimX(2) 1];
else
dd = DimX(2:end);
end
for i=1:Fac
sR = R(:,1:i)*B(1:i,1:i)*diag(Q{1}(iy,1:i));
ssR = sum( sR',1)';
reg{iy,i} = reshape( ssR ,dd);
end
end
end
function [wloads,wkron] = Xtu(X,u,Missing,miss,J,DimX,ord);
% w=X'u
if Missing
for i=1:J
m = find(miss(:,i));
if (u(m)'*u(m))~=0
ww=X(m,i)'*u(m)/(u(m)'*u(m));
else
ww=X(m,i)'*u(m);
end
if length(ww)==0
w(i)=0;
else
w(i)=ww;
end
end
else
w=X'*u;
end
% Reshape to array
if length(DimX)>2
w_reshaped=reshape(w,DimX(2),prod(DimX(3:length(DimX))));
else
w_reshaped = w(:);
end
% Find one-comp decomposition
if length(DimX)==2
wloads{1} = w_reshaped/norm(w_reshaped);
elseif length(DimX)==3&~any(isnan(w_reshaped))
[w1,s,w2]=svd(w_reshaped);
wloads{1}=w1(:,1);
wloads{2}=w2(:,1);
else
wloads=parafac(reshape(w_reshaped,DimX(2:length(DimX))),1,[0 2 0 0 NaN]');
for j = 1:length(wloads);
wloads{j} = wloads{j}/norm(wloads{j});
end
end
% Apply sign convention
for i = 1:length(wloads)
sq = (wloads{i}.^2).*sign(wloads{i});
wloads{i} = wloads{i}*sign(sum(sq));
end
% Unfold solution
if length(wloads)==1
wkron = wloads{1};
else
wkron = kron(wloads{end},wloads{end-1});
for o = ord-3:-1:1
wkron = kron(wkron,wloads{o});
end
end
function [Factors,it,err,corcondia]=parafac(X,Fac,Options,const,OldLoad,FixMode,Weights);
% PARAFAC multiway parafac model
%
% See also:
% 'npls' 'tucker' 'dtld' 'gram'
%
%
% ___________________________________________________
%
% THE PARAFAC MODEL
% ___________________________________________________
%
% [Factors,it,err,corcondia,Weights] = parafac(X,Fac,Options,const,OldLoad,FixMode,Weights);
%
% or skipping optional in/outputs
%
% Factors = parafac(X,Fac);
%
% Algorithm for computing an N-way PARAFAC model. Optionally
% constraints can be put on individual modes for obtaining
% orthogonal, nonnegative, or unimodal solutions. The algorithm
% also handles missing data. For details of PARAFAC
% modeling see R. Bro, Chemom. Intell. Lab. Syst., 1997.
%
% Several possibilities exist for speeding up the algorithm.
% Compressing has been incorporated, so that large arrays can be
% compressed by using Tucker (see Bro & Andersson, Chemom.
% Intell. Lab. Syst., 1998).
% Another acceleration method incorporated here is to
% extrapolate the individual loading elements a number of
% iterations ahead after a specified number of iterations.
%
% A temporary MAT-file called TEMP.mat is saved for every
% 50 iterations. IF the computer breaks down or the model
% seems to be good enough, one can break the program and
% load the last saved estimate. The loadings in TEMP.MAT
% are given a cell array as described below and can be
% converted to A, B, C etc. by FAC2LET.M
%
% All loading vectors except in first mode are normalized,
% so that all variance is kept in the first mode (as is
% common in two-way PCA). The components are arranged as
% in PCA. After iterating, the most important component is
% made the first component etc.
%
%
%
% ----------------------INPUT---------------------
%
% X X is the input array, which can be from three- to N-way (also
% twoway if the third mode is interpreted as a onedimensional
% mode).
%
% Fac No of factors/components sought.
%
%
% ----------------OPTIONAL INPUT---------------------
%
% Options Optional parameters. If not given or set to zero or [],
% defaults will be used. If you want Options(5) to be 2 and
% not change others, simply write Options(5)=2. Even if Options
% hasn't been defined Options will contain zeros except its
% fifth element.
%
% Options(1) - Convergence criterion
% The relative change in fit for which the algorithm stops.
% Standard is 1e-6, but difficult data might require a lower value.
%
% Options(2) - Initialization method
% This option is ignored if PARAFAC is started with old values.
% If no default values are given the default Options(2) is 0.
% The advantage of using DTLD or SVD for initialization is that
% they often provide good starting values. However, since the
% initial values are then fixed, repeating the fitting will give
% the exact same solution. Therefore it is not possible to substantiate
% if a local minimum has been reached. To avoid that use an initialization
% based on random values (2).
%
% 0 = fit using DTLD/GRAM for initialization (default if three-way and no missing)
% 1 = fit using SVD vectors for initialization (default if higher than three-way or missing)
% 2 = fit using random orthogonalized values for initialization
% 10 = fit using the best-fitting models of several models
% fitted using a few iterations
%
% Options(3) - Plotting options
% 2=produces several graphical outputs (loadings shown during iterations)
% 1=as above, but graphics only shown after convergence
% 0=no plots
%
% Options(4) - Not user-accesible
%
% Options(5) - How often to show fit
% Determines how often the deviation between the model and the data
% is shown. This is helpful for adjusting the output to the number
% of iterations. Default is 10. If showfit is set to NaN, almost no
% outputs are given
%
% Options(6) - Maximal number of iterations
% Maximal number of iterations allowed. Default is 2500.
%
% const A vector telling type of constraints put on the loadings of the
% different modes. Same size as DimX but the i'th element tells
% what constraint is on that mode.
% 0 => no constraint,
% 1 => orthogonality
% 2 => nonnegativity
% 3 => unimodality (and nonnegativitiy)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -