📄 pls_core.m
字号:
function pls=pls_core(X,Y,varargin)
% *************************************************************************
% * *
% * ---=== PLS ===--- *
% * *
% *************************************************************************
%
% Algorithm version 1 (PLS-1 version)
%
% This algorith handles full cross-validation.
%
% INPUT:
% Xm - data matrix of size NxM, where N is a number of objects and M is a
% number of variables.
% Ym - Y data vector of size Nx1, where N is a number of objects.
%
% OPTIONS:
% Options description syntax:
% - option_name, parameter_1, parameter_2, ..., parameter_n - description
% Example of command line syntax:
% pls_core(Xm,Ym,'option_name', p_1, p_2);
% Options list:
% - test, Xt, Yt - data marix and Y data vestor of test set required to
% calculate SDEP, r2t etc. values.
% - nsv - normal calculation of the stability vector (default)
% - rsv - robust calculation of the stability vector
% - rmsep - standard calculation os RMSEP parameter (default)
% - rgie - Gieleciak's method of RMSEP calculation
% - A, A - maximal number of latent componets
% Optimal number of latent components (Aopt) depends on method of RMSEP
% calculation. Options are as follows:
% - fmin - Aopt is the position of first minimum in RMSEP
% - min - Aopt is the position of global minimum in RMSEP
% - flc - Aopt is A
% - optim - Aopt is computed according to options. Computed Aopt is used
% instead of A and computation is repeated and results in final Aopt
% value. It shoud not be used together with 'flc' option because 'flc'
% overrides all methods of Aopt calculation.
% Options list: (continued)
% - cv, type - cross-validation type. 1 results in LOO, >1 results in LSO
% - cv_step, step - number of combination ommited in each loop. Useful in
% connection with LSO cross-validation.
% - full - gives full answer also with some low level outputs.
%
% OUTPUT:
% This program creates output structure containing fields:
% q2: cross-validated r2
% s: cross-validated standard error
% q2m: cross-validated r2 - cv model set
% sm: cross-validated standard error - cv model set
% q2a: cross-validated r2 - cv model + cv test sets
% RMS: Root Mean Square deviation
% SDEP: Standard Deviation Error of Prediction *)
% r2: r2
% r2t: r2 - test set *)
% Ymp: Y model prediction
% Ytp: Y test prediction *)
% bb: set of cross-validated PLS coefficients (see Centner's publication)
% sv: stability vector
% b: PLS coefficients
% b0: bias
% A: maximal number of PLS latent components
% Aopt: model complexity
%
% *) This output parameters are computed only if test set is used
%
% COPYRIGHT (c) 2006 tmag@us.edu.pl & rgielec@us.edu.pl
% *************************************************************************
% Reading basic parameters
% *************************************************************************
% varargin v1.2 ...
% default
test=false;
cv=1;
cv_step=1;
preproces=1;
A=10;
Amet='fmin';
Argie='rmsep';
optim=1; % no optim
svmet='nsv';
full=false; % full answer, possibly big
% reading
for vai=1:length(varargin)
c_vai=varargin{vai};
if ischar(c_vai)
switch c_vai
case 'test'
Xt=varargin{vai+1};
Yt=varargin{vai+2};
test=true;
case 'nsv'
svmet='nsv'; % normal (default)
case 'rsv'
svmet='rsv'; % robust
case 'fmin'
Amet='fmin'; % first minimum (default)
case 'min'
Amet='min'; % minimum
case 'flc'
Amet='flc'; % fixed
case 'optim'
optim=2; % optimal Aopt
case 'rgie'
Argie='rgie'; % Gieleciak's method rgielec@us.edu.pl
case 'rmsep'
Argie='rmsep'; % Normal method (default)
case 'A'
A=varargin{vai+1};
case 'cv'
cv=varargin{vai+1};
case 'cv_step'
cv_step=varargin{vai+1};
case 'full'
full=true;
end
end
end
% default
% ... varargin
% *************************************************************************
% Model calculation
% *************************************************************************
% optim!
for coptim=1:optim
% optim
if optim==2&coptim==2
A=mc.Aopt;
end
% Model construction
mc=emb_mc(X,Y,A,cv,cv_step,preproces);
% Gieleciak's method
if strcmp(Argie,'rgie')
RMSEPm=sqrt(mc.PRESSm./(-[1:mc.A]-1+mc.row));
RMSEPt=sqrt(mc.PRESSt./(-[1:mc.A]-1+mc.row));
mc.RMSEPm=RMSEPm;
mc.RMSEPt=RMSEPt;
end
% Aopt
mc.Amet=Amet;
if strcmp(mc.Amet,'flc')
mc.Aopt=mc.A;
else
mc.Aopt=emb_aopt(mc);
end
%mc.Ycvp=mc.Ycvp(:,mc.Aopt);
end
% CV estimators
cve=emb_cve(mc,X,Y);
mc.svmet=svmet;
cve.sv=emb_sv(mc);
% Final model
warning off;
fm=emb_fm(X,Y,mc,preproces);
warning on;
fm.Aopt=mc.Aopt;
% Model Fitted statistics like estimators
mfse=emb_fse(X,Y,fm);
% Test Fitted statistics like estimators
if test
tfse=emb_fse(Xt,Yt,fm);
end
% *************************************************************************
% Output structure
% *************************************************************************
pls.q2=cve.q2;
pls.s=cve.s;
pls.q2m=cve.q2m;
pls.sm=cve.sm;
pls.q2a=cve.q2a;
pls.RMS=mfse.RMS;
if test
pls.SDEP=tfse.RMS;
end
%pls.rmt2=1/(RMStest/RMS);
pls.r2=mfse.r2;
if test
pls.r2t=tfse.r2;
end
pls.Ymp=mfse.Yp;
if test
pls.Ytp=tfse.Yp;
end
pls.bb=mc.bb;
pls.sv=cve.sv;
pls.b=fm.b;
pls.b0=fm.b0;
pls.A=mc.A;
pls.Aopt=mc.Aopt;
pls.RMSEP=mc.RMSEPt;
% full
if full
pls.mc=mc;
pls.cve=cve;
pls.mfse=mfse;
pls.fm=fm;
if test
pls.r2t=tfse.r2;
end
end
% *************************************************************************
% End of main function
% *************************************************************************
% *********************************************************************** %
% %
% ---=== Embedded functions ===--- %
% %
% *********************************************************************** %
% ----------------------------------------------------------------------- %
function pls=emb_mc(X,Y,A,cv,cv_step,preproces)
% Model construction
% Cross-Validation
[row columns]=size(X);
if A>=columns
A=columns-1;
end
if A<1
A=1;
end
n=1:row;
X1=X(n,:); Y1=Y(n,:);
Ymp=zeros(row,A);
Ytp=zeros(row,A);
PRESS=zeros(1,A);
% first combination
alls=1:row;
tess=1:cv;
mods=alls; mods(tess)=[];
moo=zeros(row,1); % model objects occurences
too=zeros(row,1); % test objects occurences
% number of combinations
icom=round(factorial(row)/factorial(cv)/factorial(row-cv));
bb=zeros(floor((icom)/cv_step),columns);
ricom=1; % real
while ricom<=icom % cv loop now it is simple loo
Xtest=X1(tess,:);
Ytest=Y1(tess,:);
Xcal=X1(mods,:);
Ycal=Y1(mods,:);
moo(mods)=moo(mods)+1;
too(tess)=too(tess)+1;
[Xm Ym]=emb_prepro(preproces,Xcal,Ycal);
% PLS 1
scores=[]; loadings=[]; weights=[]; Q=[];
for a=1:A
cc=(Ym'*Xm*Xm'*Ym)^(-0.5);
ww=cc*Xm'*Ym;
tt=Xm*ww;
pp=Xm'*tt/(tt'*tt);
qq=Ym'*tt/(tt'*tt);
Xm=Xm-tt*pp';
Ym=Ym-tt*qq;
weights=[weights,ww];
scores=[scores,tt];
loadings=[loadings,pp];
Q=[Q;qq];
b=weights*inv(loadings'*weights)*Q;
if preproces==2
b=std(Y)*b./(std(X))';
b0=mean(Y,1)-std(Y)*mean(X,1)./std(X)*b;
else
b0=mean(Ycal,1)-mean(Xcal,1)*b;
end
Ymp(mods,a)=Ymp(mods,a)+b0+Xcal*b;
Ytp(tess,a)=Ytp(tess,a)+b0+Xtest*b;
Res=Ytest-Ytp(tess,a);
PRESS(1,a)=Res'*Res+PRESS(1,a);
end
% because of ga algorithm
if prod(size(b))
bb(ricom,:)=b;
end
% next combination
for a=1:cv_step
tess=emb_gncom(tess,row);
ricom=ricom+1;
end
mods=alls; mods(tess)=[];
end
% mean values of Y#p
m=ones(1,A);
Ymp=Ymp./(moo*m);
Ytp=Ytp./(too*m);
PRESSt=sum((Ytp-Y*m).^2,1);
PRESSm=sum((Ymp-Y*m).^2,1);
pls.PRESSt=PRESSt;
pls.PRESSm=PRESSm;
pls.Ytp=Ytp;
pls.Ymp=Ymp;
pls.A=A;
pls.columns=columns;
pls.row=row;
pls.bb=bb;
pls.RMSEPt=sqrt(pls.PRESSt/pls.row);
pls.RMSEPm=sqrt(pls.PRESSm/pls.row);
% ----------------------------------------------------------------------- %
function pls=emb_fm(X,Y,mc,preproces)
% final model, e.g. model with optimal number of factors
[Xm Ym]=emb_prepro(preproces,X,Y);
scores=[]; loadings=[]; weights=[]; Q=[];
for a=1:mc.Aopt
cc=(Ym'*Xm*Xm'*Ym)^(-0.5);
ww=cc*Xm'*Ym;
tt=Xm*ww;
pp=Xm'*tt/(tt'*tt);
qq=Ym'*tt/(tt'*tt);
Xm=Xm-tt*pp';
Ym=Ym-tt*qq;
weights=[weights,ww];
scores=[scores,tt];
loadings=[loadings,pp];
Q=[Q; qq];
end
b=weights*inv(loadings'*weights)*Q;
if preproces==2
b=std(Y)*b./(std(X))';
b0=mean(Y)-std(Y)*mean(X)./std(X)*b;
else
b0=mean(Y)-mean(X)*b;
end
pls.weights=weights;
pls.scores=scores;
pls.loadings=loadings;
pls.b=b;
pls.b0=b0;
% ----------------------------------------------------------------------- %
function [Xprep,Yprep,Xtprep,Ytprep]=emb_prepro(c,X,Y,Xt,Yt)
% PLS Preproces
row=size(X,1);
Xprep=X-ones(row,1)*mean(X);
Yprep=Y-ones(row,1)*mean(Y);
if c==2
Xprep=Xprep./(ones(row,1)*std(X));
Yprep=Yprep./(ones(row,1)*std(Y));
end
if nargin>3 % test set
rowt=size(Xt,1);
Xtprep=Xt-ones(rowt,1)*mean(X);
Ytprep=Yt-ones(rowt,1)*mean(Y);
if c==2
Xtprep=Xtprep./(ones(rowt,1)*std(X));
Ytprep=Ytprep./(ones(rowt,1)*std(Y));
end
end
% ----------------------------------------------------------------------- %
function Aopt=emb_aopt(pls)
% Aopt calculation
switch pls.Amet
case 'min'
[i1 i2]=min(pls.RMSEPt);
Aopt=i2;
case 'fmin'
Aopt=find(diff(pls.RMSEPt)>0);
if isempty(Aopt)
Aopt=pls.A;
else
Aopt=Aopt(1);
end
end
if Aopt>=pls.columns
Aopt=pls.columns-1;
end
if Aopt==0
Aopt=1;
end
% ----------------------------------------------------------------------- %
function fse=emb_fse(X,Y,fm)
warning off;
% Fitted statistic like estimators
[m,n]=size(X);
Yp=fm.b0+X*fm.b;
Res=Y-Yp;
RMS=sqrt(Res'*Res/m);
rfit=sqrt(1-(Res'*Res)/sum((Y-mean(Y)).^2));
r2=rfit^2;
%sfit=sqrt(Res'*Res)/(m-fm.Aopt-1);
fse.r2=r2;
fse.RMS=RMS;
fse.Yp=Yp;
warning on;
% ----------------------------------------------------------------------- %
function cve=emb_cve(pls,X,Y)
% Cross-Validation estimators
cve.q2=1-(pls.PRESSt(:,pls.Aopt))/sum((Y-mean(Y)).^2);
cve.s=sqrt(pls.PRESSt(:,pls.Aopt)/(pls.row-pls.Aopt-1));
cve.q2m=1-(pls.PRESSm(:,pls.Aopt))/sum((Y-mean(Y)).^2);
cve.sm=sqrt(pls.PRESSm(:,pls.Aopt)/(pls.row-pls.Aopt-1));
cve.q2a=1-pls.PRESSt(:,pls.Aopt)/pls.PRESSm(:,pls.Aopt);
%cve.q2a=1/((1-cve.q2)/(1-cve.q2m));
% ----------------------------------------------------------------------- %
function sv=emb_sv(pls)
% Stability Vector
switch pls.svmet
case 'nsv' % normal
sv=std(pls.bb);
z=find(sv==0);
sv(z)=1;
sv=mean(pls.bb)./sv;
sv(z)=0;
case 'rsv' % robust
sv=iqr(pls.bb);
sv=median(pls.bb)./sv;
end
% ----------------------------------------------------------------------- %
function wp=emb_gncom(ww,n)
s=size(ww,2);
if ww(s)<n
ww(s)=ww(s)+1;
wp=ww;
return
else
if s==1
wp=ww;
return
else
wp=ww(1:s-1);
wp=emb_gncom(wp,n-1);
ww(s)=wp(s-1)+1;
wp=[wp ww(s)];
return
end
end
% ----------------------------------------------------------------------- %
% *********************************************************************** %
% %
% ---=== End of embedded functions ===--- %
% %
% *********************************************************************** %
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -