📄 unpls.pas
字号:
unit unPLS;
{
writen by A.Ritter 2005
origins:
1) Megan Fedders & Jay William Ponder (1996) FORTRAN program QSAR
<<ftp://dasher.wustl.edu/pub/qsar>>
2) Multivariate data analysis K.H.Esbensen
}
interface
uses SysUtils, Math, Dialogs, Controls;
const
MaxRow = 300;
MaxCol = 1000;
MaxRank = 20; //max rank of matrix or max of calculated PC's
type //define some new ariable types;
TSMatr = array of array of Single; //single float matrix
TSVect = array of Single; //single float vector
TDMatr = array of array of Single; //double float matrix
TDVect = array of Single; //double float vector
var //global
// X_PLSAsSpectra : Boolean = False; //X data for PLS are spectra
// Nrow : Word; //row number in calibration data set
// Ncol : Word; //column number in X matrix
// Nexv : Word; //row number in external validation data set
// Nrank : Word; //rank
// FD_on : Boolean = False; //freedom dergee by calulations
//raw X, Y values
Yraw : TSVect; //Y: 1..Nrow
Xraw : TSMatr; //X: 1..NRow, 1..NCol
//raw X, Y values for external validation
// Yexv : TSVect; //Y: 1..Nrow
// Xexv : TSMatr; //X: 1..NRow, 1..NCol
//max,min X-value for calibration data set in i-its channel (if extrapolation?)
Xmax, Xmin : TDVect;
//regression and LOO validation values buffer
YY_fit, YY_prd : TSMatr; //[1..MaxRow,1..MaxRank] of single;
SmplLbl : array of string; //Sample labels: 1..NRow
// SmplNo : array of Integer; //Sample ID number 29.06.2005
// Use : array of Boolean; //use X col; 1..NCol
//scaling
Ybar,Ystd : Double; //Y-scaling factors
Xbar,Xstd : TDVect; //X-scaling factors: 1..NCol (mean and std over all channels)
//coorelations
XCor : TSMatr; //X-Correlations: 1..NCol,1..NCol
YXCor : TSVect; //Y-Correction : 1..NCol
b : TDMatr; // PLS Regression Coefficients: 1..NCol,1..NRank
ResValVar : array[1..MaxRank] of Double; //residual validation variance
ResCalVar : array[1..MaxRank] of Double; //residual calibration variance
tt : TDMatr; //array[1..MaxRow,1..MaxRank] of Double; //buffer for scores
Progress : Word; //calculations progress ...
Abort : Boolean = False; //abort calculations by user
//------------------------------------------------------------------------------
//sets dimension of dynamic arrays
procedure SetDim(PC_count : Word; //calculated PCs
NRow,NCol : Word //matrix dimension
);
//------------------------------------------------------------------------------
//basic statistical measures
//------------------------------------------------------------------------------
function Mean(N : Word; X : TSVect) : Double; //calculate mean average value;
function Variance(N : Word; X : TSVect; FD : Boolean) : Double; // calculate the variance of a vector; fangcha;
function StdDev(N : Word; X : TSVect; FD : Boolean) : Double; //calculate the standard deciation of vector; biao zhun cha;
function Covariance(N : Word; X,Y : TSVect; FD : Boolean) : Double; //calculate the covariance of two veftors; xie fangcha;
function Correlation(N : Word; X,Y : TSVect; FD : Boolean) : Double; //calculate the correlations between two vectors;
//------------------------------------------------------------------------------
//Residual Variance
//------------------------------------------------------------------------------
function ResVar(N : Word; X,Y : TSVect) : Double;
function RMSE(N : Word; X,Y : TSVect) : Double;
//------------------------------------------------------------------------------
//transform (scale) the response values:
procedure YScale(const Scale : Byte; //data scale - raw, main, auto
const Nrow : Word; //count of calibration
var Y : TSVect; //given values Y, scaled back
var Ybar : Double; //mean
var Ystd : Double //std.dev
); //scale type
//------------------------------------------------------------------------------
//transform (scale) the property values:
procedure XScale(const Scale : Byte; //data scale - raw, main, auto
const Nrow : Word; //count of calibration samples
const Ncol : Word; //count of spectra channels
var X : TSMatr; //X values, to scale
var Xbar,Xstd : TDVect //mean and std.dev scale values
);
//------------------------------------------------------------------------------
//nonlinear iterative PLS analysis:
procedure Nipals(const Nrow : Word; //count of calibration samples
const Ncol : Word; //count of spectra channels
const Y : TSVect; //Y
const X : TSMatr; //X
var Nrank : Word; //rank of matrix
var b : TDMatr;//pls regression coefficients for prediction
verbose : Boolean
);
//------------------------------------------------------------------------------
//PLS Model validation with an external data set
//last change 12.07.2005
procedure ExtPlsModel( const Rank : Word; //prediction rank
const NCol : Word; //data column number
const Nexv : Word //count of predicted values
);
//------------------------------------------------------------------------------
//calculate PLS-R fit values and residual calibration variance
procedure CalcPLSFitVAlues(const Nrank : Word;
const Nrow : Word; //count of calibration samples
const Ncol : Word //count of spectra channels, column
);
//------------------------------------------------------------------------------
// get predictions from PLS model:
procedure LOOPlSModel( const Scale : Byte; //data scale
const Nrow : Word; //count of calibration samples
const Ncol : Word; //count of spectra channels, column
var Nrank : Word
);
//------------------------------------------------------------------------------
//procedure predicted PLS-R values Yp for Nrow samples
//04.06.2005
function PredictY( const Nrow : Word; //count of predicted samples
const Ncol : Word; //count of column in X variables matrix
const Nrnk : Word; //prdivtion rank or PC
const Ybar, Ystd : Double; //data centering vor Y
const Xbar,Xstd : TDVect; //data centering vor X
const Xraw : TSMatr; //measurements for predicted set, row
const b : TDMatr //coeff. for Prediction
) : Double; //predicted values
//------------------------------------------------------------------------------
procedure Invert(const n : Word; var a : TDMatr);
procedure ResetData;
procedure VerifyRank(const NRow,NCol : Word; var Rank : Word);
function GetMaxRank(const NRow,NCol : Word) : SmallInt;
procedure GetPropRespCorr;
//PLS-Regression
//last changed 12.07.05
procedure PLSR(const NCol,NRow : Word; //data matrix dimension
var PC_count : Word; //to calculated PCs, number can be redused...
const ValidModel : Byte; //validation model
const DataScale : Byte //data scale: raw, main centered, autoscaled
);
procedure FindXRange(const NRow,NCol : Word;
const X : TSMatr);
//------------------------------------------------------------------------------
implementation
//------------------------------------------------------------------------------
//sets dimension of dynamic arrays
procedure SetDim(PC_count : Word; //calculated PCs
NRow,NCol : Word //matrix dimension
);
begin
SetLength(Yraw, NRow + 1); //y
SetLength(Xraw, NRow + 1,NCol + 1);//x
SetLength(Xmax,NCol+1);//max X-value for calibration data set in i-its channel
SetLength(Xmin,NCol+1);//min X-value for calibration data set in i-its channel
SetLength(YXCor, NRow + 1);
SetLength(XCor, NRow + 1,NCol + 1);
SetLength(SmplLbl, NRow + 1);//Labels
// SetLength(SmplNo, NRow + 1);//No
// SetLength(Use, NCol+1); //use X column
SetLength(Xbar, NCol+1); //X-scaling. Mean
SetLength(Xstd, NCol+1); //X-scaling. std
SetLength(tt,NRow+1,PC_count +1);
SetLength(b, NCol + 1, PC_count + 1);
SetLength(YY_fit,NRow+1,PC_count+1);
SetLength(YY_prd,NRow+1,PC_count+1);
end;
//------------------------------------------------------------------------------
//29.06.2005
//gets max possible rank
//changed 12.07.2005
function GetMaxRank(const NRow,NCol : Word) : SmallInt;
begin
Result := Min(NCol, NRow-1);
end;
//------------------------------------------------------------------------------
//verifies if the Rank is correct
procedure VerifyRank(const NRow,NCol : Word; var Rank : Word);
var MinRnk : Word;
begin
MinRnk := Min(NCol, NRow-1); //rank berechnen
MinRnk := Min(MinRnk,MaxRank); //mit max wert vergleichen
Rank := Min(Rank,MinRnk); //min Eingabewert vergleichen;
end;
//------------------------------------------------------------------------------
// initialize the number of data sets, test sets and properties
procedure ResetData;
var I,J : Word;
begin
for I := Low(Yraw) to High(Yraw) do
begin
SmplLbl[I] := '';
// SmplNo[I] := 0;
Yraw[I] := 0.0;
end;
{ for I := Low(Use) to High(Use) do
begin
Use[I] := True;
for J := Low(Yraw) to High(Yraw) do
Xraw[J,I] := 0.0;
end;}
end;
//------------------------------------------------------------------------------
//PLS-regression
//started 05.2005
//last changed 12.07.05
procedure PLSR(const NCol,NRow : Word; //data matrix dimension
var PC_count : Word; //to calculated PCs, number can be redused...
const ValidModel : Byte; //validation model
const DataScale : Byte //data scale: raw, main centered, autoscaled
);
var
I,J : Word;
//scaled, centered X and Y
Yscl : TSVect;
Xscl : TSMatr;
begin
SetDim(PC_Count,NRow,NCol); //fr黨er
SetLength(Yscl, Nrow + 1);//Y scaled
SetLength(Xscl, NRow + 1,NCol + 1);//X scaled
// autoscale the response vector values
for I := 1 to Nrow do
Yscl[I] := Yraw[I];
Yscale(DataScale,Nrow,Yscl,Ybar,Ystd); //back Yscl, Ybar,Ystd
// autoscale the property matrix values
for I := 1 to Ncol do
for J := 1 to Nrow do
Xscl[J,I] := Xraw[J,I];
Xscale(DataScale,NRow,NCol,Xscl,Xbar,Xstd);
// transform the properties via partial least squares
Nipals (Nrow,Ncol,Yscl,Xscl,PC_count,b,true);
//simpls (nrow,ncol,y,xref,Nrank,b,verbose);
{
case ValidModel of
0 : begin
end;
1 : begin //loocv validation model
LOOPLSmodel (DataScale,Nrow,Ncol,Yscl,Ybar,Ystd,Xscl,Xbar,Xstd,PC_count,b);
end;
2 : begin //external validation model
FindXRange(NRow,NCol,Xraw); //
end;
end;
}
end; //PLSR
//------------------------------------------------------------------------------
//transform the response values
procedure YScale(const Scale : Byte; //data scale - raw, main, auto
const Nrow : Word; //count of calibration
var Y : TSVect; //given values Y, scaled back
var Ybar : Double; //mean
var Ystd : Double //std.dev
); //scale type
var
I : Word;
begin
//transform the responses to mean-centered values
Ybar := 0.0;
if (Scale <> 0) and (NRow <> 0) then //not raw
begin
for I := 1 to Nrow do
Ybar := Ybar + Y[I]; //Summe
Ybar := Ybar / Nrow; //mean
for I := 1 to NRow do ////Test soll alles separat sein !!!
Y[I] := Y[I] - Ybar; //centering
end;// if
//transform the responses to autoscaled values
Ystd := 1.0;
if (Scale = 2) and (NRow > 0) then //auto
begin
Ystd := 0.0;
for I := 1 to NRow do
Ystd := Ystd + Y[I] * Y[I];
Ystd := Sqrt(Ystd / (NRow-1));
if (Ystd = 0.0) then Ystd := 1.0;
for I := 1 to Nrow do
Y[I] := Y[I] / Ystd;
end;// if
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -