📄 stat_utils.c
字号:
vY=Input vector containing data points of dependent variable
vWT=Input vector containing weights of data points, NULL if weighting not used
dRss=Output residual sum of squares for the regression
dDf=Output degrees of freedom associated with dRss
vB=Output least-squares estimates of the parameters of the regression model
vSE=Output standard errors of the parameter estimates given in vB
vCOV=Output variance-covariance matrix of estimated parameters in vB
vRES=Output weighted residuals
vH=Output diagonal elements of H
mQ=Output results of the QR decomposition
bSvd=Output flag set TRUE if singular value decomposition was performed, otherwise FALSE
iRank=Output rank of the independent variables
vP=Output details of QR decomposition if SVD is used
vCOMAR=Output information which is needed by nag_regsn_mult_linear_newyvar_if bSvd is TRUE
dTol=Input tolerance used to decide rank of independent variables, default is MR_DEFAULT_TOLERANCE
bThroughZero=Input flag forcing regression line through origin, default is FALSE
Return:
Returns the results of the NAG nag_regsn_mult_linear function.
*/
int stat_multiple_linear_regression( int nPts, int nTdx, matrix& mX, vector& vY, vector& vWT,
double& dRss, double& dDf, vector& vB, vector& vSE, vector& vCOV, vector& vRES,
vector& vH, matrix& mQ, BOOL& bSvd, int& iRank, vector& vP, vector& vCOMAR,
double dTol, BOOL bThroughZero)
{
// Declare local variable holding error code
int iErr;
// *** Declare, size, prepare, and check all NAG Linear Regression arguments (as needed) ***
// Force regression line through zero or not...
int iP;
Nag_IncludeMean iMean; // NAG variable type from header file \OriginC\system\NAG\nag_types.h
if( bThroughZero ) // If input flag says to force through zero...
{
iMean = Nag_MeanZero; // Set NAG constant to force regression line through zero
iP = nTdx; // Set number P of independent variables in the model not including the mean (or intercept)
}
else
{
iMean = Nag_MeanInclude; // Set NAG constant to NOT force regression line through zero
iP = nTdx + 1; // Set number P of independent variables in the model including the mean
}
// NAG nag_regsn_mult_linear function requires nPts > 1
if( nPts < SU_MIN_NPTS ) // If less than 2 points return error
return ERROR_TO_FEW_PTS;
// The number of independent variables equals the second dimension of the matrix
int nM = nTdx;
// Indicates which of the potential independent variables are to be included in the model...use all so set to 1
vector<int> vSX;
vSX.SetSize( nM );
vSX = 1;
// The least-squares estimates of the parameters of the regression model
vB.SetSize( iP );
// The standard errors of the iP parameter estimates given in B
vSE.SetSize( iP );
// The variance-covariance matrix of estimated parameters in B
vCOV.SetSize( iP * ( iP + 1 ) / 2);
// The (weighted) residuals
vRES.SetSize( nPts );
// The diagonal elements of H
vH.SetSize( nPts );
// The second dimension of the array Q
int nTdq;
nTdq = iP + 1;
// The results of the QR decomposition
mQ.SetSize( nPts, nTdq );
// Details of the QR decomposition and SVD if used
vP.SetSize( 2 * iP + iP * iP );
// Information which is needed by nag_regsn_mult_linear_newyvar_if bSvd is TRUE
vCOMAR.SetSize( 5 * ( iP - 1 ) + iP * iP );
// *** Call NAG function nag_regsn_mult_linear to perform linear regression ***
// From <NAG\OCN_g02.h>: g02dac nag_regsn_mult_linear: Perform multiple linear regression
iErr = nag_regsn_mult_linear( iMean, nPts, mX, nTdx, nM, vSX, iP, vY, vWT, &dRss, &dDf,
vB, vSE, vCOV, vRES, vH, mQ, nTdq, &bSvd, &iRank, vP, dTol, vCOMAR );
return iErr;
}
/*
Perform a linear regression on the input curve.
Parameters:
crvData=Input, the X and Y coordinates of the sample data
mResultCurves=Output, the result curves where
Col(0) is the X coordinates of the fit line
Col(1) is the Y coordinates of the fit line
Col(2) is the lower confidence band
Col(3) is the upper confidence band
Col(4) is the lower prediction band
Col(5) is the upper prediction band
trLR=Input settings & Output results of the linear regression
trLR.GUI.Fit.ThroughZero.nVal // Force fit to pass thru zero
trLR.GUI.Fit.FixSlope.nVal // force slope to be fixed
trLR.GUI.Fit.FixSlopeAt.dVal // fixed value of slope (Note: cannot fix slope and thru zero at same time, should return error from LLOC)
trLR.GUI.Fit.ErrBarWeight.nVal // Use error column for wt - use (1/err^2) as wt factor
trLR.GUI.Fit.UseReducedChiSq.nVal // Scale parameter errors with reduced chisqr
trLR.GUI.ResultCurves.Points.nVal // Number of points in fit curve
trLR.GUI.ResultCurves.ConfBands.nVal // Create confidence bands - if not set, then matrix will have empty columns
trLR.GUI.ResultCurves.PredBands.nVal // Create prediction bands - if not set, then matrix will have empty columns
trLR.GUI.ResultCurves.Confidence.dVal // Confidence value to be used
trLR.Calculation.Control.UseDataXRange.nVal // Option = 1 to use data range, = 0 to use X1, X2
trLR.Calculation.Control.X1.dVal // Default X minimum for fit curve
trLR.Calculation.Control.X2.dVal // Default X maximum for fit curve
trLR.Calculation.Parameters.P1.Name.strVal // Parameter Name...A
trLR.Calculation.Parameters.P1.Value.dVal // Parameter Value
trLR.Calculation.Parameters.P1.Error.dVal // Parameter Error
trLR.Calculation.Parameters.P1.Vary.nVal // Parameter Vary
trLR.Calculation.Parameters.P1.tValue.dVal // Parameter t-Value
trLR.Calculation.Parameters.P1.LCI.dVal // Parameter LCI
trLR.Calculation.Parameters.P1.UCI.dVal // Parameter UCI
trLR.Calculation.Parameters.P1.Prob.dVal // Parameter Prob > |t|
trLR.Calculation.Parameters.P2.Name.strVal // Parameter Name...B
trLR.Calculation.Parameters.P2.Value.dVal
trLR.Calculation.Parameters.P2.Error.dVal
trLR.Calculation.Parameters.P2.Vary.nVal
trLR.Calculation.Parameters.P2.tValue.dVal
trLR.Calculation.Parameters.P2.LCI.dVal
trLR.Calculation.Parameters.P2.UCI.dVal
trLR.Calculation.Parameters.P2.Prob.dVal
trLR.Calculation.Stat.Rvalue.dVal // R-value
trLR.Calculation.Stat.RSqCOD.dVal // RSq-COD value
trLR.Calculation.Stat.AdjRSq.dVal // Adjusted RSq
trLR.Calculation.Stat.RMSESD.dVal // RMSE-Standard Dev.
trLR.Calculation.Stat.N.nVal // No. of points in fit
trLR.Calculation.ANOVA.Model.DOF.nVal // Model DOF
trLR.Calculation.ANOVA.Model.SSq.dVal // Model SS
trLR.Calculation.ANOVA.Model.MeanSq.dVal // Model Mean Sq
trLR.Calculation.ANOVA.Error.DOF.nVal // Error DOF
trLR.Calculation.ANOVA.Error.SSq.dVal // Error SS
trLR.Calculation.ANOVA.Error.MeanSq.dVal // Error Mean Sq
trLR.Calculation.ANOVA.Total.DOF.dVal // Total degrees of freedom
trLR.Calculation.ANOVA.Total.SSq.dVal // Total SS
trLR.Calculation.ANOVA.Fvalue.dVal // F-value
trLR.Calculation.ANOVA.Pvalue.dVal // P-value
Return:
Returns ERROR_NO_ERROR on successful exit and an error code on failure.
NAG error codes: Negative 1 * NAG error code
ERROR_INVALID_CURVE: Invalid input curve
ERROR_INVALID_TREENODE: Invalid TreeNode
ERROR_TO_FEW_PTS: Not enough points in the curve
ERROR_X_RANGE: All the elements of X are equal
ERROR_SETTINGS: 'ThroughZero' and 'FixSlope' can not be both enabled
*/
int stat_linear_fit(const curvebase& crvData, matrix& mResultCurves, TreeNode& trLR)
{
int nRet = ERROR_NO_ERROR;
// Check the input arguments
if( !crvData.IsValid() )
return ERROR_INVALID_CURVE;
if( !trLR.IsValid() )
return ERROR_INVALID_TREENODE;
TreeNode trFit = trLR.GUI.Fit;
if( !trFit.IsValid() )
return ERROR_INVALID_TREENODE;
TreeNode trResultCurves = trLR.GUI.ResultCurves;
if( !trResultCurves.IsValid() )
return ERROR_INVALID_TREENODE;
if( trFit.ThroughZero.nVal && trFit.FixSlope.nVal )
return ERROR_SETTINGS;
double dIntercept, dSlope; // The intercept and slope of the fitted line
double da_Err, db_Err; // The standard error of the intercept and slope
double rsq, rss, dof; // The R-square, residual sum of squares, and the degrees of freedom
int mdof; // The degrees of freedom of the Model
double dSumWeight; // The sum of weights
double dSxx, dSxy, dSyy;
double dxAve, dyAve; // The average value of x and y
vector vY, vX, vWeight, vxWeight;
if( !crvData.CopyData(vX, vY, vWeight, vxWeight) )
return ERROR_INVALID_CURVE;
// Get the number of points of the curve
int nPts = vY.GetSize();
if( nPts < SU_MIN_NPTS )
return ERROR_TO_FEW_PTS;
if( trFit.FixSlope.nVal == 1 ) // Fix slope
{
trLR.Calculation.Parameters.P1.Vary.nVal = 1;
trLR.Calculation.Parameters.P2.Vary.nVal = 0;
if( 0 != stat_linear_regrssion_sum(vX, vY, vWeight, dSumWeight, dxAve, dyAve, dSxx, dSxy, dSyy) )
return ERROR_X_RANGE;
// Slope
dSlope = trFit.FixSlopeAt.dVal;
db_Err = 0;
// Intercept
dIntercept = dyAve - dSlope * dxAve;
// Residual sum of squares
rss = dSyy - 2 * dSlope * dSxy + dSlope^2 * dSxx;
// Degrees of freedom
dof = nPts - 1;
mdof = 0;
// Standard error of intercept
da_Err = sqrt(rss / dof / dSumWeight);
// R-square
rsq = dSxy^2 / dSxx / dSyy;
}
else // Slope is not fixed
{
mdof = 1;
Nag_SumSquare mean; // Whether or not force the line through zero
trLR.Calculation.Parameters.P2.Vary.nVal = 1;
if( trFit.ThroughZero.nVal == 0 ) // Without constant
{
trLR.Calculation.Parameters.P1.Vary.nVal = 1;
mean = Nag_AboutMean;
dof = nPts - 2;
}
else // With constant
{
trLR.Calculation.Parameters.P1.Vary.nVal = 0;
mean = Nag_AboutZero;
dof = nPts - 1;
}
// Linear regression by NAG function
double df;
nRet = nag_simple_linear_regression(mean, nPts, vX, vY, vWeight, &dIntercept,
&dSlope, &da_Err, &db_Err, &rsq, &rss,&df);
if( nRet != 0 )
return -1 * nRet;
// Get other statistics sums
if( 0 != stat_linear_regrssion_sum(vX, vY, vWeight, dSumWeight, dxAve, dyAve, dSxx, dSxy, dSyy) )
return ERROR_X_RANGE;
// DOF_RELATED_DIFFERENCE
if( trFit.ErrBarWeight.nVal )
{
da_Err = da_Err * sqrt(df / dof);
db_Err = db_Err * sqrt(df / dof);
rsq = dSxy^2 / dSxx / dSyy;
}
}
// Set the dimension of the result matrix
int FitPoints = trResultCurves.Points.nVal;
mResultCurves.SetSize(FitPoints, FITLR_NUM_COLS);
// Fit line
double Xmin, Xmax, Xinc;
if( trLR.Calculation.Control.UseDataXRange.nVal > 0 )
{
vX.GetMinMax(Xmin, Xmax);
}
else
{
Xmin = trLR.Calculation.Control.X1.dVal;
Xmax = trLR.Calculation.Control.X2.dVal;
}
Xinc = (Xmax - Xmin) / (FitPoints - 1);
vector vFitX(FitPoints), vFitY(FitPoints);
vFitX[0] = Xmin;
for( int ii = 1; ii < FitPoints; ii++)
{
vFitX[ii] = vFitX[ii - 1] + Xinc;
}
vFitY = vFitX * dSlope + dIntercept;
mResultCurves.SetColumn(vFitX, FITLR_X);
mResultCurves.SetColumn(vFitY, FITLR_Y);
// Standard deviation
double SD = sqrt(rss / dof);
// t(1-alpha/2,dof) ------- alpha: significance level
double tvalue = tTable(1 - (1.0 - trResultCurves.Confidence.dVal) / 2, dof);
// Confidence bands
if( trResultCurves.ConfBands.nVal )
{
vector vConf(FitPoints), vSerr(FitPoints);
if( trFit.ThroughZero.nVal )
{
vSerr = fabs(vFitX) * db_Err;
}
else
{
vSerr = sqrt(1.0 / dSumWeight + (vFitX - dxAve)^2 / dSxx) * SD;
}
vConf = vFitY - tvalue * vSerr;
mResultCurves.SetColumn(vConf, FITLR_UCB);
vConf = vFitY + tvalue * vSerr;
mResultCurves.SetColumn(vConf, FITLR_UCB);
}
// Prediction bands
if( trResultCurves.PredBands.nVal )
{
vector vPre(FitPoints), vSerr(FitPoints);
vSerr = sqrt(1.0 + 1.0 / dSumWeight + (vFitX - dxAve)^2 / dSxx) * SD;
vPre=vFitY - tvalue * vSerr;
mResultCurves.SetColumn(vPre, FITLR_LPB);
vPre=vFitY + tvalue * vSerr;
mResultCurves.SetColumn(vPre, FITLR_UPB);
}
// Simple statistics results
if( trFit.ErrBarWeight.nVal && !trFit.UseReducedChiSq.nVal )
{
double dtemp = sqrt(rss / dof);
da_Err /= dtemp;
db_Err /= dtemp;
}
string str = LR_COL_NAMES;
// Intercept
trLR.Calculation.Parameters.P1.Name.strVal = str.GetToken(0, SU_DELIM);
trLR.Calculation.Parameters.P1.Value.dVal = dIntercept;
trLR.Calculation.Parameters.P1.Error.dVal = da_Err;
trLR.Calculation.Parameters.P1.tValue.dVal = dIntercept / da_Err;
trLR.Calculation.Parameters.P1.LCI.dVal = dIntercept - da_Err * tvalue;
trLR.Calculation.Parameters.P1.UCI.dVal = dIntercept + da_Err * tvalue;
trLR.Calculation.Parameters.P1.Prob.dVal = 2 * (1 - invt(fabs(trLR.Calculation.Parameters.P1.tValue.dVal), dof));
// Slope
trLR.Calculation.Parameters.P2.Name.strVal = str.GetToken(1, SU_DELIM);
trLR.Calculation.Parameters.P2.Value.dVal = dSlope;
trLR.Calculation.Parameters.P2.Error.dVal = db_Err;
trLR.Calculation.Parameters.P2.tValue.dVal = dSlope / db_Err;
trLR.Calculation.Parameters.P2.LCI.dVal = dSlope - db_Err * tvalue;
trLR.Calculation.Parameters.P2.UCI.dVal = dSlope + db_Err * tvalue;
trLR.Calculation.Parameters.P2.Prob.dVal = 2 * (1 - invt(fabs(trLR.Calculation.Parameters.P2.tValue.dVal), dof));
// Stat - Correlation coefficient
trLR.Calculation.Stat.Rvalue.dVal = sqrt(rsq);
trLR.Calculation.Stat.RSqCOD.dVal = rsq;
trLR.Calculation.Stat.AdjRSq.dVal = 1 - (1 - rsq) * (dof + 1) / dof;
trLR.Calculation.Stat.RMSESD.dVal = SD;
trLR.Calculation.Stat.N.nVal = nPts;
// ANOVA
trLR.Calculation.ANOVA.Error.DOF.nVal = dof;
trLR.Calculation.ANOVA.Error.SSq.dVal = rss;
trLR.Calculation.ANOVA.Error.MeanSq.dVal = rss /dof;
trLR.Calculation.ANOVA.Total.DOF.nVal = dof + mdof;
trLR.Calculation.ANOVA.Total.SSq.dVal = dSyy;
trLR.Calculation.ANOVA.Model.DOF.nVal = mdof;
trLR.Calculation.ANOVA.Model.SSq.dVal = trLR.Calculation.ANOVA.Total.SSq.dVal - trLR.Calculation.ANOVA.Error.SSq.dVal;
trLR.Calculation.ANOVA.Model.MeanSq.dVal = trLR.Calculation.ANOVA.Model.SSq.dVal;
trLR.Calculation.ANOVA.Fvalue.dVal = (dSyy - rss) / rss * dof;
trLR.Calculation.ANOVA.Pvalue.dVal = 1 - invf(trLR.Calculation.ANOVA.Fvalue.dVal, 1, dof);
return nRet;
}
int stat_linear_regrssion_sum(const vectorbase& vX, const vectorbase& vY, const vectorbase& vW,
double& dSumWeight, double& dxAve, double& dyAve, double& dSxx, double& dSxy, double& dSyy)
{
vector vtemp;
vW.Sum(dSumWeight);
// Average value of y
vtemp = vY * vW;
vtemp.Sum(dyAve);
dyAve /= dSumWeight;
// Syy
vtemp *= vY;
vtemp.Sum(dSyy);
dSyy -= dyAve^2 * dSumWeight;
// Average value of x
vtemp = vX * vW;
vtemp.Sum(dxAve);
dxAve /= dSumWeight;
// Sxx
vtemp *= vX;
vtemp.Sum(dSxx);
dSxx -= dxAve^2 * dSumWeight;
if( dSxx < 1e-15 )
return -1;
// Sxy
vtemp = vW * vY * vX;
vtemp.Sum(dSxy);
dSxy -= dxAve * dSumWeight * dyAve;
return 0;
}
/*
Perform a multiple linear regression using the NAG function nag_regsn_mult_linear.
Parameters:
vDepData=Input, vector holding input dependent data
mIndepData=Input, matrix holding input independent data
trMR: Input settings and Output results, tree having the nodes:
trMR.GUI.Fit.ThroughZero.nVal // Force fit to pass thru zero
trMR.Calculation.Parameters.P1.Name.strVal // Parameter Name...Y-Intercept
trMR.Calculation.Parameters.P1.Value.dVal // Parameter Value
trMR.Calculation.Parameters.P1.Error.dVal // Parameter Error
trMR.Calculation.Parameters.P1.tValue.dVal // Parameter t-Value
trMR.Calculation.Parameters.P1.Prob.dVal // Parameter Prob > |t|
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -