📄 stat_utils.c

📁 图像处理的压缩算法
💻 C
📖 第 1 页 / 共 3 页
字号:
		vY=Input vector containing data points of dependent variable
		vWT=Input vector containing weights of data points, NULL if weighting not used
		dRss=Output residual sum of squares for the regression 
		dDf=Output degrees of freedom associated with dRss
		vB=Output least-squares estimates of the parameters of the regression model
		vSE=Output standard errors of the parameter estimates given in vB
		vCOV=Output variance-covariance matrix of estimated parameters in vB
		vRES=Output weighted residuals
		vH=Output diagonal elements of H
		mQ=Output results of the QR decomposition
		bSvd=Output flag set TRUE if singular value decomposition was performed, otherwise FALSE
		iRank=Output rank of the independent variables
		vP=Output details of QR decomposition if SVD is used
		vCOMAR=Output information which is needed by nag_regsn_mult_linear_newyvar_if bSvd is TRUE 
		dTol=Input tolerance used to decide rank of independent variables, default is MR_DEFAULT_TOLERANCE 
		bThroughZero=Input flag forcing regression line through origin, default is FALSE
	Return:
		Returns the results of the NAG nag_regsn_mult_linear function.
*/
int stat_multiple_linear_regression( int nPts, int nTdx, matrix& mX, vector& vY, vector& vWT,
	double& dRss, double& dDf, vector& vB, vector& vSE, vector& vCOV, vector& vRES,
	vector& vH, matrix& mQ, BOOL& bSvd, int& iRank, vector& vP,	vector& vCOMAR,
	double dTol, BOOL bThroughZero)
{
	// Declare local variable holding error code
	int iErr;

	// *** Declare, size, prepare, and check all NAG Linear Regression arguments (as needed) ***
	// Force regression line through zero or not...
	int iP;
	Nag_IncludeMean iMean;						// NAG variable type from header file \OriginC\system\NAG\nag_types.h
	if( bThroughZero )							// If input flag says to force through zero...
	{
		iMean = Nag_MeanZero;					// Set NAG constant to force regression line through zero
		iP = nTdx;								// Set number P of independent variables in the model not including the mean (or intercept)		
	}
	else
	{
		iMean = Nag_MeanInclude;				// Set NAG constant to NOT force regression line through zero
		iP = nTdx + 1;							// Set number P of independent variables in the model including the mean
	}

	// NAG nag_regsn_mult_linear function requires nPts > 1
	if( nPts < SU_MIN_NPTS )					// If less than 2 points return error
		return ERROR_TO_FEW_PTS;

	// The number of independent variables equals the second dimension of the matrix
	int nM = nTdx;
	
	// Indicates which of the potential independent variables are to be included in the model...use all so set to 1
	vector<int> vSX;
	vSX.SetSize( nM );
	vSX = 1;
	
	// The least-squares estimates of the parameters of the regression model
	vB.SetSize( iP );
	
	// The standard errors of the iP parameter estimates given in B
	vSE.SetSize( iP );
	
	// The variance-covariance matrix of estimated parameters in B
	vCOV.SetSize( iP * ( iP + 1 ) / 2);
	
	// The (weighted) residuals
	vRES.SetSize( nPts );
	
	// The diagonal elements of H
	vH.SetSize( nPts );
	
	// The second dimension of the array Q
	int nTdq;
	nTdq = iP + 1;
	
	// The results of the QR decomposition
	mQ.SetSize( nPts, nTdq );
	
	// Details of the QR decomposition and SVD if used
	vP.SetSize( 2 * iP + iP * iP );
	
	// Information which is needed by nag_regsn_mult_linear_newyvar_if bSvd is TRUE
	vCOMAR.SetSize( 5 * ( iP - 1 ) + iP * iP );
	
	// *** Call NAG function nag_regsn_mult_linear to perform linear regression ***
	// From <NAG\OCN_g02.h>: g02dac nag_regsn_mult_linear: Perform multiple linear regression
	iErr = nag_regsn_mult_linear( iMean, nPts, mX, nTdx, nM, vSX, iP, vY, vWT, &dRss, &dDf,
		vB, vSE, vCOV, vRES, vH, mQ, nTdq, &bSvd, &iRank, vP, dTol, vCOMAR );

	return iErr;
}

/*
		Perform a linear regression on the input curve.
	Parameters:
		crvData=Input, the X and Y coordinates of the sample data
		mResultCurves=Output, the result curves where
			Col(0) is the X coordinates of the fit line
			Col(1) is the Y coordinates of the fit line
			Col(2) is the lower confidence band
			Col(3) is the upper confidence band
			Col(4) is the lower prediction band
			Col(5) is the upper prediction band
		trLR=Input settings & Output results of the linear regression
			trLR.GUI.Fit.ThroughZero.nVal				// Force fit to pass thru zero
			trLR.GUI.Fit.FixSlope.nVal					// force slope to be fixed
			trLR.GUI.Fit.FixSlopeAt.dVal				// fixed value of slope (Note: cannot fix slope and thru zero at same time, should return error from LLOC)
			trLR.GUI.Fit.ErrBarWeight.nVal				// Use error column for wt - use (1/err^2) as wt factor
			trLR.GUI.Fit.UseReducedChiSq.nVal			// Scale parameter errors with reduced chisqr

			trLR.GUI.ResultCurves.Points.nVal			// Number of points in fit curve
			trLR.GUI.ResultCurves.ConfBands.nVal		// Create confidence bands - if not set, then matrix will have empty columns
			trLR.GUI.ResultCurves.PredBands.nVal		// Create prediction bands - if not set, then matrix will have empty columns
			trLR.GUI.ResultCurves.Confidence.dVal		// Confidence value to be used 

			trLR.Calculation.Control.UseDataXRange.nVal	// Option = 1 to use data range, = 0 to use X1, X2
			trLR.Calculation.Control.X1.dVal			// Default X minimum for fit curve
			trLR.Calculation.Control.X2.dVal			// Default X maximum for fit curve

			trLR.Calculation.Parameters.P1.Name.strVal	// Parameter Name...A
			trLR.Calculation.Parameters.P1.Value.dVal	// Parameter Value
			trLR.Calculation.Parameters.P1.Error.dVal	// Parameter Error
			trLR.Calculation.Parameters.P1.Vary.nVal	// Parameter Vary
			trLR.Calculation.Parameters.P1.tValue.dVal	// Parameter t-Value
			trLR.Calculation.Parameters.P1.LCI.dVal		// Parameter LCI
			trLR.Calculation.Parameters.P1.UCI.dVal		// Parameter UCI
			trLR.Calculation.Parameters.P1.Prob.dVal	// Parameter Prob > |t|
	
			trLR.Calculation.Parameters.P2.Name.strVal	// Parameter Name...B
			trLR.Calculation.Parameters.P2.Value.dVal
			trLR.Calculation.Parameters.P2.Error.dVal
			trLR.Calculation.Parameters.P2.Vary.nVal
			trLR.Calculation.Parameters.P2.tValue.dVal
			trLR.Calculation.Parameters.P2.LCI.dVal
			trLR.Calculation.Parameters.P2.UCI.dVal
			trLR.Calculation.Parameters.P2.Prob.dVal
	
			trLR.Calculation.Stat.Rvalue.dVal			// R-value
			trLR.Calculation.Stat.RSqCOD.dVal			// RSq-COD value
			trLR.Calculation.Stat.AdjRSq.dVal			// Adjusted RSq
			trLR.Calculation.Stat.RMSESD.dVal			// RMSE-Standard Dev.
			trLR.Calculation.Stat.N.nVal				// No. of points in fit
	
			trLR.Calculation.ANOVA.Model.DOF.nVal		// Model DOF 
			trLR.Calculation.ANOVA.Model.SSq.dVal		// Model SS
			trLR.Calculation.ANOVA.Model.MeanSq.dVal	// Model Mean Sq
			trLR.Calculation.ANOVA.Error.DOF.nVal		// Error DOF
			trLR.Calculation.ANOVA.Error.SSq.dVal		// Error SS
			trLR.Calculation.ANOVA.Error.MeanSq.dVal	// Error Mean Sq
			trLR.Calculation.ANOVA.Total.DOF.dVal		// Total degrees of freedom
			trLR.Calculation.ANOVA.Total.SSq.dVal		// Total SS
			trLR.Calculation.ANOVA.Fvalue.dVal			// F-value
			trLR.Calculation.ANOVA.Pvalue.dVal			// P-value
	Return:
		Returns ERROR_NO_ERROR on successful exit and an error code on failure.
		NAG error codes:		Negative 1 * NAG error code
		ERROR_INVALID_CURVE:	Invalid input curve
		ERROR_INVALID_TREENODE:	Invalid TreeNode
		ERROR_TO_FEW_PTS:		Not enough points in the curve
		ERROR_X_RANGE:			All the elements of X are equal
		ERROR_SETTINGS:			'ThroughZero' and 'FixSlope' can not be both enabled
*/
int stat_linear_fit(const curvebase& crvData, matrix& mResultCurves, TreeNode& trLR) 
{
	int nRet = ERROR_NO_ERROR;

	// Check the input arguments
	if( !crvData.IsValid() )
		return ERROR_INVALID_CURVE;

	if( !trLR.IsValid() )
		return ERROR_INVALID_TREENODE;

	TreeNode trFit = trLR.GUI.Fit;
	if( !trFit.IsValid() )
		return ERROR_INVALID_TREENODE;

	TreeNode trResultCurves = trLR.GUI.ResultCurves;
	if( !trResultCurves.IsValid() )
		return ERROR_INVALID_TREENODE;

	if( trFit.ThroughZero.nVal && trFit.FixSlope.nVal )
		return ERROR_SETTINGS;

	double dIntercept, dSlope;	// The intercept and slope of the fitted line
	double da_Err, db_Err;		// The standard error of the intercept and slope
	double rsq, rss, dof;		// The R-square, residual sum of squares, and the degrees of freedom
	int mdof;					// The degrees of freedom of the Model

	double dSumWeight;			// The sum of weights
	double dSxx, dSxy, dSyy;
	double dxAve, dyAve;		// The average value of x and y

	vector vY, vX, vWeight, vxWeight;
	if( !crvData.CopyData(vX, vY, vWeight, vxWeight) )
		return ERROR_INVALID_CURVE;

	// Get the number of points of the curve
	int nPts = vY.GetSize();
	if( nPts < SU_MIN_NPTS ) 
		return ERROR_TO_FEW_PTS;

	if( trFit.FixSlope.nVal == 1 ) // Fix slope
	{
		trLR.Calculation.Parameters.P1.Vary.nVal = 1;
		trLR.Calculation.Parameters.P2.Vary.nVal = 0;

		if( 0 != stat_linear_regrssion_sum(vX, vY, vWeight, dSumWeight, dxAve, dyAve, dSxx, dSxy, dSyy) )
			return ERROR_X_RANGE;

		// Slope
		dSlope = trFit.FixSlopeAt.dVal;
		db_Err = 0;

		// Intercept		
		dIntercept = dyAve - dSlope * dxAve;

		// Residual sum of squares
		rss = dSyy - 2 * dSlope * dSxy + dSlope^2 * dSxx;

		// Degrees of freedom
		dof = nPts - 1;
		mdof = 0;

		// Standard error of intercept
		da_Err = sqrt(rss / dof / dSumWeight);

		// R-square
		rsq = dSxy^2 / dSxx / dSyy;
	}
	else // Slope is not fixed
	{
		mdof = 1;
		Nag_SumSquare mean; // Whether or not force the line through zero
		trLR.Calculation.Parameters.P2.Vary.nVal = 1;

		if( trFit.ThroughZero.nVal == 0 ) // Without constant 
		{
			trLR.Calculation.Parameters.P1.Vary.nVal = 1;
			mean = Nag_AboutMean;
			dof = nPts - 2;
		}
		else // With constant
		{
			trLR.Calculation.Parameters.P1.Vary.nVal = 0;
			mean = Nag_AboutZero;
			dof = nPts - 1;
		}
	
		// Linear regression by NAG function
		double df;
		nRet = nag_simple_linear_regression(mean, nPts, vX, vY, vWeight, &dIntercept,
			&dSlope, &da_Err, &db_Err, &rsq, &rss,&df);
		if( nRet != 0 )
			return -1 * nRet;

		// Get other statistics sums
		if( 0 != stat_linear_regrssion_sum(vX, vY, vWeight, dSumWeight, dxAve, dyAve, dSxx, dSxy, dSyy) )
			return ERROR_X_RANGE;

		// DOF_RELATED_DIFFERENCE
		if( trFit.ErrBarWeight.nVal )
		{
			da_Err = da_Err * sqrt(df / dof);
			db_Err = db_Err * sqrt(df / dof);
			rsq = dSxy^2 / dSxx / dSyy;
		}
	}
	
	// Set the dimension of the result matrix
	int FitPoints = trResultCurves.Points.nVal;
	mResultCurves.SetSize(FitPoints, FITLR_NUM_COLS);
	
	// Fit line
	double Xmin, Xmax, Xinc;
	if( trLR.Calculation.Control.UseDataXRange.nVal > 0 )
	{
		vX.GetMinMax(Xmin, Xmax);
	}
	else
	{
		Xmin = trLR.Calculation.Control.X1.dVal;
		Xmax = trLR.Calculation.Control.X2.dVal;
	}
	Xinc = (Xmax - Xmin) / (FitPoints - 1);
	
	vector vFitX(FitPoints), vFitY(FitPoints);
	vFitX[0] = Xmin;
	for( int ii = 1; ii < FitPoints; ii++)
	{
		vFitX[ii] = vFitX[ii - 1] + Xinc;
	}
	vFitY = vFitX * dSlope + dIntercept;
	
	mResultCurves.SetColumn(vFitX, FITLR_X);
	mResultCurves.SetColumn(vFitY, FITLR_Y);
	
	// Standard deviation
	double SD = sqrt(rss / dof);
	
	// t(1-alpha/2,dof) ------- alpha: significance level
	double tvalue = tTable(1 - (1.0 - trResultCurves.Confidence.dVal) / 2, dof);
	
	// Confidence bands
	if( trResultCurves.ConfBands.nVal )
	{
		vector vConf(FitPoints), vSerr(FitPoints);
		
		if( trFit.ThroughZero.nVal )
		{
			vSerr = fabs(vFitX) * db_Err;
		}
		else
		{
			vSerr = sqrt(1.0 / dSumWeight + (vFitX - dxAve)^2 / dSxx) * SD;
		}

		vConf = vFitY - tvalue * vSerr;
		mResultCurves.SetColumn(vConf, FITLR_UCB);
		vConf = vFitY + tvalue * vSerr;	
		mResultCurves.SetColumn(vConf, FITLR_UCB);
	}

	// Prediction bands
	if( trResultCurves.PredBands.nVal )
	{
		vector vPre(FitPoints), vSerr(FitPoints);
		vSerr = sqrt(1.0 + 1.0 / dSumWeight + (vFitX - dxAve)^2 / dSxx) * SD;
		vPre=vFitY - tvalue * vSerr;
		mResultCurves.SetColumn(vPre, FITLR_LPB);
		vPre=vFitY + tvalue * vSerr;	
		mResultCurves.SetColumn(vPre, FITLR_UPB);
	}

	// Simple statistics results
	if( trFit.ErrBarWeight.nVal && !trFit.UseReducedChiSq.nVal ) 
	{
		double dtemp = sqrt(rss / dof);
		da_Err /= dtemp;
		db_Err /= dtemp;
	}
	
	string str = LR_COL_NAMES;
	// Intercept
	trLR.Calculation.Parameters.P1.Name.strVal = str.GetToken(0, SU_DELIM);
	trLR.Calculation.Parameters.P1.Value.dVal = dIntercept;
	trLR.Calculation.Parameters.P1.Error.dVal = da_Err; 
	trLR.Calculation.Parameters.P1.tValue.dVal = dIntercept / da_Err;
	trLR.Calculation.Parameters.P1.LCI.dVal = dIntercept - da_Err * tvalue;
	trLR.Calculation.Parameters.P1.UCI.dVal = dIntercept + da_Err * tvalue;
	trLR.Calculation.Parameters.P1.Prob.dVal = 2 * (1 - invt(fabs(trLR.Calculation.Parameters.P1.tValue.dVal), dof));
	
	// Slope
	trLR.Calculation.Parameters.P2.Name.strVal = str.GetToken(1, SU_DELIM);
	trLR.Calculation.Parameters.P2.Value.dVal = dSlope;
	trLR.Calculation.Parameters.P2.Error.dVal = db_Err;
	trLR.Calculation.Parameters.P2.tValue.dVal = dSlope / db_Err;
	trLR.Calculation.Parameters.P2.LCI.dVal = dSlope - db_Err * tvalue;
	trLR.Calculation.Parameters.P2.UCI.dVal = dSlope + db_Err * tvalue;
	trLR.Calculation.Parameters.P2.Prob.dVal = 2 * (1 - invt(fabs(trLR.Calculation.Parameters.P2.tValue.dVal), dof));
	
	// Stat - Correlation coefficient
	trLR.Calculation.Stat.Rvalue.dVal = sqrt(rsq);
	trLR.Calculation.Stat.RSqCOD.dVal = rsq;
	trLR.Calculation.Stat.AdjRSq.dVal = 1 - (1 - rsq) * (dof + 1) / dof;
	trLR.Calculation.Stat.RMSESD.dVal = SD;
	trLR.Calculation.Stat.N.nVal = nPts;
	
	// ANOVA
	trLR.Calculation.ANOVA.Error.DOF.nVal = dof;
	trLR.Calculation.ANOVA.Error.SSq.dVal = rss;
	trLR.Calculation.ANOVA.Error.MeanSq.dVal = rss /dof;
	trLR.Calculation.ANOVA.Total.DOF.nVal = dof + mdof;
	trLR.Calculation.ANOVA.Total.SSq.dVal = dSyy;
	trLR.Calculation.ANOVA.Model.DOF.nVal = mdof;
	trLR.Calculation.ANOVA.Model.SSq.dVal = trLR.Calculation.ANOVA.Total.SSq.dVal - trLR.Calculation.ANOVA.Error.SSq.dVal;
	trLR.Calculation.ANOVA.Model.MeanSq.dVal = trLR.Calculation.ANOVA.Model.SSq.dVal;
	trLR.Calculation.ANOVA.Fvalue.dVal = (dSyy - rss) / rss * dof;
	trLR.Calculation.ANOVA.Pvalue.dVal = 1 - invf(trLR.Calculation.ANOVA.Fvalue.dVal, 1, dof);

	return nRet;
}

int stat_linear_regrssion_sum(const vectorbase& vX, const vectorbase& vY, const vectorbase& vW,
	double& dSumWeight, double& dxAve, double& dyAve, double& dSxx, double& dSxy, double& dSyy)
{
	vector vtemp;	

	vW.Sum(dSumWeight);

	// Average value of y
	vtemp = vY * vW;
	vtemp.Sum(dyAve);
	dyAve /= dSumWeight;

	// Syy
	vtemp *= vY;
	vtemp.Sum(dSyy);
	dSyy -= dyAve^2 * dSumWeight;

	// Average value of x
	vtemp = vX * vW;
	vtemp.Sum(dxAve);
	dxAve /= dSumWeight;

	// Sxx
	vtemp *= vX;
	vtemp.Sum(dSxx);
	dSxx -= dxAve^2 * dSumWeight;
	if( dSxx < 1e-15 )
		return -1;

	// Sxy
	vtemp = vW * vY * vX;
	vtemp.Sum(dSxy);
	dSxy -= dxAve * dSumWeight * dyAve;

	return 0;
}

/*
		Perform a multiple linear regression using the NAG function nag_regsn_mult_linear.
	Parameters:
		vDepData=Input, vector holding input dependent data
		mIndepData=Input, matrix holding input independent data
			trMR: Input settings and Output results, tree having the nodes:
			trMR.GUI.Fit.ThroughZero.nVal					// Force fit to pass thru zero
	
			trMR.Calculation.Parameters.P1.Name.strVal		// Parameter Name...Y-Intercept
			trMR.Calculation.Parameters.P1.Value.dVal		// Parameter Value
			trMR.Calculation.Parameters.P1.Error.dVal		// Parameter Error
			trMR.Calculation.Parameters.P1.tValue.dVal		// Parameter t-Value
			trMR.Calculation.Parameters.P1.Prob.dVal		// Parameter Prob > |t|
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -