📄 stat_utils.c
字号:
/*------------------------------------------------------------------------------*
* File Name: Stat_Utils.c *
* Creation: GJL 4/1/2003 *
* Purpose: Source c file containing statistics LLOC functions. *
* Copyright (c) OriginLab Corp. 2003-2007 *
* All Rights Reserved *
* *
* Modification Log: *
*------------------------------------------------------------------------------*/
////////////////////////////////////////////////////////////////////////////////////
// Included header files
//////////////////////////////////////////////////////////////////////////////////
//
// System includes
#include <Origin.h> // Most Origin related classes
//#include <OC_const.h> // OC Constants
//#include <Common.h> // Basic types
//#include <Data.h> // Vectors and matrices
//#include <Tree.h> // Tree class
//#include <Tree_Utils.h> // Tree utilities
//#include <NAG\nag_types.h> // NAG structures
//#include <NAG\OCN_g01.h> // NAG Basic Statistics functions
//#include <NAG\OCN_g02.h> // NAG nag_regsn_mult_linear function
//#include <GetNBox.h> // GetNBox
#include "Stat_Utils.h" // SU constants, non-localized strings, and function prototypes
////////////////////////////////////////////////////////////////////////////////////
// Include definitions of all localized strings. $ causes CodeBuilder to look for
// correct version of Local.h depending on Windows Regional settings.
#include "$Local.h"
////////////////////////////////////////////////////////////////////////////////////
// Function definitions
//////////////////////////////////////////////////////////////////////////////////
//
/**
Function to compute summary statistics for a vector passing results back in a tree.
*/
bool stat_descriptive(const vector& vData, TreeNode& trWhole, int nOffset, const vector* pvWeight) // nOffset = 0, pvWeight = NULL
{
if( trWhole )
{
TreeNode tr = trWhole.Settings;
TreeNode trOther = trWhole.OtherSettings;
// *** Use NAG to compute summary statistics as needed ***
if( tr.N.Enable ||
tr.Sum.Enable ||
tr.Mean.Enable ||
tr.SD.Enable ||
tr.Kurtosis.Enable ||
tr.Skewness.Enable ||
tr.SE.Enable ||
tr.Variance.Enable ||
tr.CoefVar.Enable ||
tr.CIL.Enable ||
tr.CIU.Enable ||
tr.Min.Enable ||
tr.Max.Enable ||
tr.Range.Enable )
{
SumStats ss;
if( stat_summary(ss, vData, pvWeight, trOther.ConfLevel.dVal) )
{
if( tr.N.Enable )
tr.N.dVal = (double) ss.N;
if( tr.Sum.Enable )
tr.Sum.dVal = ss.dSum;
if( tr.Mean.Enable )
tr.Mean.dVal = ss.dMean;
if( tr.SD.Enable )
tr.SD.dVal = ss.dSD;
if( tr.Kurtosis.Enable )
tr.Kurtosis.dVal = ss.dKurtosis;
if( tr.Skewness.Enable )
tr.Skewness.dVal = ss.dSkewness;
if( tr.SE.Enable )
tr.SE.dVal = ss.dSE;
if( tr.Variance.Enable )
tr.Variance.dVal = ss.dVariance;
if( tr.CoefVar.Enable )
tr.CoefVar.dVal = ss.dCoefVar;
if( tr.CIL.Enable )
tr.CIL.dVal = ss.dCIL;
if( tr.CIU.Enable )
tr.CIU.dVal = ss.dCIU;
if( tr.Min.Enable )
tr.Min.dVal = ss.dMin;
if( tr.Max.Enable )
tr.Max.dVal = ss.dMax;
if( tr.Range.Enable )
tr.Range.dVal = ss.dRange;
tr.Missing.dVal = (double) ss.nMissing;
}
else
{
if( tr.N.Enable )
tr.N.dVal = (double) vData.GetSize();
if( tr.Sum.Enable )
tr.Sum.dVal = NANUM;
if( tr.Mean.Enable )
tr.Mean.dVal = NANUM;
if( tr.SD.Enable )
tr.SD.dVal = NANUM;
if( tr.Kurtosis.Enable )
tr.Kurtosis.dVal = NANUM;
if( tr.Skewness.Enable )
tr.Skewness.dVal = NANUM;
if( tr.SE.Enable )
tr.SE.dVal = NANUM;
if( tr.Variance.Enable )
tr.Variance.dVal = NANUM;
if( tr.CoefVar.Enable )
tr.CoefVar.dVal = NANUM;
if( tr.CIL.Enable )
tr.CIL.dVal = NANUM;
if( tr.CIU.Enable )
tr.CIU.dVal = NANUM;
if( tr.Min.Enable )
tr.Min.dVal = NANUM;
if( tr.Max.Enable )
tr.Max.dVal = NANUM;
if( tr.Range.Enable )
tr.Range.dVal = NANUM;
vector vTemp;
vTemp = vData;
vTemp.Trim();
tr.Missing.dVal = (double) (vData.GetSize() - vTemp.GetSize());
}
}
// *** Use vectorbase GetMinMax to get iMin and iMax as needed ***
if( tr.iMin.Enable || tr.iMax.Enable )
{
double dMin, dMax;
uint iMin, iMax;
if( vData.GetMinMax(dMin, dMax, &iMin, &iMax) )
{
if( tr.iMin.Enable )
tr.iMin.dVal = c_index_to_labtalk_index(iMin, nOffset);
if( tr.iMax.Enable )
tr.iMax.dVal = c_index_to_labtalk_index(iMax, nOffset);
}
else
{
if( tr.iMin.Enable )
tr.iMin.dVal = NANUM;
if( tr.iMax.Enable )
tr.iMax.dVal = NANUM;
}
}
// *** Use Percentiles function to compute quartiles and percentiles as needed ***
int nPercents, ii;
vector vPercents;
ii = 0;
vPercents.SetSize(ii);
if( tr.Q25.Enable || tr.IQR.Enable )
{
vPercents.SetSize(++ii);
vPercents[ii - 1] = 25.0;
}
if( tr.Median.Enable )
{
vPercents.SetSize(++ii);
vPercents[ii - 1] = 50.0;
}
if( tr.Q75.Enable || tr.IQR.Enable )
{
vPercents.SetSize(++ii);
vPercents[ii - 1] = 75.0;
}
if( trOther.Percentile.Enable )
{
foreach( TreeNode trN in trOther.Percents.Children )
vPercents.Add( trN.dVal );
}
nPercents = vPercents.GetSize();
if( nPercents )
{
vector vPercentiles;
vPercentiles.SetSize(nPercents);
vPercentiles = NANUM;
if( stat_percentiles(vPercentiles, vData, vPercents, trOther.Interpolate.nVal) )
{
ii = 0;
if( tr.Q25.Enable )
tr.Q25.dVal = vPercentiles[ii];
if( tr.Q25.Enable || tr.IQR.Enable )
ii++;
if( tr.Median.Enable )
{
tr.Median.dVal = vPercentiles[ii];
ii++;
}
if( tr.Q75.Enable )
tr.Q75.dVal = vPercentiles[ii];
if( tr.Q75.Enable || tr.IQR.Enable )
ii++;
if( tr.IQR.Enable )
tr.IQR.dVal = tr.Q75.dVal - tr.Q25.dVal;
if( trOther.Percentile.Enable )
{
TreeNode trSubNode;
foreach( TreeNode trN in trOther.Percents.Children )
{
trSubNode = tr.GetNode(trN.tagName);
trSubNode.dVal = vPercentiles[ii++];
}
}
}
else
{
if( tr.Q25.Enable )
tr.Q25.dVal = NANUM;
if( tr.Median.Enable )
tr.Median.dVal = NANUM;
if( tr.Q75.Enable )
tr.Q75.dVal = NANUM;
if( tr.IQR.Enable )
tr.IQR.dVal = NANUM;
if( trOther.Percentile.Enable )
{
TreeNode trSubNode;
foreach( TreeNode trN in trOther.Percents.Children )
{
trSubNode = tr.GetNode(trN.tagName);
trSubNode.dVal = NANUM;
}
}
}
}
}
return true;
}
/**
Function to compute summary statistics for a vector passing results back in a SumStats structure.
*/
bool stat_summary(SumStats& ss, const vector& vData, const vector* pvWeight, double dConfLevel) // pvWeight = NULL, dConfLevel = SU_DEFAULT_CONF_LEVEL
{
bool bRet;
int ii, iRet;
uint nPts;
double dValue, dVary, dN, dDOF, dSigLevel, dHalfWidth;
NagError neErr;
nPts = vData.GetSize(); // Number of data points in vector
if( nPts < SU_MIN_NPTS ) // NAG basic stats requires minimum number of points (2)
{
ss.N = nPts;
ss.nMissing = 0;
ss.dWeightSum = NANUM;
if( nPts ) // If at least 1 point
{
dValue = vData[0];
if( NANUM == dValue )
{
ss.nMissing = 1;
dVary = NANUM;
}
else
dVary = 0;
//if( pvWeight )
//ss.dWeightSum = *pvWeight; // GJLFix #4509 Can not dereference elements of vector using pointer to vector
}
else
{
dValue = NANUM;
dVary = NANUM;
}
ss.dSum = dValue;
ss.dMean = dValue;
ss.dSD = dVary;
ss.dKurtosis = NANUM;
ss.dSkewness = NANUM;
ss.dSE = dVary;
ss.dVariance = dVary;
ss.dCoefVar = dVary;
ss.dCIL = dValue;
ss.dCIU = dValue;
ss.dMin = dValue;
ss.dMax = dValue;
ss.dRange = dVary;
bRet = true;
}
else
{
// Use NAG to compute core statistics
if( pvWeight ) // If weighting vector is specified
iRet = nag_summary_stats_1var(nPts, vData, *pvWeight, &ss.N, &ss.dMean, &ss.dSD,
&ss.dSkewness, &ss.dKurtosis, &ss.dMin, &ss.dMax, &ss.dWeightSum);
else // Else no weighting vector
iRet = nag_summary_stats_1var(nPts, vData, NULL, &ss.N, &ss.dMean, &ss.dSD,
&ss.dSkewness, &ss.dKurtosis, &ss.dMin, &ss.dMax, &ss.dWeightSum);
if( !iRet ) // If Nag function was successful...
{
dN = (double) ss.N;
ss.dSum = ss.dMean * dN; // Reverse compute sum of data values
ss.dVariance = ss.dSD * ss.dSD; // Compute Variance
if( ss.dMean )
ss.dCoefVar = ss.dSD / ss.dMean; // If mean exists and is not 0 Compute Coefficient of Variation
else
ss.dCoefVar = NANUM;
ss.dRange = ss.dMax - ss.dMin; // Compute Range
dDOF = dN - 1.0;
dSigLevel = 1.0 - dConfLevel;
ss.dSE = ss.dSD / sqrt(dN);
dHalfWidth = nag_deviates_students_t(Nag_TwoTailSignif, dSigLevel, dDOF, &neErr);
if( NE_NOERROR == neErr.code ) // Compute Upper and Lower Confidence Intervals
{
dHalfWidth *= ss.dSE;
ss.dCIL = ss.dMean - dHalfWidth;
ss.dCIU = ss.dMean + dHalfWidth;
}
else
{
ss.dCIL = NANUM;
ss.dCIU = NANUM;
}
ss.nMissing = nPts - ss.N; // Count number of missing values = total number of points - number of valid points
bRet = true;
}
else
bRet = false;
}
return bRet;
}
/**
Function to compute percentiles for a vector.
*/
bool stat_percentiles(vector& vPercentiles, const vector& vData, const vector& vPercents, bool bInterpolate) // bInterpolate = false
{
bool bRet;
uint ii, nPercents, nPts, iIndex;
vector vTemp;
double dNpts, dIndex, dFractionalPart;
nPercents = vPercents.GetSize();
vPercentiles.SetSize(nPercents); // Size Percentiles vector
vPercentiles = NANUM; // Initialize Percentiles to NANUM
if( nPercents ) // If there is at least one Percent then continue
{
vTemp = vData; // Copy input data
vTemp.Trim(); // Remove missing values from vector
nPts = vTemp.GetSize(); // Get number of non-missing values
dNpts = (double) nPts; // Cast to double for computation
if( nPts > 0 ) // If at least one data point
{
vTemp.Sort(); // Sort temporary vector in ascending order
for( ii = 0; ii < nPercents; ii++ )
{
if( vPercents[ii] < 0. || vPercents[ii] > 100. )
vPercentiles[ii] = NANUM; // If Percent is less than 0 or greater than 100 Percentile is NANUM
else
{
// *** This algorithm for computing Percentiles is based on the ***
// *** LabTalk Percentile function in MoStatVc.cpp ***
dIndex = dNpts * vPercents[ii] / 100.0; // Absolute index
iIndex = (uint) dIndex; // Integral part of index
dFractionalPart = dIndex - (double) iIndex; // Fractional part of index
if( iIndex == 0 ) // If iIndex is 0 define Percentile = vTemp[0]
vPercentiles[ii] = vTemp[0];
else if( iIndex == nPts ) // Else if iIndex is nPts define Percentile = vTemp[nPts - 1]
vPercentiles[ii] = vTemp[nPts - 1];
else // Else will need to compute Percentile
{
if( bInterpolate ) // If interpolation is specified
{
if( dFractionalPart == 0 ) // If fractional part is 0 (exact hit)
{
if( vPercents[ii] == 100 )
vPercentiles[ii] = vTemp[iIndex - 1]; // If Percent is 100 then no interpolation
else
vPercentiles[ii] = ( vTemp[iIndex - 1] + vTemp[iIndex] ) / 2.0; // Else interpolate
}
else // Else take next larger item
vPercentiles[ii] = vTemp[iIndex];
}
else
{
if( dFractionalPart == 0 ) // If fractional part is 0 (exact hit)
vPercentiles[ii] = vTemp[iIndex - 1];
else // Else take next larger item
vPercentiles[ii] = vTemp[iIndex];
}
}
}
}
bRet = true; // Successful computation of Percentiles
}
else // Else no data so return error
bRet = false;
}
else // Else no Percents so return error
bRet = false;
return bRet; // Return true on success and false on failure
}
/**
Function to perform multiple linear regression using the NAG function nag_regsn_mult_linear.
Parameters:
nPts=Input number of rows (or data points) in the input matrix
nTdx=Input number of columns (or independent variables) in the input matrix
mX=Input matrix containing data points of the independent variables
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -