📄 stat.hpp
字号:
vector<int> &counts, T &min, T &binsize, int &extrapoints,
int numbins=10)
{ histogram(flist, counts, min, binsize, extrapoints, numbins);
for(int i=1; i<numbins; counts[i]+=counts[i-1], i++);
}
template<class T>
void cumfreq (const vector<T> &flist,
vector<int> &counts, T &rmin, T &binsize, int &extrapoints,
const T &min, const T &max, int numbins=10)
{ histogram(flist, counts, rmin, binsize, extrapoints, min, max, numbins);
for(int i=1; i<numbins; counts[i]+=counts[i-1], i++);
}
template<class T>
void relfreq (const vector<T> &flist,
vector<double> &counts, T &min, T &binsize, int &extrapoints,
int numbins=10)
{ DEFINE_TYPENAME
vector<int> hcounts;
histogram(flist, hcounts, min, binsize, extrapoints, numbins);
counts.clear();
double ls=flist.size();
ITERATE(vector<int>, ii, hcounts)
counts.push_back(*ii/ls);
}
template<class T>
void relfreq (const vector<T> &flist,
vector<double> &counts, T &rmin, T &binsize, int &extrapoints,
const T &min, const T &max, int numbins=10)
{ DEFINE_TYPENAME
vector<int> hcounts;
histogram(flist, hcounts, rmin, binsize, extrapoints, min, max, numbins);
counts.clear();
double ls=flist.size();
ITERATE(vector<int>, ii, hcounts)
counts.push_back(*ii/ls);
}
/* *********** VARIABILITY ************/
template<class T>
T samplevar(const vector<T> &flist)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("samplevar: empty list");
T me=mean(flist);
T sum=0.0;
const_SITERATE(fi, flist)
sum+=sqr(*fi-me);
return sum/flist.size();
}
template<class T>
T samplestdev(const vector<T> &flist)
{ return sqrt(samplevar(flist)); }
template<class T>
T var(const vector<T> &flist)
{ DEFINE_TYPENAME
if (flist.size()<2)
throw StatException("samplevar: empty or one-element list");
T me=mean(flist);
T sum=0.0;
const_SITERATE(fi, flist)
sum+=sqr(*fi-me);
return sum/(flist.size()-1);
}
template<class T>
T stdev(const vector<T> &flist)
{ return sqrt(var(flist)); }
template<class T>
T sterr(const vector<T> &flist)
{ return stdev(flist)/sqrt(T(flist.size())); }
template<class T>
T z(const vector<T> &flist, const T &x)
{ return (x-mean(flist))/samplestdev(flist); }
template<class T>
bool zs(const vector<T> &flist, vector<T> &zss)
{ DEFINE_TYPENAME
T me=mean(flist), ss=samplestdev(flist);
zss=vector<T>(flist.size());
iterator zi(zss.begin());
const_SITERATE(fi, flist)
*(zi++)=(*fi-me)/ss;
return true;
}
/* *********** TRIMMING FNCS ************/
template<class T>
void trimboth (const vector<T> &flist, double proportion, vector<T> &clist)
{ int tocut=int(flist.size()*proportion);
if (tocut*2>flist.size())
throw StatException("trim proportion too large");
clist=vector<T>(flist.begin()+tocut, flist.end()-tocut);
}
template<class T>
void trim1 (const vector<T> &flist, double proportion, vector<T> &clist, bool right=true)
{ int tocut=int(flist.size()*proportion);
if (tocut>flist.size())
throw StatException("trim proportion too large");
if (right)
clist=vector<T>(flist.begin(), flist.end()-tocut);
else
clist=vector<T>(flist.begin()+tocut, flist.end());
}
/* *********** PROBABILITY CALCS ************/
template<class T>
T gammln(const T &xx)
{
static T cof[6] = {76.18009173, -86.50532033, 24.01409822, -1.231739516, 0.120858003e-2, -0.536382e-5};
T x=xx, y=xx, tmp=x+5.5;
tmp-=(x+0.5)*log(tmp);
T ser=1.000000000190015;
for(int j=0; j<6; j++)
ser+=cof[j]/++y;
return -tmp + log(T(2.5066282746310005)*ser/x);
}
template<class T>
T gammcf(const T &a, const T &x, T &gln)
{ const T FPMIN(1.0e-30);
const int ITMAX= 100;
const T EPS=3.0e-7;
gln = gammln(a);
T b=x+1.0-a;
T c=T(1.0)/FPMIN;
T d=T(1.0)/b;
T h=d;
for(int i=1; i<=ITMAX; i++) {
T an=(a-double(i))*i;
b += 2.0;
d=an*d+b;
if (abs(d) < FPMIN) d=FPMIN;
c=b+an/c;
if (abs(c) < FPMIN) c=FPMIN;
d=T(1.0)/d;
T del=d*c;
h *= del;
if (abs(del-1.0) < EPS)
return exp(-x+a*log(x)-gln)*h;
}
throw StatException("gcf: a too large, ITMAX too small");
}
template<class T>
T gammser(const T &a, const T &x, T &gln)
{ const int ITMAX=100;
const T EPS=3.0e-7;
gln=gammln(a);
if (x<=0.0)
throw StatException("gser: negative x");
T ap=a;
T sum=T(1.0)/a;
T del=sum;
for(int n=1; n<=ITMAX; n++) {
++ap;
del *= x/ap;
sum += del;
if (abs(del) < fabs(sum)*EPS)
return sum*exp(-x+a*log(x)-gln);
}
throw StatException("gcf: a too large, ITMAX too small");
}
template<class T>
T gammp(const T &a, const T &x)
{ if ((x<0.0) || (a<=0.0))
throw StatException("gammp: invalid arguments");
T gln;
return (x<(a+1.0)) ? gammser(a, x, gln) : -gammcf(a, x, gln)+1.0;
}
template<class T>
T gammq(const T &a, const T &x)
{ if ((x<0.0) || (a<=0.0))
throw StatException("gammp: invalid arguments");
T gln;
return (x<(a+1.0)) ? -gammser(a, x, gln)+1.0 : gammcf(a, x, gln);
}
template<class T>
T erf(const T &x)
{ return (x<0.0) ? -gammp(T(0.5), x*x) : gammp(T(0.5), x*x); }
template<class T>
T erfc(const T &x)
{ return (x<0.0) ? T(1.0) + gammp(T(0.5), x*x) : gammq(T(0.5), x*x); }
template<class T>
T chisqprob(const T &x, const T &df)
{ return gammq(df*0.5, x*0.5);
}
template<class T>
T betacf(const T &a, const T &b, const T &x)
{ const int ITMAX = 200;
const double EPS = 3.0e-7;
T bm=1.0, az=1.0, am=1.0;
T qab=a+b;
T qap=a+1.0;
T qam=a-1.0;
T bz= -qab*x/qap + 1.0;
for(int i=0; i<=ITMAX; i++) {
T em = i+1;
T tem = em*2;
T d = em*(b-em)*x/((qam+tem)*(a+tem));
T ap = az + d*am;
T bp = bz+d*bm;
d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem));
T app = ap+d*az;
T bpp = bp+d*bz;
T aold = az;
am = ap/bpp;
bm = bp/bpp;
az = app/bpp;
bz = 1.0;
if (abs(az-aold)<(fabs(az)*EPS))
return az;
}
throw StatException("betacf: a or b too big, or ITMAX too small.");
return -1.0;
}
template<class T>
T betai(const T &a, const T &b, const T &x)
{ if ((x<0.0) || (x>1.0))
throw StatException("betai: bad x");
T bt= ((x==0.0) || (x==1.0)) ? 0.0 : exp(gammln(a+b)-gammln(a)-gammln(b)+a*log(x)+b*log(- x + 1.0));
return (x < (a+1.0)/(a+b+2.0)) ? bt*betacf(a,b,x)/a : -bt*betacf(b,a, -x+1.0)/b + 1.0;
}
template<class T>
T zprob(const T &z)
{ const T Z_MAX = 6.0;
T x;
if (z == 0.0)
x = 0.0;
else {
T y = abs(z) * 0.5;
if (y >= Z_MAX*0.5)
x = 1.0;
else if (y < 1.0) {
T w = sqr(y);
x = ((((((((w * 0.000124818987
-0.001075204047) * w +0.005198775019) * w
-0.019198292004) * w +0.059054035642) * w
-0.151968751364) * w +0.319152932694) * w
-0.531923007300) * w +0.797884560593) * y * 2.0;
}
else {
y -= 2.0;
x = (((((((((((((y * -0.000045255659
+0.000152529290) * y -0.000019538132) * y
-0.000676904986) * y +0.001390604284) * y
-0.000794620820) * y -0.002034254874) * y
+0.006549791214) * y -0.010557625006) * y
+0.011630447319) * y -0.009279453341) * y
+0.005353579108) * y -0.002141268741) * y
+0.000535310849) * y +0.999936657524;
}
}
return (z > 0.0) ? (x+1.0)*0.5 : (-x+1.0)*0.5;
}
inline double fprob(const int &dfnum, const int &dfden, const double &F)
{ return betai(dfden*0.5, dfnum*0.5, dfden/(dfden+dfnum*F)); }
template<class T>
T erfcc(const T& x)
{ T z = abs(x);
T t = T(1.0) / (z*0.5+1.0);
T ans = t * exp(-z*z-1.26551223 + t*(t*(t*(t*(t*(t*(t*(t*(t*0.17087277-0.82215223)+1.48851587)-1.13520398)+0.27886807)-0.18628806)+0.9678418)+0.37409196)+1.00002368));
return (x >= 0.0) ? ans : -ans +2.0;
}
/* *********** RANDOM NUMBERS ***************/
template<class T>
T gasdev(const T &mean, const T &dev)
{ float r, v1, v2;
do {
v1=float(rand())/RAND_MAX*2-1;
v2=float(rand())/RAND_MAX*2-1;
r=v1*v1+v2*v2;
} while ((r>1.0) || (r<0.0));
return mean + dev * v1 * sqrt(T(-2.0*log(r)/r));
}
template<class T, class RF>
T gasdev(const T &mean, const T &dev, RF &randfunc)
{ float r, v1, v2;
do {
v1=randfunc(-1.0, 1.0);
v2=randfunc(-1.0, 1.0);
r=v1*v1+v2*v2;
} while ((r>1.0) || (r<0.0));
return mean + dev * v1 * sqrt(-2.0*log(r)/r);
}
/* *********** CORRELATION FNCS ************/
template<class T>
T pearsonr(const vector<T> flist1, const vector<T> &flist2, T &probrs)
{ DEFINE_TYPENAME
const T TINY=T(1.0e-30);
if (flist1.size() != flist2.size())
throw StatException("pearsonr: lists of different sizes");
double n=flist1.size();
T sumx=0.0, sumx2=0.0, sumy=0.0, sumy2=0.0, summult=0.0;
for(const_iterator i1(flist1.begin()), i2(flist2.begin()), e1(flist1.end()); i1!=e1; i1++, i2++) {
sumx += *i1; sumx2 += sqr(*i1);
sumy += *i2; sumy2 += sqr(*i2);
summult += *i1 * *i2;
}
T r_num = summult * n - sumx * sumy;
T r_den = sqrt( (sumx2*n - sqr(sumx) ) * (sumy2*n - sqr(sumy)) );
T r = r_num / r_den;
T df=n-2;
T t = r * sqrt( df / ( (-r+1.0+TINY)*(r+1.0+TINY) ) );
probrs=betai(df*0.5, T(0.5), df/(df+t*t));
return r;
}
template<class T>
double spearmanr(const vector<T> &flist1, const vector<T> &flist2, double &probrs)
{ if (flist1.size() != flist2.size())
throw StatException("spearmanr: lists of different sizes");
double n=flist1.size();
vector<double> ranks1, ranks2;
rankdata(flist1, ranks1);
rankdata(flist2, ranks2);
double dsq=sumdiffsquared(ranks1, ranks2);
double rs=1-6*dsq / (n*(sqr(n)-1));
double df=n-2;
double t = rs * sqrt(df / ((rs+1.0)*(1.0-rs)));
probrs=betai(0.5*df,0.5,df/(df+t*t));
return rs;
}
template<class T>
double pointbiserialr(const vector<T> &flist1, const vector<T> &flist2, double &probrs)
{ throw StatException("pointbiserialr: not implemented"); }
template<class T>
double kendalltau(const vector<T> &x, const vector<T> &y, double &probrs)
{ if (x.size() != y.size())
throw StatException("kendaltau: lists of different sizes");
if (!x.size())
throw StatException("kendaltau: empty lists");
int n=x.size();
int n1=0, n2=0, iss=0;
for (int j=0; j<n-1; j++)
for (int k=j+1; k<n; k++) {
int a1=compare(x[j], x[k]);
int a2=compare(y[j], x[k]);
int aa=a1*a2;
if (aa) {
n1++; n2++;
if (aa>0) iss++;
else iss--;
}
else {
if (a1) n1++;
if (a2) n2++;
}
}
double tau = double(iss) / sqrt (double(n1)*double(n2));
double nf=n;
double svar=(4*nf+10)/(9*nf*(nf-1));
double z= tau / sqrt(svar);
probrs = erfcc(fabs(z)/1.4142136);
return tau;
}
template<class T>
void linregress(const vector<T> flist1, const vector<T> &flist2,
T &slope, T &intercepr, T &r, T &probrs, T &sterrest)
{ DEFINE_TYPENAME
const T TINY=T(1.0e-30);
if (flist1.size() != flist2.size())
throw StatException("pearsonr: lists of different sizes");
double n=flist1.size();
T sumx=0.0, sumx2=0.0, sumy=0.0, sumy2=0.0, summult=0.0;
for(const_iterator i1(flist1.begin()), i2(flist2.begin()), e1(flist1.end()); i1!=e1; i1++, i2++) {
sumx += *i1; sumx2 += sqr(*i1);
sumy += *i2; sumy2 += sqr(*i2);
summult += (*i1 * *i2);
}
T meanx = sumx/n;
T meany = sumy/n;
T r_num = summult * n - sumx * sumy;
T r_den = sqrt( (sumx2*n - sqr(sumx) ) * (sumy2*n - sqr(sumy)) );
r = r_num / r_den;
T df=n-2;
T t = r * sqrt( df / ( (-r+1.0+TINY)*(r+1.0+TINY) ) );
probrs=betai(df*0.5, T(0.5), df/(df+t*t));
// T z = log((r+1.0+TINY)/(-r+1.0+TINY));
slope = r_num / (sumx2*n - sqr(sumx));
intercepr = meany - slope*meanx;
sterrest = sqrt(-sqr(r)+1.0) * samplestdev(flist2);
}
/* *********** INFERENTIAL STATS ************/
template<class T>
T ttest_1samp(const vector<T> &flist, const T &popmean, T &prob)
{
T n=flist.size(), df=n-1.0;
T t= (mean(flist) - popmean) / sqrt(var(flist)/n);
prob=betai(df*0.5, T(0.5), df/(df+t*t));
return t;
}
template<class T>
T ttest_ind(const vector<T> &x, const vector<T> &y, T &prob)
{ T n1=x.size(), n2=y.size(), df=n1+n2-2.0;
T svar= ( sqr(stdev(x))*(n1-1.0) + sqr(stdev(y))*(n2-1.0) ) / df;
T t= ( mean(x) - mean(y) ) / sqrt(svar * ( (n1+n2)/(n1*n2) ) );
prob=betai(df*0.5, T(0.5), df/(df+t*t));
return t;
}
template<class T>
T ttest_rel(const vector<T> &x, const vector<T> &y, T &prob)
{ if (x.size() != y.size())
throw StatException("ttest_rel: lists of different sizes");
T meanx=mean(x);
T meany=mean(y);
T n=x.size();
T cov=0.0;
for(int i=0; i<x.size(); i++)
cov += (x[i]-meanx) * (y[i]-meany);
T df=n-1.0;
cov /= df;
T sd = sqrt((var(x)+var(y) - cov*2.0)/n);
if (sd==0.0)
throw StatException("ttest_rel: sd==0, can't divide");
T t = (mean(x)-mean(y))/sd;
prob=betai(df*0.5, T(0.5), df/(df+t*t));
return t;
}
template<class T>
T chisquare(const vector<T> &x, const vector<T> *exp, T &prob)
{ int n=x.size();
T chisq=0.0;
if (exp) {
if (exp->size()!=n)
throw StatException("chi_square: lists of different sizes");
for(int i=0; i<n; i++)
chisq+=sqr(x[i]-(*exp)[i]) / (*exp)[i];
}
else {
T invted=sum(x)/T(n);
for(int i=0; i<n; i++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -