📄 stat.hpp
字号:
chisq+=sqr(x[i]-invted) / invted;
}
prob=chisqprob(chisq, T(n-1));
return chisq;
}
template<class T>
T min_el(const T &x, const T &y)
{ return x<y ? x : y; }
template<class T>
T max_el(const T &x, const T &y)
{ return x>y ? x : y; }
template<class T>
T chisquare2d(const vector<vector<T> > &cont,
int &df, T &prob, T &cramerV, T &contingency_coeff)
{ if (!cont.size())
throw StatException("chisquare2d: invalid contingency table");
const T TINY=1.0e-30;
int ni=cont.size();
int nj=cont[0].size();
if (!nj)
throw StatException("chisquare2d: invalid contingency table");
vector<T> sumi(ni), sumj(nj);
int ci, cj;
for(ci=0; ci<ni; ci++)
sumi.push_back(T(0.0));
for(cj=0; cj<nj; cj++)
sumj.push_back(T(0.0));
for(ci=0; ci<ni; ci++) {
if (cont[ci].size()!=nj)
throw StatException("chisquare2d: invalid contingency table");
for(cj=0; cj<nj; cj++) {
sumi[ci]+=cont[ci][cj];
sumj[cj]+=cont[ci][cj];
}
}
T sum=0.0;
int nnj=0;
for(cj=0; cj<nj; cj++)
if (sumj[cj]>0.0) {
sum+=sumj[cj];
nnj++;
}
int nni=0;
T chisq=0.0;
for(ci=0; ci<ni; ci++) {
if (sumi[ci]>0.0)
nni++;
for(cj=0; cj<nj; cj++) {
T expctd=sumj[cj]*sumi[ci] / sum;
chisq += sqr(cont[ci][cj]-expctd) / (expctd+TINY);
}
}
df=(nni-1)*(nnj-1);
prob=chisqprob(chisq, T(df));
cramerV = sqrt (chisq/(sum*(min_el(nni, nnj)-1.0)));
contingency_coeff=sqrt(chisq/(chisq+sum));
return chisq;
}
template<class T>
T anova_rel(const vector<vector<T> > &cont, int &df_bt, int &df_err, T &prob)
{
DEFINE_TYPENAME
int k = cont.size();
int n = cont[0].size();
if ((n<2) || (k<2))
throw StatException("anova_rel: invalid contingency table");
int N = k*n;
T G = T(0.0), SS_wt = T(0.0), SS_total = T(0.0), SS_bt = T(0.0), SS_bs = T(0.0), SS_err;
vector<T> Ps(n, T(0.0));
iterator Psi, Pse(Ps.end());
for(typename vector<vector<T> >::const_iterator conti(cont.begin()), conte(cont.end()); conti!=conte; conti++) {
if ((*conti).size() != n)
throw StatException("anova_rel: number of subject is not the same in all groups");
T t = T(0.0), tt = T(0.0);
Psi = Ps.begin();
for(typename vector<T>::const_iterator contii((*conti).begin()), contie((*conti).end()); contii!=contie; contii++, Psi++) {
t += (*contii);
*Psi += (*contii);
tt += (*contii) * (*contii);
}
G += t;
SS_total += tt;
SS_wt += tt - t*t/n;
SS_bt += t*t;
}
for(Psi = Ps.begin(); Psi != Pse; Psi++)
SS_bs += *Psi * *Psi;
const T GG_N = G*G/N;
SS_total -= GG_N;
SS_bt = SS_bt/n - GG_N;
SS_bs = SS_bs/k - GG_N;
SS_err = SS_wt - SS_bs;
df_bt = (k-1);
df_err = (N-k) - (n-1);
if (SS_err < 1e-20) {
prob = 0.0;
return T(-1.0);
}
T MS_bt = SS_bt / df_bt;
T MS_err = SS_err / df_err;
T F = MS_bt / MS_err;
prob = fprob(df_bt, df_err, F);
return F;
}
template<class T>
T friedmanf(const vector<vector<T> > &cont, double &chi2, int &dfnum, int &dfden, double &prob)
{
DEFINE_TYPENAME
int k = cont.size();
int N = cont[0].size();
if ((N<2) || (k<2))
throw StatException("friedmanf: invalid contingency table");
vector<vector<T> > transposed;
int line;
for(line = 0; line < N; line++)
transposed.push_back(vector<T>());
typename vector<vector<T> >::iterator trani;
const typename vector<vector<T> >::iterator tranb(transposed.begin()), trane(transposed.end());
for(typename vector<vector<T> >::const_iterator conti(cont.begin()), conte(cont.end()); conti != conte; conti++) {
if ((*conti).size() != N)
throw StatException("friedmanf: number of subject is not the same in all groups");
trani = tranb;
for(const_iterator contii((*conti).begin()); trani!=trane; trani++, contii++)
(*trani).push_back(*contii);
}
vector<double> R(k, 0.0), tranks;
typename vector<double>::iterator Ri, Rb(R.begin()), Re(R.end()), tri;
for(trani = tranb; trani != trane; trani++) {
rankdata(*trani, tranks);
for(Ri = Rb, tri = tranks.begin(); Ri != Re; Ri++, tri++)
*Ri += *tri;
}
double RR = 0.0;
for(Ri = Rb; Ri != Re; Ri++)
RR += *Ri * *Ri;
chi2 = 12 * N / float(k*(k+1)) * (RR/N/N - k*(k+1)*(k+1) / 4.0);
double F = (N-1) * chi2 / (N*(k-1) - chi2);
prob = fprob(k-1, (k-1)*(N-1), F);
return F;
}
template<class T>
double mannwhitneyu(const vector<T> &x, const vector<T> &y, double &prob)
{ vector<T> both(x);
both.insert(both.end(), y.begin(), y.end());
vector<double> ranks;
rankdata(both, ranks);
int n1=x.size(), n2=y.size();
double u1=n1*n2 + (n1*(n1+1))/2.0;
double u2=n1*n2 + (n2*(n2+1))/2.0;
vector<double>::iterator ri;
for(ri=ranks.begin(); n1--; u1 -= *(ri++));
while(ri!=ranks.end())
u2 -= *(ri++);
double bigu=max_el(u1, u2);
double smallu=min_el(u1, u2);
double sd=sqrt(n1*n2*(n1+n2+1)/12.0);
if (sd==0)
throw StatException("mannwhitneyu: empty group");
double z=abs((bigu-n1*n2/2.0) / sd);
prob = 1.0 - zprob(z);
return smallu;
}
template<class T, class G, class C>
double mannwhitneyu(const vector<T> &x, double &prob, const G &group, const C &compare)
{ DEFINE_TYPENAME
vector<double> ranks;
rankdata(x, ranks, compare);
double u1=0.0, u2=0.0;
int n1=0, n2=0;
vector<double>::iterator ri(ranks.begin());
const_SITERATE(xi, x)
if (group(*xi)) {
n1++;
u1-= *(ri++);
}
else {
n2++;
u2-= *(ri++);
}
u1+=n1*n2 + (n1*(n1+1))/2.0;
u2+=n1*n2 + (n2*(n2+1))/2.0;
double bigu=max_el(u1, u2);
double smallu=min_el(u1, u2);
double sd=sqrt(n1*n2*(n1+n2+1)/12.0);
if (sd==0)
throw StatException("mannwhitneyu: empty group");
double z=abs((bigu-n1*n2/2.0) / sd);
prob = 1.0 - zprob(z);
return smallu;
}
template<class T>
double ranksums(const vector<T> &x, const vector<T> &y, double &prob)
{ vector<T> both(x);
both.insert(both.end(), y.begin(), y.end());
vector<double> ranks;
rankdata(both, ranks);
int n1c=x.size();
double sum=0.0;
for(vector<double>::iterator ri(ranks.begin()); n1c--; sum += *(ri++));
double n1=x.size(), n2=y.size();
double expected = n1*(n1+n2+1) / 2.0;
double z = (sum-expected) / sqrt(n1*n2*(n1+n2+1)/12.0);
prob=zprob(z);
return z;
}
template<class T, class G, class C>
double ranksums(const vector<T> &x, double &prob, const G &group, const C &compare)
{ DEFINE_TYPENAME
vector <double> ranks;
rankdata(x, ranks, compare);
vector<double>::iterator ri(ranks.begin());
double sum=0.0;
int obs=0;
const_SITERATE(xi, x)
if (group(*xi)) {
sum += *(ri++);
obs++;
}
else
ri++;
double n1=obs, n2=x.size()-n1;
double expected = n1*(n1+n2+1) / 2.0;
double z = (sum-expected) / sqrt(n1*n2*(n1+n2+1)/12.0);
prob=zprob(z);
return z;
}
template<class T>
double wilcoxont(const vector<T> &x, const vector<T> &y, double &prob)
{ DEFINE_TYPENAME
if (x.size() != y.size())
throw StatException("ttest_rel: lists of different sizes");
vector<T> d, absd;
for(const_iterator xi(x.begin()), xe(x.end()), yi(y.begin());
xi!=xe;
xi++, yi++)
if (*xi!=*yi) {
d.push_back(*xi-*yi);
absd.push_back(abs(d.back()));
}
if (!d.size()) {
prob = 1.0;
return 0.0;
}
vector<double> absdranks;
rankdata(absd, absdranks);
double r_plus=0.0, r_minus=0.0;
for(int i=0; i<d.size(); *( (d[i]<0.0) ? &r_minus : &r_plus ) += absdranks[i], i++);
double N=d.size();
double se = sqrt(N*(N+1)*(2*N+1)/24.0);
double wt = min_el(r_plus, r_minus);
double z = fabs(wt-N*(N+1)*0.25)/se;
prob=1.0-zprob(z);
return wt;
}
// This follows http://www-2.cs.cmu.edu/afs/cs/project/jair/pub/volume4/cohn96a-html/node7.html
template<class T, class U>
T loess_y(const T &refx, map<T, U> points, const float &windowProp)
{ typedef typename map<T, U>::const_iterator mapiterator;
mapiterator from, to;
/* Find the window */
mapiterator lowedge = points.begin();
mapiterator highedge = points.end();
int needpoints = int(ceil(points.size() * windowProp));
if ((needpoints<=1) || (needpoints>=points.size())) {
from = lowedge;
to = highedge;
return ((*--highedge).first - (*lowedge).first)/2.0;
}
from = points.lower_bound(refx);
to = points.upper_bound(refx);
if (from==to)
if (to != highedge)
to++;
else
from --;
/* Extend the interval; we set from to highedge when it would go beyond lowedge, to indicate that only to can be modified now */
while (needpoints--) {
if ((to == highedge) || ((from != highedge) && (refx - (*from).first < (*to).first - refx))) {
if (from == lowedge)
from = highedge;
else
from--;
}
else
to++;
}
if (from == highedge)
from = lowedge;
else
from++;
mapiterator tt = to;
--tt;
T h = (refx - (*from).first);
if ((*tt).first - refx > h)
h = ((*tt).first - refx);
h *= T(1.1);
/* Iterate through the window */
T Sx = 0.0, Sy = 0.0, Sxx = 0.0, Sxy = 0.0;
T n = 0.0;
for (; from != to; from++) {
const T &x = (*from).first;
const T &y = (*from).second;
T w = abs(refx - x) / h;
w = 1 - w*w*w;
w = w*w*w;
n += w;
Sx += w * x;
Sxx += w * x * x;
Sy += w * y;
Sxy += w * x * y;
}
if (n==0)
return Sy;
T div = Sxx - Sx * Sx / n;
if (!div)
return Sy;
return Sy/n + (Sxy - Sx*Sy/n) / div * (refx - Sx/n);
}
enum { DISTRIBUTE_MINIMAL, DISTRIBUTE_FACTOR, DISTRIBUTE_FIXED, DISTRIBUTE_UNIFORM };
template<class T, class U>
void distributePoints(const map<T, U> points, int nPoints, vector<T> &result, int method = DISTRIBUTE_MINIMAL)
{ typedef typename map<T, U>::const_iterator mapiterator;
if ((method == DISTRIBUTE_MINIMAL) && (nPoints<0)) {
nPoints = -nPoints;
method = DISTRIBUTE_FACTOR;
}
result.clear();
switch (method) {
case DISTRIBUTE_FACTOR: {
for (mapiterator pi(points.begin()), pe(points.end());;) {
T ax = (*pi).first;
result.push_back(ax);
if (++pi==pe)
break;
// We could write this faster, but we don't want to run into problems with rounding floats
T div = ((*pi).first - ax) / nPoints;
for (int i=1; i < nPoints; i++)
result.push_back(ax + i*div);
}
return;
}
case DISTRIBUTE_MINIMAL: {
if (nPoints<=points.size()) {
for (mapiterator pi(points.begin()), pe(points.end()); pi != pe; pi++)
result.push_back((*pi).first);
}
else {
T ineach = float(nPoints - points.size()) / float(points.size()-1);
T inthis = T(0.0);
for (mapiterator pi(points.begin()), pe(points.end());;) {
T ax = (*pi).first;
result.push_back(ax);
if (++pi==pe)
break;
inthis += ineach;
if (inthis>=T(0.5)) {
T dif = ((*pi).first - ax) / (int(floor(inthis))+1);
while (inthis>T(0.5)) {
result.push_back(ax += dif);
inthis -= T(1.0);
}
}
}
}
return;
}
case DISTRIBUTE_FIXED: {
float step = points.size()/(nPoints-2);
float up = 1.5;
T x1 = T(0.0);
T dx = T(0.0);
for(mapiterator pi(points.begin()), pe(points.end());;) {
do {
x1 = (*pi).first;
if (++pi==pe)
break;
T dx = (*pi).first - x1;
up -= 1.0;
} while (up>1.0);
if (pi==pe)
break;
for (; up<1.0; up += step)
result.push_back(x1 + dx*up);
}
result.push_back(x1);
return;
}
case DISTRIBUTE_UNIFORM: {
T fi = (*points.begin()).first;
mapiterator pe(points.end());
pe--;
T rg = ((*pe).first-fi) / (nPoints-1);
for (int i = 0; i<nPoints; i++)
result.push_back(fi + i*rg);
return;
}
}
}
int nUniquePoints(const vector<double> &points);
void samplingFactor (const vector<double> &points, int nPoints, vector<double> &result);
void samplingFactor (const map<double, double> &points, int nPoints, vector<double> &result);
void samplingMinimal(const vector<double> &points, int nPoints, vector<double> &result);
void samplingMinimal(const map<double, double> &points, int nPoints, vector<double> &result);
void samplingFixed (const vector<double> &points, int nPoints, vector<double> &result);
void samplingFixed (const map<double, double> &points, int nPoints, vector<double> &result);
void samplingUniform(const vector<double> &points, int nPoints, vector<double> &result);
void samplingUniform(const map<double, double> &points, int nPoints, vector<double> &result);
template<class T, class U>
void loess(const U &points, int nPoints, const float &windowProp, map<T, T> &loess_curve, int distributionMethod = DISTRIBUTE_MINIMAL)
{ DEFINE_TYPENAME
vector<T> xpoints;
distributePoints(points, nPoints, xpoints, distributionMethod);
for (const_iterator xi(xpoints.begin()), xe(xpoints.end()); xi!=xe; xi++)
loess_curve[*xi] = loess_y(*xi, points, windowProp);
}
class TXYW {
public:
double x, y, w;
TXYW(const double &ax, const double &ay, const double &aw = 1.0)
: x(ax), y(ay), w(aw)
{}
TXYW(const TXYW &o)
: x(o.x), y(o.y), w(o.w)
{}
};
// refpoints and points should be sorted
void loess(const vector<double> &refpoints, const vector<TXYW> &points, const float &windowProp, vector<pair<double, double> > &result);
void lwr(const vector<double> &refpoints, const vector<TXYW> &points, const float &smoothFactor, vector<pair<double, double> > &result);
void loess(const vector<double> &refpoints, const vector<pair<double, double> > &points, const float &windowProp, vector<pair<double, double> > &result);
void loess(const vector<double> &refpoints, const map<double, double> &points, const float &windowProp, vector<pair<double, double> > &result);
void lwr(const vector<double> &refpoints, const vector<pair<double, double> > &points, const float &smoothFactor, vector<pair<double, double> > &result);
void lwr(const vector<double> &refpoints, const map<double, double> &points, const float &smoothFactor, vector<pair<double, double> > &result);
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -