📄 stat.hpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
/* Most of the functions on this file are adapted from stat.py, which is in
turn based on number of various sources. */
#ifndef STAT_HPP
#define STAT_HPP
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <vector>
#include <map>
#include <algorithm>
#include <limits>
using namespace std;
#include "stladdon.hpp"
#include "stat.hpp"
#include "statexceptions.hpp"
//#include "random.hpp"
#define SITERATE(iter, pna) for(iterator iter((pna).begin()), iter##_end((pna).end()); iter!=iter##_end; iter++)
#define const_SITERATE(iter, pna) for(const_iterator iter((pna).begin()), iter##_end((pna).end()); iter!=iter##_end; iter++)
#define DEFINE_TYPENAME typedef typename vector<T>::iterator iterator; typedef typename vector<T>::const_iterator const_iterator;
/* *********** AUXILIARY FUNCTIONS ************/
#ifndef _MSC_VER_70
inline double abs(double x)
{ return fabs(x); }
#endif
template<class T>
inline T sqr(const T &x) { return x*x; }
template<class T>
inline int convert_to_int(const T &x) { return int(x); }
template<class T>
inline double convert_to_double(const T &x) { return double(x); }
template<class T>
int compare(const T &x, const T &y)
{ if (x<y) return -1;
if (x>y) return 1;
return 0;
}
template<class T>
class CompareByIndex
{ const vector<T> &items;
public:
CompareByIndex(const vector<T> &po)
: items(po) {}
int operator()(const int &i1, const int &i2) const
{ return items[i1]<items[i2]; }
};
template<class T, class U>
class CompareByIndex_pred
{ const vector<T> &items;
U less_than;
public:
CompareByIndex_pred(const vector<T> &po, U cf)
: items(po), less_than(cf) {}
int operator()(const int &i1, const int &i2) const
{ return less_than(items[i1], items[i2]); }
};
/* *********** SUPPORT FUNCTIONS ************/
template<class T>
T sum(const vector<T> &x, const T &init=0.0)
{ DEFINE_TYPENAME
T sum=init;
const_SITERATE(ii, x)
sum += *ii;
return sum;
}
template<class T>
void cumsum(const vector<T> &x, vector<T> &res, const T &init=0.0)
{ DEFINE_TYPENAME
T sum=init;
const_SITERATE(ii, x) {
sum += *ii;
res.push_back(sum);
}
}
template<class T>
T ss(const vector<T> &x, const T &init=0.0)
{ DEFINE_TYPENAME
T sum=init;
const_SITERATE(ii, x)
sum+=sqr(*ii);
return sum;
}
template<class T>
T summult(const vector<T> &x, const vector<T> &y, const T &init=0.0)
{ DEFINE_TYPENAME
if (x.size() != y.size())
throw StatException("summult: lists of different sizes");
T sum=init;
for(const_iterator xi(x.begin()), xe(x.end()), yi(y.begin());
xi!=xe; sum+= *(xi++) * *(yi++));
return sum;
}
template<class T>
T sumdiffsquared(const vector<T> &flist1, const vector<T> &flist2, const T &init=0.0)
{ DEFINE_TYPENAME
if (flist1.size() != flist2.size())
throw StatException("sumdiffsquared: lists of different sizes");
T sum=init;
for(const_iterator i1(flist1.begin()), e1(flist1.end()), i2(flist2.begin()); i1!=e1; sum+=sqr(*(i1++)-*(i2++)));
return sum;
}
template<class T>
T sumsquared(vector<T> &x, const T &init=0.0)
{ return sqr(sum(x, init)); }
template<class T>
bool shellsort(const vector<T> &flist, vector<int> &indices, vector<T> &slist)
{ DEFINE_TYPENAME
int len=flist.size();
indices=vector<int>(len);
vector<int>::iterator ii=indices.begin();
for(int idx=0; idx<len; idx++)
*(ii++)=idx;
stable_sort(indices.begin(), indices.end(), CompareByIndex<T>(flist));
slist=vector<T>(len);
iterator si(slist.begin());
ITERATE(vector<int>, i2, indices)
*(si++)=flist[*i2];
return true;
}
template<class T, class U>
bool shellsort(const vector<T> &flist, vector<int> &indices, vector<T> &slist, const U &cf)
{ DEFINE_TYPENAME
int len=flist.size();
indices=vector<int>(len);
vector<int>::iterator ii=indices.begin();
for(int idx=0; idx<len; idx++)
*(ii++)=idx;
stable_sort(indices.begin(), indices.end(), CompareByIndex_pred<T, U>(flist, cf));
slist=vector<T>();
slist.reserve(len);
ITERATE(vector<int>, i2, indices)
slist.push_back(flist[*i2]);
return true;
}
template<class T>
bool rankdata(const vector<T> &flist, vector<double> &ranks)
{ vector<T> items;
vector<int> indices;
shellsort(flist, indices, items);
int sumranks=0, dupcount=0, n=indices.size();
ranks=vector<double>(n);
for(int beg=0, en; beg<n; beg=en) {
const T &begi=items[beg];
for(en=beg+1; (en<n) && (begi==items[en]); en++);
double averank=double((en-1)+beg)/2 + 1;
while(beg<en)
ranks[indices[beg++]]=averank;
}
return true;
}
template<class T, class U>
bool rankdata(const vector<T> &flist, vector<double> &ranks, U cf)
{ vector<T> items;
vector<int> indices;
shellsort(flist, indices, items, cf);
int sumranks=0, dupcount=0, n=indices.size();
ranks=vector<double>(n);
for(int beg=0, en; beg<n; beg=en) {
for(en=beg+1; (en<n) && (!cf(items[beg], items[en])); en++);
double averank=double((en-1)+beg)/2 + 1;
while(beg<en)
ranks[indices[beg++]]=averank;
}
return true;
}
/* *********** CENTRAL TENDENCY ************/
template<class T>
T geometricmean(const vector<T> &flist)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("geometricmean: empty list");
T mult=1.0;
const_SITERATE(fi, flist)
mult *= *fi;
if (mult<=0.0)
throw StatException("geometricmean: non-positive product");
return exp(log(mult)/flist.size());
}
template<class T>
T harmonicmean(const vector<T> &flist)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("harmonicmean: empty list");
T sum=0.0;
const_SITERATE(fi, flist)
if (*fi==0.0)
throw StatException("harmonicmean: division by zero");
else sum+=T(1.0) / *fi;
return T(flist.size())/sum;
}
template<class T>
T mean(const vector<T> &flist)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("mean: empty list");
T sum=0.0;
const_SITERATE(fi, flist)
sum += *fi;
return sum/flist.size();
}
template<class T>
T median(const vector<T> &med)
{ if (!med.size())
throw StatException("median: empty list");
vector<T> med2(med);
nth_element(med2.begin(), med2.begin()+med2.size()/2, med2.end());
return middleelement(med2);
}
template<class T, class C>
T median(const vector<T> &med, const C &compare)
{ if (!med.size())
throw StatException("median: empty list");
vector<T> med2(med);
nth_element(med2.begin(), med2.begin()+med2.size()/2, med2.end(), compare);
return middleelement(med2);
}
template<class T>
T middleelement(const vector<T> &med)
{ DEFINE_TYPENAME
const_iterator medmid(med.begin()+med.size()/2);
if (med.size()%2)
return *min_element(medmid, med.end());
else
return (*max_element(med.begin(), medmid) + *min_element(medmid, med.end()))/2.0;
}
template<class T>
int mode(const vector<T> &flist, vector<T> &mode)
{ DEFINE_TYPENAME
typedef typename map<T, int>::iterator mapiterator;
if (!flist.size())
throw StatException("mode: empty list");
map<T, int> bins;
const_SITERATE(fi, flist) {
mapiterator bi=bins.lower_bound(*fi);
if ((bi==bins.end()) || ((*bi).first!=*fi))
bins[*fi]=1;
else
(*bi).second++;
}
int count=0;
for(mapiterator bi(bins.begin()), be(bins.end()); bi!=be; bi++)
if ((*bi).second>count) {
count=(*bi).second;
mode.clear();
mode.push_back((*bi).first);
}
else if ((*bi).second==count) {
mode.push_back((*bi).first);
}
return count;
}
template<class T, class C>
int mode(const vector<T> &flist, vector<T> &mode, const C &compare)
{ DEFINE_TYPENAME
typedef typename map<T, int, C>::iterator mapiterator;
if (!flist.size())
throw StatException("mode: empty list");
map<T, int, C> bins(compare);
const_SITERATE(fi, flist) {
mapiterator bi=bins.lower_bound(*fi);
if ((bi==bins.end()) || ((*bi).first!=*fi))
bins[*fi]=1;
else
(*bi).second++;
}
int count=0;
for(mapiterator bi(bins.begin()), be(bins.end()); bi!=be; bi++)
if ((*bi).second>count) {
count=(*bi).second;
mode.clear();
mode.push_back((*bi).first);
}
else if ((*bi).second==count) {
mode.push_back((*bi).first);
}
return count;
}
/* *********** MOMENTS ************/
template<class T>
T moment(const vector<T> &flist, const int &mom)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("moment: empty list");
T me=mean(flist);
if (mom==1) return me;
if (mom==2) return samplevar(flist);
T sum=0.0;
const_SITERATE(fi, flist) {
T dx= *fi-me;
if (dx>0.0)
sum+=exp(log(dx)*mom);
else if (dx<0.0)
if (mom%2)
sum-=exp(log(-dx)*mom);
else
sum+=exp(log(-dx)*mom);
}
return sum/flist.size();
}
template<class T>
T variation(const vector<T> &flist)
{ return samplestdev(flist)/mean(flist) * 100.0;
}
template<class T>
T skewness(const vector<T> &flist)
{ T mom2=samplevar(flist);
if (mom2==0.0)
throw StatException("skewness: variation is 0.0");
return moment(flist, 3) / exp(log(mom2)*1.5);
}
template<class T>
T kurtosis(const vector<T> &flist)
{ T mom2=samplevar(flist);
if (mom2==0.0)
throw StatException("skewness: variation is 0.0");
return moment(flist, 4) / sqr(mom2);
}
/* *********** FREQUENCY STATS************/
template<class T>
T scoreatpercentile(const vector<T> &flist, const double &perc)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("mode: empty list");
vector<T> l2(flist);
iterator mid(l2.begin()+int(l2.size()*perc/100.0+0.5));
nth_element(l2.begin(), mid, l2.end());
return *min_element(mid, l2.end());
}
template<class T, class C>
T scoreatpercentile(const vector<T> &flist, const double &perc, const C &compare)
{ DEFINE_TYPENAME
if (!flist.size())
throw StatException("mode: empty list");
vector<T> l2(flist);
iterator mid(l2.begin()+int(l2.size()*perc/100.0+0.5));
nth_element(l2.begin(), mid, l2.end(), compare);
return *min_element(mid, l2.end());
}
template<class T>
double percentileofscore(const vector<T> &flist, const T &x)
{ DEFINE_TYPENAME
vector<T> l2(flist);
iterator part=partition(l2.begin(), l2.end(), bind2nd(less<T>(), x));
return double(part-l2.begin())/l2.size() * 100.0;
}
template<class T, class C>
double percentileofscore(const vector<T> &flist, const T &x, const C &compare)
{ DEFINE_TYPENAME
vector<T> l2(flist);
iterator part=partition(l2.begin(), l2.end(), bind2nd(compare, x));
return double(part-l2.begin())/l2.size() * 100.0;
}
template<class T>
void histogram (const vector<T> &flist,
vector<int> &counts, T &min, T &binsize, int &extrapoints,
int numbins=10)
{ DEFINE_TYPENAME
T max;
min=*min_element(flist.begin(), flist.end());
max=*max_element(flist.begin(), flist.end());
T ebw=(max-min)/T(numbins) + 1.0;
binsize=(max-min+ebw)/T(numbins);
min-=binsize/2;
counts=vector<int>(numbins, 0);
extrapoints=0;
const_SITERATE(ii, flist) {
int binno=convert_to_int((*ii-min)/binsize);
if (binno<numbins)
counts[binno]++;
else
extrapoints++;
}
}
template<class T>
void histogram (const vector<T> &flist,
vector<int> &counts, T &rmin, T &binsize, int &extrapoints,
const T &min, const T &max, int numbins=10)
{ DEFINE_TYPENAME
rmin=min;
binsize=(max-min)/T(numbins);
counts=vector<int>(numbins, 0);
extrapoints=0;
const_SITERATE(ii, flist) {
int binno=convert_to_int((*ii-min)/(binsize));
if (binno<numbins)
counts[binno]++;
else
extrapoints++;
}
}
template<class T>
void cumfreq (const vector<T> &flist,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -