⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stat.hpp

📁 orange源码 数据挖掘技术
💻 HPP
📖 第 1 页 / 共 3 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/

/*  Most of the functions on this file are adapted from stat.py, which is in
    turn based on number of various sources. */

#ifndef STAT_HPP
#define STAT_HPP

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <vector>
#include <map>
#include <algorithm>
#include <limits>

using namespace std;

#include "stladdon.hpp"
#include "stat.hpp"
#include "statexceptions.hpp"
//#include "random.hpp"

#define SITERATE(iter, pna)  for(iterator iter((pna).begin()), iter##_end((pna).end()); iter!=iter##_end; iter++)
#define const_SITERATE(iter, pna)  for(const_iterator iter((pna).begin()), iter##_end((pna).end()); iter!=iter##_end; iter++)
#define DEFINE_TYPENAME typedef typename vector<T>::iterator iterator; typedef typename vector<T>::const_iterator const_iterator;


/* *********** AUXILIARY FUNCTIONS ************/

#ifndef _MSC_VER_70
inline double abs(double x)
{ return fabs(x); }
#endif

template<class T>
inline T sqr(const T &x) { return x*x; }


template<class T>
inline int convert_to_int(const T &x) { return int(x); }

template<class T>
inline double convert_to_double(const T &x) { return double(x); }


template<class T>
int compare(const T &x, const T &y)
{ if (x<y) return -1;
  if (x>y) return 1;
  return 0;
}

template<class T>
class CompareByIndex
{ const vector<T> &items;
public:
  
  CompareByIndex(const vector<T> &po)
   : items(po) {}

  int operator()(const int &i1, const int &i2) const
  { return items[i1]<items[i2]; }
};


template<class T, class U>
class CompareByIndex_pred
{ const vector<T> &items;
  U less_than;

public:
  CompareByIndex_pred(const vector<T> &po, U cf)
   : items(po), less_than(cf) {}

  int operator()(const int &i1, const int &i2) const
  { return less_than(items[i1], items[i2]); }
};




/* *********** SUPPORT FUNCTIONS ************/



template<class T>
T sum(const vector<T> &x, const T &init=0.0)
{ DEFINE_TYPENAME
  T sum=init;
  const_SITERATE(ii, x)
    sum += *ii;
  return sum;
}

template<class T>
void cumsum(const vector<T> &x, vector<T> &res, const T &init=0.0)
{ DEFINE_TYPENAME
  T sum=init;
  const_SITERATE(ii, x) {
    sum += *ii;
    res.push_back(sum);
  }
}

template<class T>
T ss(const vector<T> &x, const T &init=0.0)
{ DEFINE_TYPENAME
  T sum=init;
  const_SITERATE(ii, x)
    sum+=sqr(*ii);
  return sum;
}


template<class T>
T summult(const vector<T> &x, const vector<T> &y, const T &init=0.0)
{ DEFINE_TYPENAME
  
  if (x.size() != y.size())
    throw StatException("summult: lists of different sizes");

  T sum=init;
  for(const_iterator xi(x.begin()), xe(x.end()), yi(y.begin());
      xi!=xe; sum+= *(xi++) * *(yi++));
  return sum;
}


template<class T>
T sumdiffsquared(const vector<T> &flist1, const vector<T> &flist2, const T &init=0.0)
{ DEFINE_TYPENAME

  if (flist1.size() != flist2.size())
    throw StatException("sumdiffsquared: lists of different sizes");

  T sum=init;
  for(const_iterator i1(flist1.begin()), e1(flist1.end()), i2(flist2.begin()); i1!=e1; sum+=sqr(*(i1++)-*(i2++)));
  return sum;
}


template<class T>
T sumsquared(vector<T> &x, const T &init=0.0)
{ return sqr(sum(x, init)); }

template<class T>
bool shellsort(const vector<T> &flist, vector<int> &indices, vector<T> &slist)
{ DEFINE_TYPENAME
  
  int len=flist.size();
  indices=vector<int>(len);
  vector<int>::iterator ii=indices.begin();
  for(int idx=0; idx<len; idx++)
    *(ii++)=idx;

  stable_sort(indices.begin(), indices.end(), CompareByIndex<T>(flist));

  slist=vector<T>(len);
  iterator si(slist.begin());
  ITERATE(vector<int>, i2, indices)
    *(si++)=flist[*i2];
    
  return true;
}


template<class T, class U>
bool shellsort(const vector<T> &flist, vector<int> &indices, vector<T> &slist, const U &cf)
{ DEFINE_TYPENAME
  
  int len=flist.size();
  indices=vector<int>(len);
  vector<int>::iterator ii=indices.begin();
  for(int idx=0; idx<len; idx++)
    *(ii++)=idx;

  stable_sort(indices.begin(), indices.end(), CompareByIndex_pred<T, U>(flist, cf));

  slist=vector<T>();
  slist.reserve(len);
  ITERATE(vector<int>, i2, indices)
    slist.push_back(flist[*i2]);
    
  return true;
}


template<class T>
bool rankdata(const vector<T> &flist, vector<double> &ranks)
{ vector<T> items;
  vector<int> indices;
  shellsort(flist, indices, items);

  int sumranks=0, dupcount=0, n=indices.size();
  ranks=vector<double>(n);
  for(int beg=0, en; beg<n; beg=en) {
    const T &begi=items[beg];
    for(en=beg+1; (en<n) && (begi==items[en]); en++);
    double averank=double((en-1)+beg)/2 + 1;
    while(beg<en)
      ranks[indices[beg++]]=averank;
  }

  return true;
}


template<class T, class U>
bool rankdata(const vector<T> &flist, vector<double> &ranks, U cf)
{ vector<T> items;
  vector<int> indices;
  shellsort(flist, indices, items, cf);

  int sumranks=0, dupcount=0, n=indices.size();
  ranks=vector<double>(n);
  for(int beg=0, en; beg<n; beg=en) {
    for(en=beg+1; (en<n) && (!cf(items[beg], items[en])); en++);
    double averank=double((en-1)+beg)/2 + 1;
    while(beg<en)
      ranks[indices[beg++]]=averank;
  }

  return true;
}

/* *********** CENTRAL TENDENCY ************/


template<class T>
T geometricmean(const vector<T> &flist)
{ DEFINE_TYPENAME
  if (!flist.size())
    throw StatException("geometricmean: empty list");

  T mult=1.0;
  const_SITERATE(fi, flist)
    mult *= *fi;

  if (mult<=0.0)
    throw StatException("geometricmean: non-positive product");

  return exp(log(mult)/flist.size());
}


template<class T>
T harmonicmean(const vector<T> &flist)
{ DEFINE_TYPENAME
  if (!flist.size())
    throw StatException("harmonicmean: empty list");

  T sum=0.0;
  const_SITERATE(fi, flist)
    if (*fi==0.0)
      throw StatException("harmonicmean: division by zero");
    else sum+=T(1.0) / *fi;
  return T(flist.size())/sum;
}


template<class T>
T mean(const vector<T> &flist)
{ DEFINE_TYPENAME
  if (!flist.size())
    throw StatException("mean: empty list");

  T sum=0.0;
  const_SITERATE(fi, flist)
    sum += *fi;
  return sum/flist.size();
}


template<class T>
T median(const vector<T> &med)
{ if (!med.size())
    throw StatException("median: empty list");

  vector<T> med2(med);
  nth_element(med2.begin(), med2.begin()+med2.size()/2, med2.end());
  return middleelement(med2);
}


template<class T, class C>
T median(const vector<T> &med, const C &compare)
{ if (!med.size())
    throw StatException("median: empty list");

  vector<T> med2(med);
  nth_element(med2.begin(), med2.begin()+med2.size()/2, med2.end(), compare);
  return middleelement(med2);
}


template<class T>
T middleelement(const vector<T> &med)
{ DEFINE_TYPENAME
  const_iterator medmid(med.begin()+med.size()/2);
  if (med.size()%2)
    return *min_element(medmid, med.end());
  else
    return (*max_element(med.begin(), medmid) + *min_element(medmid, med.end()))/2.0;
}


template<class T>
int mode(const vector<T> &flist, vector<T> &mode)
{ DEFINE_TYPENAME
  typedef typename map<T, int>::iterator mapiterator;
  if (!flist.size())
    throw StatException("mode: empty list");

  map<T, int> bins;
  const_SITERATE(fi, flist) {
    mapiterator bi=bins.lower_bound(*fi);
    if ((bi==bins.end()) || ((*bi).first!=*fi))
      bins[*fi]=1;
    else
      (*bi).second++;
  }

  int count=0;
  for(mapiterator bi(bins.begin()), be(bins.end()); bi!=be; bi++)
    if ((*bi).second>count) {
      count=(*bi).second;
      mode.clear();
      mode.push_back((*bi).first);
    }
    else if ((*bi).second==count) {
      mode.push_back((*bi).first);
    }
  return count;
}



template<class T, class C>
int mode(const vector<T> &flist, vector<T> &mode, const C &compare)
{ DEFINE_TYPENAME
  typedef typename map<T, int, C>::iterator mapiterator;

  if (!flist.size())
    throw StatException("mode: empty list");

  map<T, int, C> bins(compare);
  const_SITERATE(fi, flist) {
    mapiterator bi=bins.lower_bound(*fi);
    if ((bi==bins.end()) || ((*bi).first!=*fi))
      bins[*fi]=1;
    else
      (*bi).second++;
  }

  int count=0;
  for(mapiterator bi(bins.begin()), be(bins.end()); bi!=be; bi++)
    if ((*bi).second>count) {
      count=(*bi).second;
      mode.clear();
      mode.push_back((*bi).first);
    }
    else if ((*bi).second==count) {
      mode.push_back((*bi).first);
    }
  return count;
}



/* *********** MOMENTS ************/

template<class T>
T moment(const vector<T> &flist, const int &mom)
{ DEFINE_TYPENAME
  if (!flist.size())
    throw StatException("moment: empty list");

  T me=mean(flist);

  if (mom==1) return me;
  if (mom==2) return samplevar(flist);

  T sum=0.0;
  const_SITERATE(fi, flist) {
    T dx= *fi-me;
    if (dx>0.0)
      sum+=exp(log(dx)*mom);
    else if (dx<0.0)
      if (mom%2)
        sum-=exp(log(-dx)*mom);
      else
        sum+=exp(log(-dx)*mom);
  }
  return sum/flist.size();
}


template<class T>
T variation(const vector<T> &flist)
{ return samplestdev(flist)/mean(flist) * 100.0;
}


template<class T>
T skewness(const vector<T> &flist)
{ T mom2=samplevar(flist);
  if (mom2==0.0)
    throw StatException("skewness: variation is 0.0");
  return moment(flist, 3) / exp(log(mom2)*1.5);
}


template<class T>
T kurtosis(const vector<T> &flist)
{ T mom2=samplevar(flist);
  if (mom2==0.0)
    throw StatException("skewness: variation is 0.0");
  return moment(flist, 4) / sqr(mom2);
}



/* *********** FREQUENCY STATS************/

template<class T>
T scoreatpercentile(const vector<T> &flist, const double &perc)
{ DEFINE_TYPENAME
  
  if (!flist.size())
    throw StatException("mode: empty list");

  vector<T> l2(flist);
  iterator mid(l2.begin()+int(l2.size()*perc/100.0+0.5));
  nth_element(l2.begin(), mid, l2.end());
  return *min_element(mid, l2.end());
}


template<class T, class C>
T scoreatpercentile(const vector<T> &flist, const double &perc, const C &compare)
{ DEFINE_TYPENAME
  
  if (!flist.size())
    throw StatException("mode: empty list");

  vector<T> l2(flist);
  iterator mid(l2.begin()+int(l2.size()*perc/100.0+0.5));
  nth_element(l2.begin(), mid, l2.end(), compare);
  return *min_element(mid, l2.end());
}


template<class T>
double percentileofscore(const vector<T> &flist, const T &x)
{ DEFINE_TYPENAME
  vector<T> l2(flist);
  iterator part=partition(l2.begin(), l2.end(), bind2nd(less<T>(), x));
  return double(part-l2.begin())/l2.size() * 100.0;
}


template<class T, class C>
double percentileofscore(const vector<T> &flist, const T &x, const C &compare)
{ DEFINE_TYPENAME
  vector<T> l2(flist);
  iterator part=partition(l2.begin(), l2.end(), bind2nd(compare, x));
  return double(part-l2.begin())/l2.size() * 100.0;
}


template<class T>
void histogram (const vector<T> &flist,
                vector<int> &counts, T &min, T &binsize, int &extrapoints,
                int numbins=10)
{ DEFINE_TYPENAME
  T max;

  min=*min_element(flist.begin(), flist.end());
  max=*max_element(flist.begin(), flist.end());
  T ebw=(max-min)/T(numbins) + 1.0;
  binsize=(max-min+ebw)/T(numbins);
  min-=binsize/2;

  counts=vector<int>(numbins, 0);
  extrapoints=0;
  const_SITERATE(ii, flist) {
    int binno=convert_to_int((*ii-min)/binsize);
    if (binno<numbins)
      counts[binno]++;
    else
      extrapoints++;
  }
}


template<class T>
void histogram (const vector<T> &flist,
                vector<int> &counts, T &rmin, T &binsize, int &extrapoints,
                const T &min, const T &max, int numbins=10)
{ DEFINE_TYPENAME
  rmin=min;

  binsize=(max-min)/T(numbins);
  counts=vector<int>(numbins, 0);
  extrapoints=0;
  const_SITERATE(ii, flist) {
    int binno=convert_to_int((*ii-min)/(binsize));
    if (binno<numbins)
      counts[binno]++;
    else
      extrapoints++;
  }
}


template<class T>
void cumfreq  (const vector<T> &flist,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -