📄 fnchyppr.cpp

📁 很好用的随机数产生器
💻 CPP
字号:
/************************** FNCHYPPR.CPP ********************** 2002-10-20 AF *
*
* Calculation of univariate and multivariate Fisher's noncentral hypergeometric
* probability distribution.
*
* This file contains source code for the class CFishersNCHypergeometric 
* and CMultiFishersNCHypergeometric defined in stocc.h.
*
* Documentation:
* ==============
* The file stocc.h contains class definitions.
* The file stocc.htm contains further instructions.
*
*******************************************************************************/

#include <math.h>      // math functions
#include <string.h>    // memcpy function
#include "stocc.h"     // class definition


/***********************************************************************
             Methods for class CFishersNCHypergeometric
***********************************************************************/

CFishersNCHypergeometric::CFishersNCHypergeometric(long int n_, long int m_, long int N_, double odds_) {
  // constructor
  // set parameters
  n = n_; m = m_; N = N_; odds = odds_;
  // check validity of parameters
  if (n<0 || m<0 || N<0 || odds<0. || n>N || m>N) FatalError("Parameter out of range in class CFishersNCHypergeometric");
  // initialize
  logodds = log(odds);  scale = rsum = 0.;
  ParametersChanged = 1;
  // calculate xmin and xmax
  xmin = m + n - N;  if (xmin < 0) xmin = 0;
  xmax = n;  if (xmax > m) xmax = m;}


double CFishersNCHypergeometric::mean(void) {
  // find approximate mean
  double a, b;                         // temporaries in calculation
  double mean;                         // mean

  if (odds == 1.) { // simple hypergeometric
    return double(m)*n/N;}

  // calculate Cornfield mean
  a = (m+n)*odds + (N-m-n); 
  b = a*a - 4.*odds*(odds-1.)*m*n;
  b = b > 0. ? sqrt(b) : 0.;
  mean = (a-b)/(2.*(odds-1.));
  return mean;}

double CFishersNCHypergeometric::moments(double * mean_, double * var_) {
  // calculate exact mean and variance
  // return value = sum of f(x), expected = 1.
  double y, sy=0, sxy=0, sxxy=0, me1;
  long int x, xm, x1;
  const float accuracy = 1E-10f;  // accuracy of calculation
  xm = (long int)mean();  // approximation to mean
  for (x=xm; x<=xmax; x++) {
    y = probability(x);
    x1 = x - xm;  // subtract approximate mean to avoid loss of precision in sums
    sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
    if (y < accuracy) break;}
  for (x=xm-1; x>=xmin; x--) {
    y = probability(x);
    x1 = x - xm;  // subtract approximate mean to avoid loss of precision in sums
    sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
    if (y < accuracy) break;}

  me1 = sxy / sy;
  *mean_ = me1 + xm;
  y = sxxy / sy - me1 * me1;
  if (y < 0) y=0;
  *var_ = y;
  return sy;}


double CFishersNCHypergeometric::probability(long int x) {
  // calculate probability function
  const double accuracy = 1E-10;  // accuracy of calculation

  if (x < xmin || x > xmax) return 0;
  if (!rsum) {
    // first time. calculate rsum = reciprocal of sum of proportional 
    // function over all probable x values
    long int x1, x2;                // x loop
    double y;                       // value of proportional function
    x1 = (long int)mean();          // start at mean
    if (x1 < xmin) x1 = xmin;
    x2 = x1 + 1;
    scale = 0.; scale = lng(x1);    // calculate scale to avoid overflow
    rsum = 1.;                      // = exp(lng(x1)) with this scale
    for (x1--; x1 >= xmin; x1--) {
      rsum += y = exp(lng(x1));     // sum from x1 and down 
      if (y < accuracy) break;}     // until value becomes negligible
    for (; x2 <= xmax; x2++) {      // sum from x2 and up
      rsum += y = exp(lng(x2));
      if (y < accuracy) break;}     // until value becomes negligible
    rsum = 1. / rsum;}              // save reciprocal sum

  return exp(lng(x)) * rsum;}       // function value


double CFishersNCHypergeometric::probabilityRatio(long int x, long int x0) {
  // Calculate probability ratio f(x)/f(x0)
  // This is much faster than calculating a single probability because
  // rsum is not needed
  double a1, a2, a3, a4, f1, f2, f3, f4;
  long int y, dx = x - x0;
  int invert = 0;

  if (x < xmin || x > xmax) return 0.;
  if (x0 < xmin || x0 > xmax) FatalError("Infinity in CFishersNCHypergeometric::probabilityRatio");
  if (dx == 0.) return 1.;
  if (dx < 0.) {
    invert = 1;
    dx = -dx;
    y = x;  x = x0;  x0 = y;}
  a1 = m - x0;  a2 = n - x0;  a3 = x;  a4 = N - m - n + x;
  if (dx <= 28 && x <= 100000) {       // avoid overflow
    // direct calculation
    f1 = f2 = 1.;
    // compute ratio of binomials
    for (y = 0; y < dx; y++) {
      f1 *= a1-- * a2--;
      f2 *= a3-- * a4--;}
    // compute odds^dx
    f3 = 1.;  f4 = odds;  y = dx;
    while (y) {
      if (f4 < 1.E-100) {
        f3 = 0.;  break;}              // avoid underflow
      if (y & 1) f3 *= f4;
      f4 *= f4;
      y = (unsigned long)(y) >> 1;}
    f1 = f3 * f1 / f2;
    if (invert) f1 = 1. / f1;}
  else {
    // use logarithms
    f1 = FallingFactorial(a1,dx) + FallingFactorial(a2,dx) -
         FallingFactorial(a3,dx) - FallingFactorial(a4,dx) +
         dx * log(odds);
    if (invert) f1 = -f1;
    f1 = exp(f1);}
  return f1;}


double CFishersNCHypergeometric::lng(long int x) {
  // natural log of proportional function
  // returns lambda = log(m!*x!/(m-x)!*m2!*x2!/(m2-x2)!*odds^x)
  long int x2 = n-x, m2 = N-m;
  if (ParametersChanged) {
    mFac = LnFac(m) + LnFac(m2);
    xLast = -99; ParametersChanged = 0;}
  if (m < FAK_LEN && m2 < FAK_LEN)  goto DEFLT;
  switch (x - xLast) {
  case 0: // x unchanged
    break;
  case 1: // x incremented. calculate from previous value
    xFac += log (double(x) * (m2-x2) / (double(x2+1)*(m-x+1)));
    break;
  case -1: // x decremented. calculate from previous value
    xFac += log (double(x2) * (m-x) / (double(x+1)*(m2-x2+1)));
    break;
  default: DEFLT: // calculate all
    xFac = LnFac(x) + LnFac(x2) + LnFac(m-x) + LnFac(m2-x2);
    }
  xLast = x;
  return mFac - xFac + x * logodds - scale;}


/***********************************************************************
     calculation methods in class CMultiFishersNCHypergeometric
***********************************************************************/
  
CMultiFishersNCHypergeometric::CMultiFishersNCHypergeometric(long int n_, long int * m_, double * odds_, int colors_, float accuracy_) {
  // constructor
  long int N1;
  int i;
  // copy parameters
  n = n_;  m = m_;  odds = odds_;  colors = colors_;  accuracy = accuracy_;
  // check if parameters are valid
  for (N=N1=0, i=0; i < colors; i++) {
    if (m[i] < 0 || odds[i] < 0) FatalError("Parameter negative in constructor for CMultiFishersNCHypergeometric");
    N += m[i];
    if (odds[i]) N1 += m[i];}
  if (N < n) FatalError("Not enough items in constructor for CMultiFishersNCHypergeometric");
  if (N1< n) FatalError("Not enough items with nonzero weight in constructor for CMultiFishersNCHypergeometric");

  // calculate mFac and logodds
  for (i=0, mFac=0.; i < colors; i++) {
    mFac += LnFac(m[i]);
    logodds[i] = log(odds[i]);}

  // initialize
  sn = 0;}


void CMultiFishersNCHypergeometric::mean(double * mu) {
  // calculates approximate mean of multivariate Fisher's noncentral
  // hypergeometric distribution. Result is returned in mu[0..colors-1].
  // The calculation is reasonably fast.
  if (colors < 3) {
    // simple cases
    if (colors == 1) mu[0] = n;
    if (colors == 2) {
      mu[0] = CFishersNCHypergeometric(n,m[0],m[0]+m[1],odds[0]/odds[1]).mean();
      mu[1] = n - mu[0];}
    return;}

  double r, r1;              // iteration variable
  double q;                  // mean of color i
  double W;                  // total weight
  int i;                     // color index
  int iter = 0;              // iteration counter

  // initial guess for r
  for (i=0, W=0.; i < colors; i++) W += m[i] * odds[i];
  r = (double)n * N / ((N-n)*W);

  // iteration loop to find r
  do {
    r1 = r;
    for (i=0, q=0.; i < colors; i++) {
      q += m[i] * r * odds[i] / (r * odds[i] + 1.);}
    r *= n * (N-q) / (q * (N-n));
    if (++iter > 100) FatalError("convergence problem in function CMultiFishersNCHypergeometric::mean");}
  while (fabs(r-r1) > 1E-5);

  // store result
  for (i=0; i < colors; i++) {
    mu[i] = m[i] * r * odds[i] / (r * odds[i] + 1.);}}


void CMultiFishersNCHypergeometric::variance(double * var) {
  // calculates approximate variance of multivariate Fisher's noncentral
  // hypergeometric distribution (accuracy is not too good).
  // Result is returned in variance[0..colors-1].
  // The calculation is reasonably fast.
  double r1, r2;
  double mu[MAXCOLORS];
  int i;
  mean(mu);
  for (i=0; i<colors; i++) {
    r1 = mu[i] * (m[i]-mu[i]);
    r2 = (n-mu[i])*(mu[i]+N-n-m[i]);
    if (r1 <= 0. || r2 <= 0.) {
      var[i] = 0.;}
    else {
      var[i] = N*r1*r2/((N-1)*(m[i]*r2+(N-m[i])*r1));}}}


double CMultiFishersNCHypergeometric::probability(long int * x) {
  // Calculate probability function.
  // Note: The first-time call takes very long time because it requires
  // a calculation of all possible x combinations with probability >
  // accuracy, which may be extreme.
  // The calculation uses logarithms to avoid overflow. 
  // (Recursive calculation may be faster, but this has not been implemented)
  if (sn == 0) SumOfAll();             // first time initialize
  return exp(lng(x)) * rsum;}          // function value


double CMultiFishersNCHypergeometric::moments(double * mean, double * variance, long int * combinations) {
  // calculates mean and variance of the Fisher's noncentral hypergeometric 
  // distribution by calculating all combinations of x-values with
  // probability > accuracy.
  // Return value = 1.
  // Returns the mean in mean[0...colors-1]
  // Returns the variance in variance[0...colors-1]

  int i;                               // color index
  if (sn == 0) {
    // first time initialization includes calculation of mean and variance
    SumOfAll();}

  // just copy results
  for (i=0; i < colors; i++) {
    mean[i] = sx[i];
    variance[i] = sxx[i];}
  if (combinations) *combinations = sn;
  return 1.;}


void CMultiFishersNCHypergeometric::SumOfAll() {
  // this function does the very time consuming job of calculating the sum
  // of the proportional function g(x) over all possible combinations of
  // the x[i] values with probability > accuracy. These combinations are 
  // generated by the recursive function loop().
  // The mean and variance are generated as by-products.

  int i;                               // color index
  long int msum;                       // sum of m[i]

  // get approximate mean
  mean(sx);
  // round mean to integers
  for (i=0, msum=0; i < colors; i++) {
    msum += xm[i] = (long int)(sx[i]+0.4999999);}
  // adjust truncated x values to make the sum = n
  msum -= n;
  for (i = 0; msum < 0; i++) {
    if (xm[i] < m[i]) {
      xm[i]++; msum++;}}
  for (i = 0; msum > 0; i++) {
    if (xm[i] > 0) {
      xm[i]--; msum--;}}

  // adjust scale factor to g(mean) to avoid overflow
  scale = 0.; scale = lng(xm);

  // initialize for recursive loops
  sn = 0;
  for (i = colors-1, msum = 0; i >= 0; i--) {
    remaining[i] = msum;  msum += m[i];}
  for (i = 0; i < colors; i++) {
    sx[i] = 0;  sxx[i] = 0;}

  // recursive loops to calculate sums of g(x) over all x combinations
  rsum = 1. / loop(n, 0);

  // calculate mean and variance
  for (i=0; i<colors; i++) {
    sxx[i] = sxx[i]*rsum - sx[i]*sx[i]*rsum*rsum;
    sx[i] = sx[i]*rsum;}}
    

double CMultiFishersNCHypergeometric::loop(long int n, int c) {
  // recursive function to loop through all combinations of x-values.
  // used by SumOfAll
  long int x, x0;                      // x of color c
  long int xmin, xmax;                 // min and max of x[c]
  double s1, s2, sum = 0.;             // sum of g(x) values
  int i;                               // loop counter

  if (c < colors-1) {
    // not the last color
    // calculate min and max of x[c] for given x[0]..x[c-1]
    xmin = n - remaining[c];  if (xmin < 0) xmin = 0;
    xmax = m[c];  if (xmax > n) xmax = n;
    x0 = xm[c];  if (x0 < xmin) x0 = xmin;  if (x0 > xmax) x0 = xmax;
    // loop for all x[c] from mean and up
    for (x = x0, s2 = 0.; x <= xmax; x++) {
      xi[c] = x;
      sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
      if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
      s2 = s1;}
    // loop for all x[c] from mean and down
    for (x = x0-1; x >= xmin; x--) {
      xi[c] = x;
      sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
      if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
      s2 = s1;}}
  else {
    // last color
    xi[c] = n;
    // sums and squaresums    
    s1 = exp(lng(xi));     // proportional function g(x)
    for (i = 0; i < colors; i++) { // update sums
      sx[i]  += s1 * xi[i];
      sxx[i] += s1 * xi[i] * xi[i];}
    sn++;
    sum += s1;}
  return sum;}


double CMultiFishersNCHypergeometric::lng(long int * x) {
  // natural log of proportional function g(x)
  double y = 0.;
  int i;
  for (i=0; i < colors; i++) {
    y += x[i]*logodds[i] - LnFac(x[i]) - LnFac(m[i]-x[i]);}
  return mFac + y - scale;}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -