📄 statistics.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package weka.core;/** * Class implementing some distributions, tests, etc. The code is mostly adapted from the CERN * Jet Java libraries: *  * Copyright 2001 University of Waikato * Copyright 1999 CERN - European Organization for Nuclear Research. * Permission to use, copy, modify, distribute and sell this software and its documentation for * any purpose is hereby granted without fee, provided that the above copyright notice appear * in all copies and that both that copyright notice and this permission notice appear in * supporting documentation.  * CERN and the University of Waikato make no representations about the suitability of this  * software for any purpose. It is provided "as is" without expressed or implied warranty. * * @author peter.gedeck@pharma.Novartis.com * @author wolfgang.hoschek@cern.ch * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @version $Revision: 1.8 $ */public class Statistics {  /** Some constants */  protected static final double MACHEP =  1.11022302462515654042E-16;  protected static final double MAXLOG =  7.09782712893383996732E2;  protected static final double MINLOG = -7.451332191019412076235E2;  protected static final double MAXGAM = 171.624376956302725;  protected static final double SQTPI  =  2.50662827463100050242E0;  protected static final double SQRTH  =  7.07106781186547524401E-1;  protected static final double LOGPI  =  1.14472988584940017414;    protected static final double big    =  4.503599627370496e15;  protected static final double biginv =  2.22044604925031308085e-16;  /*************************************************   *    COEFFICIENTS FOR METHOD  normalInverse()   *   *************************************************/  /* approximation for 0 <= |y - 0.5| <= 3/8 */  protected static final double P0[] = {    -5.99633501014107895267E1,    9.80010754185999661536E1,    -5.66762857469070293439E1,    1.39312609387279679503E1,    -1.23916583867381258016E0,  };  protected static final double Q0[] = {    /* 1.00000000000000000000E0,*/    1.95448858338141759834E0,    4.67627912898881538453E0,    8.63602421390890590575E1,    -2.25462687854119370527E2,    2.00260212380060660359E2,    -8.20372256168333339912E1,    1.59056225126211695515E1,    -1.18331621121330003142E0,  };    /* Approximation for interval z = sqrt(-2 log y ) between 2 and 8   * i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.   */  protected static final double P1[] = {    4.05544892305962419923E0,    3.15251094599893866154E1,    5.71628192246421288162E1,    4.40805073893200834700E1,    1.46849561928858024014E1,    2.18663306850790267539E0,    -1.40256079171354495875E-1,    -3.50424626827848203418E-2,    -8.57456785154685413611E-4,  };  protected static final double Q1[] = {    /*  1.00000000000000000000E0,*/    1.57799883256466749731E1,    4.53907635128879210584E1,    4.13172038254672030440E1,    1.50425385692907503408E1,    2.50464946208309415979E0,    -1.42182922854787788574E-1,    -3.80806407691578277194E-2,    -9.33259480895457427372E-4,  };    /* Approximation for interval z = sqrt(-2 log y ) between 8 and 64   * i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.   */  protected static final double  P2[] = {    3.23774891776946035970E0,    6.91522889068984211695E0,    3.93881025292474443415E0,    1.33303460815807542389E0,    2.01485389549179081538E-1,    1.23716634817820021358E-2,    3.01581553508235416007E-4,    2.65806974686737550832E-6,    6.23974539184983293730E-9,  };  protected static final double  Q2[] = {    /*  1.00000000000000000000E0,*/    6.02427039364742014255E0,    3.67983563856160859403E0,    1.37702099489081330271E0,    2.16236993594496635890E-1,    1.34204006088543189037E-2,    3.28014464682127739104E-4,    2.89247864745380683936E-6,    6.79019408009981274425E-9,  };    /**   * Computes standard error for observed values of a binomial   * random variable.   *   * @param p the probability of success   * @param n the size of the sample   * @return the standard error   */  public static double binomialStandardError(double p, int n) {        if (n == 0) {      return 0;     }    return Math.sqrt((p*(1-p))/(double) n);  }    /**   * Returns chi-squared probability for given value and degrees   * of freedom. (The probability that the chi-squared variate   * will be greater than x for the given degrees of freedom.)   *   * @param x the value   * @param df the number of degrees of freedom   * @return the chi-squared probability   */  public static double chiSquaredProbability(double x, double v) {     if( x < 0.0 || v < 1.0 ) return 0.0;    return incompleteGammaComplement( v/2.0, x/2.0 );  }  /**   * Computes probability of F-ratio.   *   * @param F the F-ratio   * @param df1 the first number of degrees of freedom   * @param df2 the second number of degrees of freedom   * @return the probability of the F-ratio.   */  public static double FProbability(double F, int df1, int df2) {        return incompleteBeta( df2/2.0, df1/2.0, df2/(df2+df1*F) );  }  /**   * Returns the area under the Normal (Gaussian) probability density   * function, integrated from minus infinity to <tt>x</tt>   * (assumes mean is zero, variance is one).   * <pre>   *                            x   *                             -   *                   1        | |          2   *  normal(x)  = ---------    |    exp( - t /2 ) dt   *               sqrt(2pi)  | |   *                           -   *                          -inf.   *   *             =  ( 1 + erf(z) ) / 2   *             =  erfc(z) / 2   * </pre>   * where <tt>z = x/sqrt(2)</tt>.   * Computation is via the functions <tt>errorFunction</tt> and <tt>errorFunctionComplement</tt>.   *   * @param a the z-value   * @return the probability of the z value according to the normal pdf   */  public static double normalProbability(double a) {     double x, y, z;     x = a * SQRTH;    z = Math.abs(x);     if( z < SQRTH ) y = 0.5 + 0.5 * errorFunction(x);    else {      y = 0.5 * errorFunctionComplemented(z);      if( x > 0 )  y = 1.0 - y;    }     return y;  }  /**   * Returns the value, <tt>x</tt>, for which the area under the   * Normal (Gaussian) probability density function (integrated from   * minus infinity to <tt>x</tt>) is equal to the argument <tt>y</tt>   * (assumes mean is zero, variance is one).   * <p>   * For small arguments <tt>0 < y < exp(-2)</tt>, the program computes   * <tt>z = sqrt( -2.0 * log(y) )</tt>;  then the approximation is   * <tt>x = z - log(z)/z  - (1/z) P(1/z) / Q(1/z)</tt>.   * There are two rational functions P/Q, one for <tt>0 < y < exp(-32)</tt>   * and the other for <tt>y</tt> up to <tt>exp(-2)</tt>.    * For larger arguments,   * <tt>w = y - 0.5</tt>, and  <tt>x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2))</tt>.   *   * @param y0 the area under the normal pdf   * @return the z-value   */  public static double normalInverse(double y0) {     double x, y, z, y2, x0, x1;    int code;    final double s2pi = Math.sqrt(2.0*Math.PI);    if( y0 <= 0.0 ) throw new IllegalArgumentException();    if( y0 >= 1.0 ) throw new IllegalArgumentException();    code = 1;    y = y0;    if( y > (1.0 - 0.13533528323661269189) ) { /* 0.135... = exp(-2) */      y = 1.0 - y;      code = 0;    }    if( y > 0.13533528323661269189 ) {      y = y - 0.5;      y2 = y * y;      x = y + y * (y2 * polevl( y2, P0, 4)/p1evl( y2, Q0, 8 ));      x = x * s2pi;       return(x);    }    x = Math.sqrt( -2.0 * Math.log(y) );    x0 = x - Math.log(x)/x;    z = 1.0/x;    if( x < 8.0 ) /* y > exp(-32) = 1.2664165549e-14 */      x1 = z * polevl( z, P1, 8 )/p1evl( z, Q1, 8 );    else      x1 = z * polevl( z, P2, 8 )/p1evl( z, Q2, 8 );    x = x0 - x1;    if( code != 0 )      x = -x;    return( x );  }  /**   * Returns natural logarithm of gamma function.   *   * @param x the value   * @return natural logarithm of gamma function   */  public static double lnGamma(double x) {    double p, q, w, z;     double A[] = {      8.11614167470508450300E-4,      -5.95061904284301438324E-4,      7.93650340457716943945E-4,      -2.77777777730099687205E-3,      8.33333333333331927722E-2    };    double B[] = {      -1.37825152569120859100E3,      -3.88016315134637840924E4,      -3.31612992738871184744E5,      -1.16237097492762307383E6,      -1.72173700820839662146E6,      -8.53555664245765465627E5    };    double C[] = {      /* 1.00000000000000000000E0, */      -3.51815701436523470549E2,      -1.70642106651881159223E4,      -2.20528590553854454839E5,      -1.13933444367982507207E6,      -2.53252307177582951285E6,      -2.01889141433532773231E6    };     if( x < -34.0 ) {      q = -x;      w = lnGamma(q);      p = Math.floor(q);      if( p == q ) throw new ArithmeticException("lnGamma: Overflow");      z = q - p;      if( z > 0.5 ) {	p += 1.0;	z = p - q;      }      z = q * Math.sin( Math.PI * z );      if( z == 0.0 ) throw new 	ArithmeticException("lnGamma: Overflow");      z = LOGPI - Math.log( z ) - w;      return z;    }     if( x < 13.0 ) {      z = 1.0;      while( x >= 3.0 ) {	x -= 1.0;	z *= x;      }      while( x < 2.0 ) {	if( x == 0.0 ) throw new 	  ArithmeticException("lnGamma: Overflow");	z /= x;	x += 1.0;      }      if( z < 0.0 ) z = -z;      if( x == 2.0 ) return Math.log(z);      x -= 2.0;      p = x * polevl( x, B, 5 ) / p1evl( x, C, 6);      return( Math.log(z) + p );    }     if( x > 2.556348e305 ) throw new ArithmeticException("lnGamma: Overflow");     q = ( x - 0.5 ) * Math.log(x) - x + 0.91893853320467274178;      if( x > 1.0e8 ) return( q );     p = 1.0/(x*x);    if( x >= 1000.0 )      q += ((   7.9365079365079365079365e-4 * p		- 2.7777777777777777777778e-3) *p	    + 0.0833333333333333333333) / x;    else      q += polevl( p, A, 4 ) / x;    return q;  }  /**   * Returns the error function of the normal distribution.   * The integral is   * <pre>   *                           x    *                            -   *                 2         | |          2   *   erf(x)  =  --------     |    exp( - t  ) dt.   *              sqrt(pi)   | |   *                          -   *                           0   * </pre>   * <b>Implementation:</b>   * For <tt>0 <= |x| < 1, erf(x) = x * P4(x**2)/Q5(x**2)</tt>; otherwise   * <tt>erf(x) = 1 - erfc(x)</tt>.   * <p>   * Code adapted from the <A HREF="http://www.sci.usq.edu.au/staff/leighb/graph/Top.html">   * Java 2D Graph Package 2.4</A>,   * which in turn is a port from the   * <A HREF="http://people.ne.mediaone.net/moshier/index.html#Cephes">Cephes 2.2</A>   * Math Library (C).   *   * @param a the argument to the function.   */  static double errorFunction(double x) {     double y, z;    final double T[] = {      9.60497373987051638749E0,      9.00260197203842689217E1,      2.23200534594684319226E3,      7.00332514112805075473E3,      5.55923013010394962768E4    };    final double U[] = {      //1.00000000000000000000E0,      3.35617141647503099647E1,      5.21357949780152679795E2,      4.59432382970980127987E3,      2.26290000613890934246E4,      4.92673942608635921086E4    };      if( Math.abs(x) > 1.0 ) return( 1.0 - errorFunctionComplemented(x) );    z = x * x;    y = x * polevl( z, T, 4 ) / p1evl( z, U, 5 );    return y;  }  /**   * Returns the complementary Error function of the normal distribution.   * <pre>   *  1 - erf(x) =   *   *                           inf.    *                             -   *                  2         | |          2   *   erfc(x)  =  --------     |    exp( - t  ) dt   *               sqrt(pi)   | |   *                           -   *                            x   * </pre>   * <b>Implementation:</b>   * For small x, <tt>erfc(x) = 1 - erf(x)</tt>; otherwise rational   * approximations are computed.   * <p>   * Code adapted from the <A HREF="http://www.sci.usq.edu.au/staff/leighb/graph/Top.html">   * Java 2D Graph Package 2.4</A>,   * which in turn is a port from the   * <A HREF="http://people.ne.mediaone.net/moshier/index.html#Cephes">Cephes 2.2</A>   * Math Library (C).   *   * @param a the argument to the function.   */  static double errorFunctionComplemented(double a) {     double x,y,z,p,q;      double P[] = {      2.46196981473530512524E-10,      5.64189564831068821977E-1,      7.46321056442269912687E0,      4.86371970985681366614E1,      1.96520832956077098242E2,      5.26445194995477358631E2,      9.34528527171957607540E2,      1.02755188689515710272E3,      5.57535335369399327526E2    };    double Q[] = {      //1.0      1.32281951154744992508E1,      8.67072140885989742329E1,      3.54937778887819891062E2,      9.75708501743205489753E2,      1.82390916687909736289E3,      2.24633760818710981792E3,      1.65666309194161350182E3,      5.57535340817727675546E2    };      double R[] = {      5.64189583547755073984E-1,      1.27536670759978104416E0,      5.01905042251180477414E0,      6.16021097993053585195E0,      7.40974269950448939160E0,      2.97886665372100240670E0    };    double S[] = {      //1.00000000000000000000E0,       2.26052863220117276590E0,      9.39603524938001434673E0,      1.20489539808096656605E1,      1.70814450747565897222E1,      9.60896809063285878198E0,      3.36907645100081516050E0    };      if( a < 0.0 )   x = -a;    else            x = a;      if( x < 1.0 )   return 1.0 - errorFunction(a);      z = -a * a;      if( z < -MAXLOG ) {      if( a < 0 )  return( 2.0 );      else         return( 0.0 );    }      z = Math.exp(z);      if( x < 8.0 ) {      p = polevl( x, P, 8 );      q = p1evl( x, Q, 8 );    } else {      p = polevl( x, R, 5 );      q = p1evl( x, S, 6 );    }      y = (z * p)/q;      if( a < 0 ) y = 2.0 - y;      if( y == 0.0 ) {      if( a < 0 ) return 2.0;      else        return( 0.0 );    }    return y;  }    /**   * Evaluates the given polynomial of degree <tt>N</tt> at <tt>x</tt>.   * Evaluates polynomial when coefficient of N is 1.0.   * Otherwise same as <tt>polevl()</tt>.   * <pre>   *                     2          N   * y  =  C  + C x + C x  +...+ C x   *        0    1     2          N   *   * Coefficients are stored in reverse order:   *   * coef[0] = C  , ..., coef[N] = C  .   *            N                   0   * </pre>   * The function <tt>p1evl()</tt> assumes that <tt>coef[N] = 1.0</tt> and is   * omitted from the array.  Its calling arguments are   * otherwise the same as <tt>polevl()</tt>.   * <p>   * In the interest of speed, there are no checks for out of bounds arithmetic.   *   * @param x argument to the polynomial.   * @param coef the coefficients of the polynomial.   * @param N the degree of the polynomial.   */  static double p1evl( double x, double coef[], int N ) {      double ans;    ans = x + coef[0];      for(int i=1; i<N; i++) ans = ans*x+coef[i];      return ans;  }  /**   * Evaluates the given polynomial of degree <tt>N</tt> at <tt>x</tt>.   * <pre>   *                     2          N   * y  =  C  + C x + C x  +...+ C x   *        0    1     2          N   *   * Coefficients are stored in reverse order:   *   * coef[0] = C  , ..., coef[N] = C  .   *            N                   0   * </pre>   * In the interest of speed, there are no checks for out of bounds arithmetic.
12 下一页
💿 文件大小 3872 K
👤 上传用户 fengkuangyidao
📂 所属分类 Applet
🏷️ 相关标签

#Java #编写 #数据挖掘算法 #分类
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -