⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 statistics.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Statistics.java *    Copyright (C) 1999 Eibe Frank * */package weka.core;/** * Class implementing some distributions, tests, etc. Most of the * code is adapted from Gary Perlman's unixstat. * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.4 $ */public class Statistics {  /** Some constants */  private static double logSqrtPi = Math.log(Math.sqrt(Math.PI));  private static double rezSqrtPi = 1/Math.sqrt(Math.PI);  private static double bigx = 20.0;  /**   * Computes standard error for observed values of a binomial   * random variable.   *   * @param p the probability of success   * @param n the size of the sample   * @return the standard error   */  public static double binomialStandardError(double p, int n) {    if (n == 0) {      return 0;     }    return Math.sqrt((p*(1-p))/(double) n);  }  /**   * Returns chi-squared probability for given value and degrees   * of freedom. (The probability that the chi-squared variate   * will be greater than x for the given degrees of freedom.)   * Adapted from unixstat by Gary Perlman.   *   * @param x the value   * @param df the number of degrees of freedom   */  public static double chiSquaredProbability(double x, int df) {    double a, y = 0, s, e, c, z, val;    boolean even;       if (x <= 0 || df < 1)      return (1);    a = 0.5 * x;    even = (((int)(2*(df/2))) == df);    if (df > 1)      y = Math.exp(-a); //((-a < -bigx) ? 0.0 : Math.exp (-a));    s = (even ? y : (2.0 * normalProbability(-Math.sqrt (x))));    if (df > 2){      x = 0.5 * (df - 1.0);      z = (even ? 1.0 : 0.5);      if (a > bigx){	e = (even ? 0.0 : logSqrtPi);	c = Math.log (a);	while (z <= x){	  e = Math.log (z) + e;	  val = c*z-a-e;	  s += Math.exp (val); //((val < -bigx) ? 0.0 : Math.exp (val));	  z += 1.0;	}	return (s);      }else{	e = (even ? 1.0 : (rezSqrtPi / Math.sqrt (a)));	c = 0.0;	while (z <= x){	  e = e * (a / z);	  c = c + e;	  z += 1.0;	}	return (c * y + s);      }    }else{      return (s);    }  }  /**   * Critical value for given probability of F-distribution.   * Adapted from unixstat by Gary Perlman.   *   * @param p the probability   * @param df1 the first number of degrees of freedom   * @param df2 the second number of degrees of freedom   * @return the critical value for the given probability   */  public static double FCriticalValue(double p, int df1, int df2) {    double  fval;    double  maxf = 99999.0;     /* maximum possible F ratio */    double  minf = .000001;     /* minimum possible F ratio */        if (p <= 0.0 || p >= 1.0)      return (0.0);        fval = 1.0 / p; /* the smaller the p, the larger the F */        while (Math.abs (maxf - minf) > .000001) {      if (FProbability(fval, df1, df2) < p) /* F too large */	maxf = fval;      else /* F too small */	minf = fval;      fval = (maxf + minf) * 0.5;    }        return (fval);  }  /**   * Computes probability of F-ratio.   * Adapted from unixstat by Gary Perlman.   * Collected Algorithms of the CACM    * Algorithm 322   * Egon Dorrer   *   * @param F the F-ratio   * @param df1 the first number of degrees of freedom   * @param df2 the second number of degrees of freedom   * @return the probability of the F-ratio.   */  public static double FProbability(double F, int df1, int df2) {    int     i, j;    int     a, b;    double  w, y, z, d, p;    if ((Math.abs(F) < 10e-10) || df1 <= 0 || df2 <= 0)      return (1.0);    a = (df1%2 == 1) ? 1 : 2;    b = (df2%2 == 1) ? 1 : 2;    w = (F * df1) / df2;    z = 1.0 / (1.0 + w);    if (a == 1)      if (b == 1) {	p = Math.sqrt (w);	y = 1/Math.PI; /* 1 / 3.14159 */	d = y * z / p;	p = 2.0 * y * Math.atan (p);      } else {	p = Math.sqrt (w * z);	d = 0.5 * p * z / w;      } else if (b == 1) {	p = Math.sqrt (z);	d = 0.5 * z * p;	p = 1.0 - p;      } else {	d = z * z;	p = w * z;      }    y = 2.0 * w / z;    for (j = b + 2; j <= df2; j += 2) {      d *= (1.0 + a / (j - 2.0)) * z;      p = (a == 1 ? p + d * y / (j - 1.0) : (p + w) * z);    }    y = w * z;    z = 2.0 / z;    b = df2 - 2;    for (i = a + 2; i <= df1; i += 2) {      j = i + b;      d *= y * j / (i - 2.0);      p -= z * d / j;    }        // correction for approximation errors suggested in certification    if (p < 0.0)      p = 0.0;    else if (p > 1.0)      p = 1.0;    return (1.0-p);  }    /**   * Returns probability that the standardized normal variate Z (mean = 0, standard   * deviation = 1) is less than z.   * Adapted from unixstat by Gary Perlman.   *   * @param the z-value   * @return the probability of the z value according to the normal pdf   */  public static double normalProbability(double z) {    double  y, x, w;        if (z == 0.0)      x = 0.0;    else{      y = 0.5 * Math.abs (z);      if (y >= 3.0)	x = 1.0;      else if (y < 1.0){	w = y*y;	x = ((((((((0.000124818987 * w                   -0.001075204047) * w +0.005198775019) * w                   -0.019198292004) * w +0.059054035642) * w                   -0.151968751364) * w +0.319152932694) * w                   -0.531923007300) * w +0.797884560593) * y * 2.0;      }else{	y -= 2.0;	x = (((((((((((((-0.000045255659 * y                         +0.000152529290) * y -0.000019538132) * y                         -0.000676904986) * y +0.001390604284) * y                         -0.000794620820) * y -0.002034254874) * y                         +0.006549791214) * y -0.010557625006) * y                         +0.011630447319) * y -0.009279453341) * y                         +0.005353579108) * y -0.002141268741) * y                         +0.000535310849) * y +0.999936657524;      }    }      return (z > 0.0 ? ((x + 1.0) / 2.0) : ((1.0 - x) / 2.0));  }  /**   * Computes absolute size of half of a student-t confidence interval    * for given degrees of freedom, probability, and observed value.   *   * @param df the number of degrees of freedom   * @param p the probability   * @param se the observed value   * @return absolute size of half of a student-t confidence interval   */  public static double studentTConfidenceInterval(int df, double p,						  double se) {    return Math.sqrt(FCriticalValue(p, 1, df))*se;  }  /**   * Main method for testing this class.   */  public static void main(String[] ops) {    System.out.println("Binomial standard error (0.5, 100): " + 		       Statistics.binomialStandardError(0.5, 100));    System.out.println("Chi-squared probability (2.558, 10): " +		       Statistics.chiSquaredProbability(2.558, 10));    System.out.println("Normal probability (0.2): " +		       Statistics.normalProbability(0.2));    System.out.println("F critical value (0.05, 4, 5): " +		       Statistics.FCriticalValue(0.05, 4, 5));    System.out.println("F probability (5.1922, 4, 5): " +		       Statistics.FProbability(5.1922, 4, 5));    System.out.println("Student-t confidence interval (9, 0.01, 2): " +		       Statistics.studentTConfidenceInterval(9, 0.01, 2));  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -