📄 statistics.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Statistics.java * Copyright (C) 1999 Eibe Frank * */package weka.core;/** * Class implementing some distributions, tests, etc. Most of the * code is adapted from Gary Perlman's unixstat. * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.4 $ */public class Statistics { /** Some constants */ private static double logSqrtPi = Math.log(Math.sqrt(Math.PI)); private static double rezSqrtPi = 1/Math.sqrt(Math.PI); private static double bigx = 20.0; /** * Computes standard error for observed values of a binomial * random variable. * * @param p the probability of success * @param n the size of the sample * @return the standard error */ public static double binomialStandardError(double p, int n) { if (n == 0) { return 0; } return Math.sqrt((p*(1-p))/(double) n); } /** * Returns chi-squared probability for given value and degrees * of freedom. (The probability that the chi-squared variate * will be greater than x for the given degrees of freedom.) * Adapted from unixstat by Gary Perlman. * * @param x the value * @param df the number of degrees of freedom */ public static double chiSquaredProbability(double x, int df) { double a, y = 0, s, e, c, z, val; boolean even; if (x <= 0 || df < 1) return (1); a = 0.5 * x; even = (((int)(2*(df/2))) == df); if (df > 1) y = Math.exp(-a); //((-a < -bigx) ? 0.0 : Math.exp (-a)); s = (even ? y : (2.0 * normalProbability(-Math.sqrt (x)))); if (df > 2){ x = 0.5 * (df - 1.0); z = (even ? 1.0 : 0.5); if (a > bigx){ e = (even ? 0.0 : logSqrtPi); c = Math.log (a); while (z <= x){ e = Math.log (z) + e; val = c*z-a-e; s += Math.exp (val); //((val < -bigx) ? 0.0 : Math.exp (val)); z += 1.0; } return (s); }else{ e = (even ? 1.0 : (rezSqrtPi / Math.sqrt (a))); c = 0.0; while (z <= x){ e = e * (a / z); c = c + e; z += 1.0; } return (c * y + s); } }else{ return (s); } } /** * Critical value for given probability of F-distribution. * Adapted from unixstat by Gary Perlman. * * @param p the probability * @param df1 the first number of degrees of freedom * @param df2 the second number of degrees of freedom * @return the critical value for the given probability */ public static double FCriticalValue(double p, int df1, int df2) { double fval; double maxf = 99999.0; /* maximum possible F ratio */ double minf = .000001; /* minimum possible F ratio */ if (p <= 0.0 || p >= 1.0) return (0.0); fval = 1.0 / p; /* the smaller the p, the larger the F */ while (Math.abs (maxf - minf) > .000001) { if (FProbability(fval, df1, df2) < p) /* F too large */ maxf = fval; else /* F too small */ minf = fval; fval = (maxf + minf) * 0.5; } return (fval); } /** * Computes probability of F-ratio. * Adapted from unixstat by Gary Perlman. * Collected Algorithms of the CACM * Algorithm 322 * Egon Dorrer * * @param F the F-ratio * @param df1 the first number of degrees of freedom * @param df2 the second number of degrees of freedom * @return the probability of the F-ratio. */ public static double FProbability(double F, int df1, int df2) { int i, j; int a, b; double w, y, z, d, p; if ((Math.abs(F) < 10e-10) || df1 <= 0 || df2 <= 0) return (1.0); a = (df1%2 == 1) ? 1 : 2; b = (df2%2 == 1) ? 1 : 2; w = (F * df1) / df2; z = 1.0 / (1.0 + w); if (a == 1) if (b == 1) { p = Math.sqrt (w); y = 1/Math.PI; /* 1 / 3.14159 */ d = y * z / p; p = 2.0 * y * Math.atan (p); } else { p = Math.sqrt (w * z); d = 0.5 * p * z / w; } else if (b == 1) { p = Math.sqrt (z); d = 0.5 * z * p; p = 1.0 - p; } else { d = z * z; p = w * z; } y = 2.0 * w / z; for (j = b + 2; j <= df2; j += 2) { d *= (1.0 + a / (j - 2.0)) * z; p = (a == 1 ? p + d * y / (j - 1.0) : (p + w) * z); } y = w * z; z = 2.0 / z; b = df2 - 2; for (i = a + 2; i <= df1; i += 2) { j = i + b; d *= y * j / (i - 2.0); p -= z * d / j; } // correction for approximation errors suggested in certification if (p < 0.0) p = 0.0; else if (p > 1.0) p = 1.0; return (1.0-p); } /** * Returns probability that the standardized normal variate Z (mean = 0, standard * deviation = 1) is less than z. * Adapted from unixstat by Gary Perlman. * * @param the z-value * @return the probability of the z value according to the normal pdf */ public static double normalProbability(double z) { double y, x, w; if (z == 0.0) x = 0.0; else{ y = 0.5 * Math.abs (z); if (y >= 3.0) x = 1.0; else if (y < 1.0){ w = y*y; x = ((((((((0.000124818987 * w -0.001075204047) * w +0.005198775019) * w -0.019198292004) * w +0.059054035642) * w -0.151968751364) * w +0.319152932694) * w -0.531923007300) * w +0.797884560593) * y * 2.0; }else{ y -= 2.0; x = (((((((((((((-0.000045255659 * y +0.000152529290) * y -0.000019538132) * y -0.000676904986) * y +0.001390604284) * y -0.000794620820) * y -0.002034254874) * y +0.006549791214) * y -0.010557625006) * y +0.011630447319) * y -0.009279453341) * y +0.005353579108) * y -0.002141268741) * y +0.000535310849) * y +0.999936657524; } } return (z > 0.0 ? ((x + 1.0) / 2.0) : ((1.0 - x) / 2.0)); } /** * Computes absolute size of half of a student-t confidence interval * for given degrees of freedom, probability, and observed value. * * @param df the number of degrees of freedom * @param p the probability * @param se the observed value * @return absolute size of half of a student-t confidence interval */ public static double studentTConfidenceInterval(int df, double p, double se) { return Math.sqrt(FCriticalValue(p, 1, df))*se; } /** * Main method for testing this class. */ public static void main(String[] ops) { System.out.println("Binomial standard error (0.5, 100): " + Statistics.binomialStandardError(0.5, 100)); System.out.println("Chi-squared probability (2.558, 10): " + Statistics.chiSquaredProbability(2.558, 10)); System.out.println("Normal probability (0.2): " + Statistics.normalProbability(0.2)); System.out.println("F critical value (0.05, 4, 5): " + Statistics.FCriticalValue(0.05, 4, 5)); System.out.println("F probability (5.1922, 4, 5): " + Statistics.FProbability(5.1922, 4, 5)); System.out.println("Student-t confidence interval (9, 0.01, 2): " + Statistics.studentTConfidenceInterval(9, 0.01, 2)); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -