📄 statistics.java
字号:
* * <blockquote><code> * mean(xs) * = <big><big>Σ</big></big><sub><sub>i < xs.length</sub></sub> * xs[i] / xs.length * </code></blockquote> * * If the array is of length zero, the result is {@link Double#NaN}. * * @param xs Array of values. * @return Mean of array of values. */ public static double mean(double[] xs) { return com.aliasi.util.Math.sum(xs) / (double) xs.length; } /** * Returns the variance of the specified array of input values. * The variance of an array of values is the mean of the * squared differences between the values and the mean: * * <blockquote><code> * variance(xs) * = <big><big>Σ</big></big><sub><sub>i < xs.length</sub></sub> * (mean(xs) - xs[i])<sup><sup>2</sup></sup> / xs.length * </code></blockquote> * * If the array is of length zero, the result is {@link Double#NaN}. * * @param xs Array of values. * @return Variance of array of values. */ public static double variance(double[] xs) { return variance(xs,mean(xs)); } /** * Returns the standard deviation of the specified array of input * values. The standard deviation is just the square root of the * variance: * * <blockquote><code> * standardDeviation(xs) = variance(xs)<sup><sup>(1/2)</sup></sup> * </code></blockquote> * * If the array is of length zero, the result is {@link Double#NaN}. * * @param xs Array of values. * @return Standard deviation of array of values. */ public static double standardDeviation(double[] xs) { return Math.sqrt(variance(xs)); } /** * Returns (Pearson's) correlation coefficient between two * specified arrays. The correlation coefficient, traditionally * notated as <code><i>r</i><sup>2</sup></code>, measures the * square error between a best linear fit between the two arrays * of data. Rescaling either array by a positive constant will * not affect the result. * * <P>The square root <code><i>r</i></code> of the correlation * coefficient <code><i>r</i><sup>2</sup></code> is the variance * in the second array explained by a linear relation with the * the first array. * * <P>The definition of the correlation coefficient is: * * <blockquote><code> * correlation(xs,ys)<sup><sup>2</sup></sup> * <br>= sumSqDiff(xs,ys)<sup><sup>2</sup></sup> * <br> / (sumSqDiff(xs,xs) * sumSqDiff(xs,xs)) * </code></blockquote> * * where * * <blockquote><code> * sumSqDiff(xs,ys) * <br>= <big><big>Σ</big></big><sub><sub>i<xs.length</sub></sub> * (xs[i] - mean(xs)) * (ys[i] - mean(ys)) * </code></blockquote> * * and thus the terms in the denominator above reduce using: * * <blockquote><code> * sumSqDiffs(xs,xs) * <br>= <big><big>Σ</big></big><sub><sub>i<xs.length</sub></sub> * (xs[i] - mean(xs))<sup><sup>2</sup></sup> * </code></blockquote> * * <P>See the following for more details: * * <UL> * * <LI>Eric W. Weisstein. "Correlation Coefficient." From * MathWorld--A Wolfram Web Resource. <a * href="http://mathworld.wolfram.com/CorrelationCoefficient.html" * >http://mathworld.wolfram.com/CorrelationCoefficient.html</a> * * <LI>Wikipedia. <a * href="http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient">Pearson * product-moment correlation coefficient</a>. * * </UL> * * @param xs First array of values. * @param ys Second array of values. * @return The correlation coefficient of the two arrays. * @throws IllegalArgumentException If the arrays are not the same * length. */ public static double correlation(double[] xs, double[] ys) { if (xs.length != ys.length) { String msg = "xs and ys must be the same length." + " Found xs.length=" + xs.length + " ys.length=" + ys.length; throw new IllegalArgumentException(msg); } double meanXs = mean(xs); double meanYs = mean(ys); double ssXX = sumSquareDiffs(xs,meanXs); double ssYY = sumSquareDiffs(ys,meanYs); double ssXY = sumSquareDiffs(xs,ys,meanXs,meanYs); return Math.sqrt((ssXY*ssXY) / (ssXX * ssYY)); } /** * Returns a sample from the discrete distribution represented by the * specified cumulative probability ratios, using the specified random * number generator. * * <p>The cumulative probability ratios represent unnormalized probabilities * of generating the value of their index or less, that is, unnormalized * cumulative probabilities. For instance, consider * the cumulative probability ratios <code>{ 0.5, 0.5, 3.9, 10.1}</code>: * * <blockquote><table border='1' cellpadding="5"> * <tr><th>Outcome</th><th>Value</th><th>Unnormalized Prob</th><th>Prob</th></tr> * <tr><td>0</td> <td>0.5</td> <td>0.5</td> <td>0.5/10.1</td></tr> * <tr><td>1</td> <td>0.5</td> <td>0.0</td> <td>0.0/10.1</td></tr> * <tr><td>2</td> <td>3.9</td> <td>3.4</td> <td>3.4/10.1</td></tr> * <tr><td>3</td> <td>10.1</td> <td>6.2</td> <td>6.2/10.1</td></tr> * </table></blockquote> * * A sample is taken by generating a random number x between 0.0 and * the value of the last element (here 10.1). The value returned is * the index i such that: * * <blockquote><pre> * cumulativeProbRatios[i-1] < x <= cumulativeProbRatios[i]</pre></blockquote> * * The corresponding probabilities given the sample input are * listed in the last column in the table above. * * <p>Note that if two * elements have the same value, there is no chance of generating * the outcome with the higher index. In the example above, this * corresponds to outcome 1 having probaiblity 0.0 * * <p><b>Warning</b>: The cumulative probability ratios are required to meet * two conditions. First, all values must be finite and non-negative. Second, * the values must be non-decreasing, so that * <code>cumulativeProbRatios[i] <= cumulativeProbRatios[i+1]</code>. * If either of these conditions is not met, the result is undefined. * * @param cumulativeProbRatios Cumulative probability for outcome less than * or equal to index. * @param random Random number generator for sampling. * @return A sample from the specified distribution. */ public static int sample(double[] cumulativeProbRatios, Random random) { int low = 0; int high = cumulativeProbRatios.length - 1; double x = random.nextDouble() * cumulativeProbRatios[high]; while (low < high) { int mid = (high + low)/2; if (x > cumulativeProbRatios[mid]) { low = mid+1; } else if (high == mid) { return (x > cumulativeProbRatios[low]) ? mid : low; } else { high = mid; } } return low; } /** * Returns the log (base 2) of the probability of the specified * discrete distribution given the specified uniform Dirichlet * with concentration parameters equal to the specified value. * * <p>See {@link #dirichletLog2Prob(double[],double[])} for * more information on the Dirichlet distribution. This method * returns a result equivalent to the following (though is more * efficiently implemented): * * <blockquote><pre> * double[] alphas = new double[xs.length]; * java.util.Arrays.fill(alphas,alpha); * assert(dirichletLog2Prob(alpha,xs) == dirichletLog2Prob(alphas,xs));</pre></blockquote> * * <p>For the uniform Dirichlet, the distribution simplifies to * the following form: * * <blockquote><pre> * p(xs | Dirichlet(α)) = (1/Z(α)) <big><big><big>Π</big></big></big><sub><sub>i < k</sub></sub> xs[i]<sup>α-1</sup></pre></blockquote> * * where * * <blockquote><pre> * Z(α) = Γ(α)<sup>k</sup> / Γ(k * α)</pre></blockquote> * * <p><i>Warning:</i> The probability distribution must be proper * in the sense of having values between 0 and 1 inclusive and * summing to 1.0. This property is not checked by this method. * * @param alpha Dirichlet concentration parameter to use for each * dimension. * @param xs The distribution whose probability is returned. * @return The log (base 2) probability of the specified * distribution in the uniform Dirichlet distribution with concentration * parameters equal to <code>alpha</code>. * @throws IllegalArgumentException If the values in the distribution * are not between 0 and 1 inclusive or if the concentration parameter is * not positive and finite. */ public static double dirichletLog2Prob(double alpha, double[] xs) { verifyAlpha(alpha); verifyDistro(xs); int k = xs.length; // normalizing term double result = com.aliasi.util.Math.log2Gamma(k * alpha) - k * com.aliasi.util.Math.log2Gamma(alpha); double alphaMinus1 = alpha - 1; for (int i = 0; i < k; ++i) result += alphaMinus1 * com.aliasi.util.Math.log2(xs[i]); return result; } /** * Returns the log (base 2) of the probability of the specified * discrete distribution given the specified Dirichlet * distribution concentration parameters. A Dirichlet * distribution is a distribution over <code>k</code>-dimensional * discrete distributions. * * <p>The Dirichlet is widely used because it is the conjugate * prior for multinomials in a Bayesian setting, and thus may * be used to specify a convenient distribution over discrete * distributions.</p> * * <p>A Dirichlet distribution is specified by a dimensionality * <code>k</code> and a concentration parameter <code>α[i] > 0</code> * for each <code>i < k</code>. * The probability of the distribution <code>xs</code> * in a Dirichlet distribution of dimension <code>k</code> * and concentration parameters <code>α</code> is * given (up to a normalizing constant) by: * * <blockquote><pre> * p(xs | Dirichlet(α)) * <big>∝</big> <big><big><big>Π</big></big></big><sub><sub>i < k</sub></sub> xs[i]<sup>α[i]-1</sup></pre></blockquote> * * The full distribution is: * * <blockquote><pre> * p(xs | Dirichlet(α)) * = (1/Z(α)) * <big><big><big>Π</big></big></big><sub><sub>i < k</sub></sub> xs[i]<sup>α[i]-1</sup></pre></blockquote> * * where the normalizing constant is given by: * * <blockquote><pre> * Z(α) = <big><big>Π</big></big><sub><sub>i < k</sub></sub> Γ(α[i]) * / Γ(<big><big>Σ</big></big><sub><sub>i < k</sub></sub> α[i])</pre></blockquote> * * <p><i>Warning:</i> The probability distribution must be proper * in the sense of having values between 0 and 1 inclusive and * summing to 1.0. This property is not checked by this method. * * @param alphas The concentration parameters for the uniform Dirichlet. * @param xs The outcome distribution * @return The probability of the outcome distribution given the * Dirichlet concentratioin parameter. * @throws IllegalArgumentException If the Dirichlet parameters and * distribution are not arrays of the same length or if the distribution * parameters in xs are not between 0 and 1 inclusive, or if any of * the concentration parameters is not positive and finite. */ public static double dirichletLog2Prob(double[] alphas, double[] xs) { if (alphas.length != xs.length) { String msg = "Dirichlet prior alphas and distribution xs must be the same length." + " Found alphas.length=" + alphas.length + " xs.length=" + xs.length; throw new IllegalArgumentException(msg); } for (int i = 0; i < alphas.length; ++i) verifyAlpha(alphas[i]); verifyDistro(xs); int k = xs.length; double result = 0.0; double alphaSum = 0.0; for (int i = 0; i < alphas.length; ++i) { alphaSum += alphas[i]; result -= com.aliasi.util.Math.log2Gamma(alphas[i]); } result += com.aliasi.util.Math.log2Gamma(alphaSum); for (int i = 0; i < k; ++i) result += (alphas[i] - 1) * com.aliasi.util.Math.log2(xs[i]); return result; } static void verifyAlpha(double alpha) { if (Double.isNaN(alpha) || Double.isInfinite(alpha) || alpha <= 0.0) { String msg = "Concentration parameter must be positive and finite." + " Found alpha=" + alpha; throw new IllegalArgumentException(msg); } } static void verifyDistro(double[] xs) { for (int i = 0; i < xs.length; ++i) { if (xs[i] < 0.0 || xs[i] > 1.0 || Double.isNaN(xs[i]) || Double.isInfinite(xs[i])) { String msg = "All xs must be betwee 0.0 and 1.0 inclusive." + " Found xs[" + i + "]=" + xs[i]; throw new IllegalArgumentException(msg); } } } // = sumSquareDiffs(xs,mean,xs,mean); static double sumSquareDiffs(double[] xs, double mean) { double sum = 0.0; for (int i = 0; i < xs.length; ++i) { double diff = xs[i] - mean; sum += diff * diff; } return sum; } static double sumSquareDiffs(double[] xs, double[] ys, double meanXs, double meanYs) { double sum = 0.0; for (int i = 0; i < xs.length; ++i) sum += (xs[i] - meanXs) * (ys[i] - meanYs); return sum; } static double variance(double[] xs, double mean) { return sumSquareDiffs(xs,mean) / (double) xs.length; } static void assertNonNegative(String variableName, double value) { if (Double.isInfinite(value) || Double.isNaN(value) || value < 0.0) { String msg = "Require finite non-negative value." + " Found " + variableName + " =" + value; throw new IllegalArgumentException(msg); } } private static double csTerm(double found, double expected) { double diff = found - expected; return diff * diff / expected; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -