📄 ttestimpl.java
字号:
throw new IllegalArgumentException("insufficient data for t statistic"); } return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(), sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN()); } /** * Returns the <i>observed significance level</i>, or * <i>p-value</i>, associated with a two-sample, two-tailed t-test * comparing the means of the datasets described by two StatisticalSummary * instances, under the hypothesis of equal subpopulation variances. To * perform a test without the equal variances assumption, use * {@link #tTest(StatisticalSummary, StatisticalSummary)}. * <p> * The number returned is the smallest significance level * at which one can reject the null hypothesis that the two means are * equal in favor of the two-sided alternative that they are different. * For a one-sided test, divide the returned value by 2.</p> * <p> * See {@link #homoscedasticT(double[], double[])} for the formula used to * compute the t-statistic. The sum of the sample sizes minus 2 is used as * the degrees of freedom.</p> * <p> * <strong>Usage Note:</strong><br> * The validity of the p-value depends on the assumptions of the parametric * t-test procedure, as discussed * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> * </p><p> * <strong>Preconditions</strong>: <ul> * <li>The datasets described by the two Univariates must each contain * at least 2 observations. * </li></ul></p> * * @param sampleStats1 StatisticalSummary describing data from the first sample * @param sampleStats2 StatisticalSummary describing data from the second sample * @return p-value for t-test * @throws IllegalArgumentException if the precondition is not met * @throws MathException if an error occurs computing the p-value */ public double homoscedasticTTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) throws IllegalArgumentException, MathException { if ((sampleStats1 == null) || (sampleStats2 == null || Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { throw new IllegalArgumentException("insufficient data for t statistic"); } return homoscedasticTTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(), sampleStats2.getVariance(), (double) sampleStats1.getN(), (double) sampleStats2.getN()); } /** * Performs a * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> * two-sided t-test</a> evaluating the null hypothesis that * <code>sampleStats1</code> and <code>sampleStats2</code> describe * datasets drawn from populations with the same mean, with significance * level <code>alpha</code>. This test does not assume that the * subpopulation variances are equal. To perform the test under the equal * variances assumption, use * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}. * <p> * Returns <code>true</code> iff the null hypothesis that the means are * equal can be rejected with confidence <code>1 - alpha</code>. To * perform a 1-sided test, use <code>alpha * 2</code></p> * <p> * See {@link #t(double[], double[])} for the formula used to compute the * t-statistic. Degrees of freedom are approximated using the * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> * Welch-Satterthwaite approximation.</a></p> * <p> * <strong>Examples:</strong><br><ol> * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at * the 95%, use * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code> * </li> * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> * at the 99% level, first verify that the measured mean of * <code>sample 1</code> is less than the mean of <code>sample 2</code> * and then use * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code> * </li></ol></p> * <p> * <strong>Usage Note:</strong><br> * The validity of the test depends on the assumptions of the parametric * t-test procedure, as discussed * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> * here</a></p> * <p> * <strong>Preconditions</strong>: <ul> * <li>The datasets described by the two Univariates must each contain * at least 2 observations. * </li> * <li> <code> 0 < alpha < 0.5 </code> * </li></ul></p> * * @param sampleStats1 StatisticalSummary describing sample data values * @param sampleStats2 StatisticalSummary describing sample data values * @param alpha significance level of the test * @return true if the null hypothesis can be rejected with * confidence 1 - alpha * @throws IllegalArgumentException if the preconditions are not met * @throws MathException if an error occurs performing the test */ public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2, double alpha) throws IllegalArgumentException, MathException { if ((alpha <= 0) || (alpha > 0.5)) { throw new IllegalArgumentException("bad significance level: " + alpha); } return (tTest(sampleStats1, sampleStats2) < alpha); } //----------------------------------------------- Protected methods /** * Gets a DistributionFactory to use in creating TDistribution instances. * @return a distribution factory. * @deprecated inject TDistribution directly instead of using a factory. */ protected DistributionFactory getDistributionFactory() { return DistributionFactory.newInstance(); } /** * Computes approximate degrees of freedom for 2-sample t-test. * * @param v1 first sample variance * @param v2 second sample variance * @param n1 first sample n * @param n2 second sample n * @return approximate degrees of freedom */ protected double df(double v1, double v2, double n1, double n2) { return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / (n2 * n2 * (n2 - 1d))); } /** * Computes t test statistic for 1-sample t-test. * * @param m sample mean * @param mu constant to test against * @param v sample variance * @param n sample n * @return t test statistic */ protected double t(double m, double mu, double v, double n) { return (m - mu) / Math.sqrt(v / n); } /** * Computes t test statistic for 2-sample t-test. * <p> * Does not assume that subpopulation variances are equal.</p> * * @param m1 first sample mean * @param m2 second sample mean * @param v1 first sample variance * @param v2 second sample variance * @param n1 first sample n * @param n2 second sample n * @return t test statistic */ protected double t(double m1, double m2, double v1, double v2, double n1, double n2) { return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2)); } /** * Computes t test statistic for 2-sample t-test under the hypothesis * of equal subpopulation variances. * * @param m1 first sample mean * @param m2 second sample mean * @param v1 first sample variance * @param v2 second sample variance * @param n1 first sample n * @param n2 second sample n * @return t test statistic */ protected double homoscedasticT(double m1, double m2, double v1, double v2, double n1, double n2) { double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2)); } /** * Computes p-value for 2-sided, 1-sample t-test. * * @param m sample mean * @param mu constant to test against * @param v sample variance * @param n sample n * @return p-value * @throws MathException if an error occurs computing the p-value */ protected double tTest(double m, double mu, double v, double n) throws MathException { double t = Math.abs(t(m, mu, v, n)); distribution.setDegreesOfFreedom(n - 1); return 1.0 - distribution.cumulativeProbability(-t, t); } /** * Computes p-value for 2-sided, 2-sample t-test. * <p> * Does not assume subpopulation variances are equal. Degrees of freedom * are estimated from the data.</p> * * @param m1 first sample mean * @param m2 second sample mean * @param v1 first sample variance * @param v2 second sample variance * @param n1 first sample n * @param n2 second sample n * @return p-value * @throws MathException if an error occurs computing the p-value */ protected double tTest(double m1, double m2, double v1, double v2, double n1, double n2) throws MathException { double t = Math.abs(t(m1, m2, v1, v2, n1, n2)); double degreesOfFreedom = 0; degreesOfFreedom = df(v1, v2, n1, n2); distribution.setDegreesOfFreedom(degreesOfFreedom); return 1.0 - distribution.cumulativeProbability(-t, t); } /** * Computes p-value for 2-sided, 2-sample t-test, under the assumption * of equal subpopulation variances. * <p> * The sum of the sample sizes minus 2 is used as degrees of freedom.</p> * * @param m1 first sample mean * @param m2 second sample mean * @param v1 first sample variance * @param v2 second sample variance * @param n1 first sample n * @param n2 second sample n * @return p-value * @throws MathException if an error occurs computing the p-value */ protected double homoscedasticTTest(double m1, double m2, double v1, double v2, double n1, double n2) throws MathException { double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2)); double degreesOfFreedom = (double) (n1 + n2 - 2); distribution.setDegreesOfFreedom(degreesOfFreedom); return 1.0 - distribution.cumulativeProbability(-t, t); } /** * Modify the distribution used to compute inference statistics. * @param value the new distribution * @since 1.2 */ public void setDistribution(TDistribution value) { distribution = value; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -