⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simpleregression.java

📁 Apache的common math数学软件包
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *      http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.commons.math.stat.regression;import java.io.Serializable;import org.apache.commons.math.MathException;import org.apache.commons.math.distribution.TDistribution;import org.apache.commons.math.distribution.TDistributionImpl;/** * Estimates an ordinary least squares regression model * with one independent variable. * <p> * <code> y = intercept + slope * x  </code></p> * <p> * Standard errors for <code>intercept</code> and <code>slope</code> are  * available as well as ANOVA, r-square and Pearson's r statistics.</p> * <p> * Observations (x,y pairs) can be added to the model one at a time or they  * can be provided in a 2-dimensional array.  The observations are not stored * in memory, so there is no limit to the number of observations that can be * added to the model.</p>  * <p> * <strong>Usage Notes</strong>: <ul> * <li> When there are fewer than two observations in the model, or when * there is no variation in the x values (i.e. all x values are the same)  * all statistics return <code>NaN</code>. At least two observations with * different x coordinates are required to estimate a bivariate regression  * model. 
* </li> * <li> getters for the statistics always compute values based on the current * set of observations -- i.e., you can get statistics, then add more data * and get updated statistics without using a new instance.  There is no  * "compute" method that updates all statistics.  Each of the getters performs * the necessary computations to return the requested statistic.</li> * </ul></p> * * @version $Revision: 617953 $ $Date: 2008-02-02 22:54:00 -0700 (Sat, 02 Feb 2008) $ */public class SimpleRegression implements Serializable {    /** Serializable version identifier */    private static final long serialVersionUID = -3004689053607543335L;    /** the distribution used to compute inference statistics. */    private TDistribution distribution;        /** sum of x values */    private double sumX = 0d;    /** total variation in x (sum of squared deviations from xbar) */    private double sumXX = 0d;    /** sum of y values */    private double sumY = 0d;    /** total variation in y (sum of squared deviations from ybar) */    private double sumYY = 0d;    /** sum of products */    private double sumXY = 0d;    /** number of observations */    private long n = 0;    /** mean of accumulated x values, used in updating formulas */    private double xbar = 0;    /** mean of accumulated y values, used in updating formulas */    private double ybar = 0;    // ---------------------Public methods--------------------------------------    /**     * Create an empty SimpleRegression instance     */    public SimpleRegression() {        this(new TDistributionImpl(1.0));    }        /**     * Create an empty SimpleRegression using the given distribution object to     * compute inference statistics.     * @param t the distribution used to compute inference statistics.     * @since 1.2     */    public SimpleRegression(TDistribution t) {        super();        setDistribution(t);    }        /**     * Adds the observation (x,y) to the regression data set.     
* <p>     * Uses updating formulas for means and sums of squares defined in      * "Algorithms for Computing the Sample Variance: Analysis and     * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.      * 1983, American Statistician, vol. 37, pp. 242-247, referenced in     * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.</p>     *     *     * @param x independent variable value     * @param y dependent variable value     */    public void addData(double x, double y) {        if (n == 0) {            xbar = x;            ybar = y;        } else {            double dx = x - xbar;            double dy = y - ybar;            sumXX += dx * dx * (double) n / (double) (n + 1.0);            sumYY += dy * dy * (double) n / (double) (n + 1.0);            sumXY += dx * dy * (double) n / (double) (n + 1.0);            xbar += dx / (double) (n + 1.0);            ybar += dy / (double) (n + 1.0);        }        sumX += x;        sumY += y;        n++;                if (n > 2) {            distribution.setDegreesOfFreedom(n - 2);        }    }    /**     * Adds the observations represented by the elements in      * <code>data</code>.     * <p>     * <code>(data[0][0],data[0][1])</code> will be the first observation, then     * <code>(data[1][0],data[1][1])</code>, etc.</p>     * <p>      * This method does not replace data that has already been added.  The     * observations represented by <code>data</code> are added to the existing     * dataset.</p>     * <p>      * To replace all data, use <code>clear()</code> before adding the new      * data.</p>     *      * @param data array of observations to be added     */    public void addData(double[][] data) {        for (int i = 0; i < data.length; i++) {            addData(data[i][0], data[i][1]);        }    }    /**     * Clears all data from the model.     
*/    public void clear() {        sumX = 0d;        sumXX = 0d;        sumY = 0d;        sumYY = 0d;        sumXY = 0d;        n = 0;    }    /**     * Returns the number of observations that have been added to the model.     *     * @return n number of observations that have been added.     */    public long getN() {        return n;    }    /**     * Returns the "predicted" <code>y</code> value associated with the      * supplied <code>x</code> value,  based on the data that has been     * added to the model when this method is activated.     * <p>     * <code> predict(x) = intercept + slope * x </code></p>     * <p>     * <strong>Preconditions</strong>: <ul>     * <li>At least two observations (with at least two different x values)     * must have been added before invoking this method. If this method is      * invoked before a model can be estimated, <code>Double,NaN</code> is     * returned.     * </li></ul></p>     *     * @param x input <code>x</code> value     * @return predicted <code>y</code> value     */    public double predict(double x) {        double b1 = getSlope();        return getIntercept(b1) + b1 * x;    }    /**     * Returns the intercept of the estimated regression line.     * <p>     * The least squares estimate of the intercept is computed using the      * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.     * The intercept is sometimes denoted b0.</p>     * <p>     * <strong>Preconditions</strong>: <ul>     * <li>At least two observations (with at least two different x values)     * must have been added before invoking this method. If this method is      * invoked before a model can be estimated, <code>Double,NaN</code> is     * returned.     * </li></ul></p>     *     * @return the intercept of the regression line     */    public double getIntercept() {        return getIntercept(getSlope());    }    /**    * Returns the slope of the estimated regression line.      
* <p>    * The least squares estimate of the slope is computed using the     * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.    * The slope is sometimes denoted b1.</p>    * <p>    * <strong>Preconditions</strong>: <ul>    * <li>At least two observations (with at least two different x values)    * must have been added before invoking this method. If this method is     * invoked before a model can be estimated, <code>Double.NaN</code> is    * returned.    * </li></ul></p>    *    * @return the slope of the regression line    */    public double getSlope() {        if (n < 2) {            return Double.NaN; //not enough data         }        if (Math.abs(sumXX) < 10 * Double.MIN_VALUE) {            return Double.NaN; //not enough variation in x        }        return sumXY / sumXX;    }    /**     * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">     * sum of squared errors</a> (SSE) associated with the regression      * model.     * <p>     * The sum is computed using the computational formula</p>     * <p>     * <code>SSE = SYY - (SXY * SXY / SXX)</code></p>     * <p>     * where <code>SYY</code> is the sum of the squared deviations of the y     * values about their mean, <code>SXX</code> is similarly defined and     * <code>SXY</code> is the sum of the products of x and y mean deviations.     * </p><p>     * The sums are accumulated using the updating algorithm referenced in      * {@link #addData}.</p>     * <p>     * The return value is constrained to be non-negative - i.e., if due to      * rounding errors the computational formula returns a negative result,      * 0 is returned.</p>     * <p>

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -