📄 descriptivestatistics.java
字号:
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.commons.math.stat.descriptive;import java.io.Serializable;import java.lang.reflect.InvocationTargetException;import java.util.Arrays;import org.apache.commons.discovery.tools.DiscoverClass;import org.apache.commons.math.stat.descriptive.moment.GeometricMean;import org.apache.commons.math.stat.descriptive.moment.Kurtosis;import org.apache.commons.math.stat.descriptive.moment.Mean;import org.apache.commons.math.stat.descriptive.moment.Skewness;import org.apache.commons.math.stat.descriptive.moment.Variance;import org.apache.commons.math.stat.descriptive.rank.Max;import org.apache.commons.math.stat.descriptive.rank.Min;import org.apache.commons.math.stat.descriptive.rank.Percentile;import org.apache.commons.math.stat.descriptive.summary.Sum;import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;import org.apache.commons.math.util.ResizableDoubleArray;/** * Maintains a dataset of values of a single variable and computes descriptive * statistics based on stored data. The {@link #getWindowSize() windowSize} * property sets a limit on the number of values that can be stored in the * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of * the dataset. 
This value should be used with caution, as the backing store
 * will grow without bound in this case. For very large datasets,
 * {@link SummaryStatistics}, which does not store the dataset, should be used
 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
 * more values are added than can be stored in the dataset, new values are
 * added in a "rolling" manner, with new values replacing the "oldest" values
 * in the dataset.
 *
 * <p>Note: this class is not threadsafe. Use
 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
 * threads is required.</p>
 *
 * @version $Revision: 620318 $ $Date: 2008-02-10 13:17:24 -0700 (Sun, 10 Feb 2008) $
 */
public class DescriptiveStatistics implements StatisticalSummary, Serializable {

    /** Serialization UID. */
    private static final long serialVersionUID = -2734185686570407433L;

    /**
     * Maximum number of values kept in the dataset. The initial value,
     * INFINITE_WINDOW, places no limit on the dataset size.
     */
    protected int windowSize = INFINITE_WINDOW;

    /**
     * Backing store for the data values added to this instance.
     */
    protected ResizableDoubleArray eDA = new ResizableDoubleArray();

    /** Mean statistic implementation - can be reset by setter. */
    private UnivariateStatistic meanImpl = new Mean();

    /** Geometric mean statistic implementation - can be reset by setter. */
    private UnivariateStatistic geometricMeanImpl = new GeometricMean();

    /** Kurtosis statistic implementation - can be reset by setter. */
    private UnivariateStatistic kurtosisImpl = new Kurtosis();

    /** Maximum statistic implementation - can be reset by setter. */
    private UnivariateStatistic maxImpl = new Max();

    /** Minimum statistic implementation - can be reset by setter. */
    private UnivariateStatistic minImpl = new Min();

    /** Percentile statistic implementation - can be reset by setter. */
    private UnivariateStatistic percentileImpl = new Percentile();

    /** Skewness statistic implementation - can be reset by setter. */
    private UnivariateStatistic skewnessImpl = new Skewness();

    /** Variance statistic implementation - can be reset by setter.
*/ private UnivariateStatistic varianceImpl = new Variance(); /** Sum of squares statistic implementation - can be reset by setter. */ private UnivariateStatistic sumsqImpl = new SumOfSquares(); /** Sum statistic implementation - can be reset by setter. */ private UnivariateStatistic sumImpl = new Sum(); /** * Construct a DescriptiveStatistics instance with an infinite window */ public DescriptiveStatistics() { } /** * Construct a DescriptiveStatistics instance with the specified window * * @param window the window size. */ public DescriptiveStatistics(int window) { super(); setWindowSize(window); } /** * Create an instance of a <code>DescriptiveStatistics</code> * @param cls the type of <code>DescriptiveStatistics</code> object to * create. * @return a new instance. * @throws InstantiationException is thrown if the object can not be * created. * @throws IllegalAccessException is thrown if the type's default * constructor is not accessible. * @deprecated to be removed in commons-math 2.0 */ public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException { return (DescriptiveStatistics)cls.newInstance(); } /** * Create an instance of a <code>DescriptiveStatistics</code> * @return a new DescriptiveStatistics instance. * @deprecated to be removed in commons-math 2.0 */ public static DescriptiveStatistics newInstance() { DescriptiveStatistics factory = null; try { DiscoverClass dc = new DiscoverClass(); factory = (DescriptiveStatistics) dc.newInstance( DescriptiveStatistics.class, "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl"); } catch(Throwable t) { return new DescriptiveStatisticsImpl(); } return factory; } /** * Represents an infinite window size. When the {@link #getWindowSize()} * returns this value, there is no limit to the number of data values * that can be stored in the dataset. */ public static final int INFINITE_WINDOW = -1; /** * Adds the value to the dataset. 
If the dataset is at the maximum size * (i.e., the number of stored elements equals the currently configured * windowSize), the first (oldest) element in the dataset is discarded * to make room for the new value. * * @param v the value to be added */ public void addValue(double v) { if (windowSize != INFINITE_WINDOW) { if (getN() == windowSize) { eDA.addElementRolling(v); } else if (getN() < windowSize) { eDA.addElement(v); } } else { eDA.addElement(v); } } /** * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> * arithmetic mean </a> of the available values * @return The mean or Double.NaN if no values have been added. */ public double getMean() { return apply(meanImpl); } /** * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> * geometric mean </a> of the available values * @return The geometricMean, Double.NaN if no values have been added, * or if the productof the available values is less than or equal to 0. */ public double getGeometricMean() { return apply(geometricMeanImpl); } /** * Returns the variance of the available values. * @return The variance, Double.NaN if no values have been added * or 0.0 for a single value set. */ public double getVariance() { return apply(varianceImpl); } /** * Returns the standard deviation of the available values. * @return The standard deviation, Double.NaN if no values have been added * or 0.0 for a single value set. */ public double getStandardDeviation() { double stdDev = Double.NaN; if (getN() > 0) { if (getN() > 1) { stdDev = Math.sqrt(getVariance()); } else { stdDev = 0.0; } } return (stdDev); } /** * Returns the skewness of the available values. Skewness is a * measure of the asymmetry of a given distribution. * @return The skewness, Double.NaN if no values have been added * or 0.0 for a value set <=2. */ public double getSkewness() { return apply(skewnessImpl); } /** * Returns the Kurtosis of the available values. 
Kurtosis is a * measure of the "peakedness" of a distribution * @return The kurtosis, Double.NaN if no values have been added, or 0.0 * for a value set <=3. */ public double getKurtosis() { return apply(kurtosisImpl); } /** * Returns the maximum of the available values * @return The max or Double.NaN if no values have been added. */ public double getMax() { return apply(maxImpl); } /** * Returns the minimum of the available values * @return The min or Double.NaN if no values have been added. */ public double getMin() { return apply(minImpl); } /** * Returns the number of available values * @return The number of available values */ public long getN() { return eDA.getNumElements(); } /** * Returns the sum of the values that have been added to Univariate. * @return The sum or Double.NaN if no values have been added */ public double getSum() { return apply(sumImpl); } /** * Returns the sum of the squares of the available values. * @return The sum of the squares or Double.NaN if no * values have been added. */ public double getSumsq() { return apply(sumsqImpl); } /** * Resets all statistics and storage */ public void clear() { eDA.clear(); } /** * Returns the maximum number of values that can be stored in the * dataset, or INFINITE_WINDOW (-1) if there is no limit. * * @return The current window size or -1 if its Infinite. */ public int getWindowSize() { return windowSize; } /** * WindowSize controls the number of values which contribute * to the reported statistics. For example, if * windowSize is set to 3 and the values {1,2,3,4,5} * have been added <strong> in that order</strong> * then the <i>available values</i> are {3,4,5} and all * reported statistics will be based on these values * @param windowSize sets the size of the window. 
*/
    public void setWindowSize(int windowSize) {
        // Only positive sizes and the INFINITE_WINDOW sentinel are accepted.
        if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
            throw new IllegalArgumentException("window size must be positive.");
        }

        this.windowSize = windowSize;

        // When a bounded window is smaller than the number of stored
        // elements, the surplus is discarded from the front of the array.
        final boolean bounded = windowSize != INFINITE_WINDOW;
        if (bounded && windowSize < eDA.getNumElements()) {
            eDA.discardFrontElements(eDA.getNumElements() - windowSize);
        }
    }

    /**
     * Returns the current set of values in an array of double primitives.
     * The order of addition is preserved. The returned array is a fresh
     * copy of the underlying data -- i.e., it is not a reference to the
     * stored data.
     *
     * @return returns the current set of numbers in the order in which they
     * were added to this set
     */
    public double[] getValues() {
        double[] copiedArray = new double[eDA.getNumElements()];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -