resample.java
来自「Weka」· Java 代码 · 共 669 行 · 第 1/2 页
JAVA
669 行
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Resample.java * Copyright (C) 2002 University of Waikato, Hamilton, New Zealand * */package weka.filters.supervised.instance;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.SupervisedFilter;import java.util.Collections;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Produces a random subsample of a dataset using either sampling with replacement or without replacement.<br/> * The original dataset must fit entirely in memory. The number of instances in the generated dataset may be specified. The dataset must have a nominal class attribute. If not, use the unsupervised version. The filter can be made to maintain the class distribution in the subsample, or to bias the class distribution toward a uniform distribution. When used in batch mode (i.e. in the FilteredClassifier), subsequent batches are NOT resampled. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the random number seed (default 1)</pre> * * <pre> -Z <num> * The size of the output dataset, as a percentage of * the input dataset (default 100)</pre> * * <pre> -B <num> * Bias factor towards uniform class distribution. * 0 = distribution in input data -- 1 = uniform distribution. * (default 0)</pre> * * <pre> -no-replacement * Disables replacement of instances * (default: with replacement)</pre> * * <pre> -V * Inverts the selection - only available with '-no-replacement'.</pre> * <!-- options-end --> * * @author Len Trigg (len@reeltwo.com) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.11 $ */public class Resample extends Filter implements SupervisedFilter, OptionHandler { /** for serialization */ static final long serialVersionUID = 7079064953548300681L; /** The subsample size, percent of original set, default 100% */ protected double m_SampleSizePercent = 100; /** The random number generator seed */ protected int m_RandomSeed = 1; /** The degree of bias towards uniform (nominal) class distribution */ protected double m_BiasToUniformClass = 0; /** Whether to perform sampling with replacement or without */ protected boolean m_NoReplacement = false; /** Whether to invert the selection (only if instances are drawn WITHOUT * replacement) * @see #m_NoReplacement */ protected boolean m_InvertSelection = false; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Produces a random subsample of a dataset using either sampling " + "with replacement or without replacement.\n" + "The original dataset must " + "fit entirely in memory. The number of instances in the generated " + "dataset may be specified. The dataset must have a nominal class " + "attribute. If not, use the unsupervised version. The filter can be " + "made to maintain the class distribution in the subsample, or to bias " + "the class distribution toward a uniform distribution. When used in batch " + "mode (i.e. in the FilteredClassifier), subsequent batches are NOT resampled."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result = new Vector(); result.addElement(new Option( "\tSpecify the random number seed (default 1)", "S", 1, "-S <num>")); result.addElement(new Option( "\tThe size of the output dataset, as a percentage of\n" +"\tthe input dataset (default 100)", "Z", 1, "-Z <num>")); result.addElement(new Option( "\tBias factor towards uniform class distribution.\n" +"\t0 = distribution in input data -- 1 = uniform distribution.\n" +"\t(default 0)", "B", 1, "-B <num>")); result.addElement(new Option( "\tDisables replacement of instances\n" +"\t(default: with replacement)", "no-replacement", 0, "-no-replacement")); result.addElement(new Option( "\tInverts the selection - only available with '-no-replacement'.", "V", 0, "-V")); return result.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the random number seed (default 1)</pre> * * <pre> -Z <num> * The size of the output dataset, as a percentage of * the input dataset (default 100)</pre> * * <pre> -B <num> * Bias factor towards uniform class distribution. * 0 = distribution in input data -- 1 = uniform distribution. * (default 0)</pre> * * <pre> -no-replacement * Disables replacement of instances * (default: with replacement)</pre> * * <pre> -V * Inverts the selection - only available with '-no-replacement'.</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; tmpStr = Utils.getOption('S', options); if (tmpStr.length() != 0) setRandomSeed(Integer.parseInt(tmpStr)); else setRandomSeed(1); tmpStr = Utils.getOption('B', options); if (tmpStr.length() != 0) setBiasToUniformClass(Double.parseDouble(tmpStr)); else setBiasToUniformClass(0); tmpStr = Utils.getOption('Z', options); if (tmpStr.length() != 0) setSampleSizePercent(Double.parseDouble(tmpStr)); else setSampleSizePercent(100); setNoReplacement(Utils.getFlag("no-replacement", options)); if (getNoReplacement()) setInvertSelection(Utils.getFlag('V', options)); if (getInputFormat() != null) { setInputFormat(getInputFormat()); } } /** * Gets the current settings of the filter. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { Vector<String> result; result = new Vector<String>(); result.add("-B"); result.add("" + getBiasToUniformClass()); result.add("-S"); result.add("" + getRandomSeed()); result.add("-Z"); result.add("" + getSampleSizePercent()); if (getNoReplacement()) { result.add("-no-replacement"); if (getInvertSelection()) result.add("-V"); } return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String biasToUniformClassTipText() { return "Whether to use bias towards a uniform class. A value of 0 leaves the class " + "distribution as-is, a value of 1 ensures the class distribution is " + "uniform in the output data."; } /** * Gets the bias towards a uniform class. A value of 0 leaves the class * distribution as-is, a value of 1 ensures the class distributions are * uniform in the output data. * * @return the current bias */ public double getBiasToUniformClass() { return m_BiasToUniformClass; } /** * Sets the bias towards a uniform class. A value of 0 leaves the class * distribution as-is, a value of 1 ensures the class distributions are * uniform in the output data. * * @param newBiasToUniformClass the new bias value, between 0 and 1. */ public void setBiasToUniformClass(double newBiasToUniformClass) { m_BiasToUniformClass = newBiasToUniformClass; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String randomSeedTipText() { return "Sets the random number seed for subsampling."; } /** * Gets the random number seed. * * @return the random number seed. */ public int getRandomSeed() { return m_RandomSeed; } /** * Sets the random number seed. * * @param newSeed the new random number seed. */ public void setRandomSeed(int newSeed) { m_RandomSeed = newSeed; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String sampleSizePercentTipText() { return "The subsample size as a percentage of the original set."; } /** * Gets the subsample size as a percentage of the original set. * * @return the subsample size */ public double getSampleSizePercent() { return m_SampleSizePercent; } /** * Sets the size of the subsample, as a percentage of the original set. * * @param newSampleSizePercent the subsample set size, between 0 and 100. */ public void setSampleSizePercent(double newSampleSizePercent) { m_SampleSizePercent = newSampleSizePercent; } /** * Returns the tip text for this property *
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?