resample.java

来自「Weka」· Java 代码 · 共 669 行 · 第 1/2 页

JAVA
669
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Resample.java *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand * */package weka.filters.supervised.instance;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.SupervisedFilter;import java.util.Collections;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/**  <!-- globalinfo-start --> * Produces a random subsample of a dataset using either sampling with replacement or without replacement.<br/> * The original dataset must fit entirely in memory. The number of instances in the generated dataset may be specified. The dataset must have a nominal class attribute. If not, use the unsupervised version. The filter can be made to maintain the class distribution in the subsample, or to bias the class distribution toward a uniform distribution. When used in batch mode (i.e. in the FilteredClassifier), subsequent batches are NOT resampled. 
* <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -S &lt;num&gt; *  Specify the random number seed (default 1)</pre> *  * <pre> -Z &lt;num&gt; *  The size of the output dataset, as a percentage of *  the input dataset (default 100)</pre> *  * <pre> -B &lt;num&gt; *  Bias factor towards uniform class distribution. *  0 = distribution in input data -- 1 = uniform distribution. *  (default 0)</pre> *  * <pre> -no-replacement *  Disables replacement of instances *  (default: with replacement)</pre> *  * <pre> -V *  Inverts the selection - only available with '-no-replacement'.</pre> *  <!-- options-end --> * * @author Len Trigg (len@reeltwo.com) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.11 $  */public class Resample  extends Filter   implements SupervisedFilter, OptionHandler {    /** for serialization */  static final long serialVersionUID = 7079064953548300681L;  /** The subsample size, percent of original set, default 100% */  protected double m_SampleSizePercent = 100;    /** The random number generator seed */  protected int m_RandomSeed = 1;    /** The degree of bias towards uniform (nominal) class distribution */  protected double m_BiasToUniformClass = 0;  /** Whether to perform sampling with replacement or without */  protected boolean m_NoReplacement = false;  /** Whether to invert the selection (only if instances are drawn WITHOUT    * replacement)   * @see #m_NoReplacement */  protected boolean m_InvertSelection = false;  /**   * Returns a string describing this filter   *   * @return a description of the filter suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "Produces a random subsample of a dataset using either sampling "      + "with replacement or without replacement.\n"      + "The original dataset must "      + "fit entirely in memory. 
The number of instances in the generated "      + "dataset may be specified. The dataset must have a nominal class "      + "attribute. If not, use the unsupervised version. The filter can be "      + "made to maintain the class distribution in the subsample, or to bias "      + "the class distribution toward a uniform distribution. When used in batch "      + "mode (i.e. in the FilteredClassifier), subsequent batches are NOT resampled.";  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector result = new Vector();    result.addElement(new Option(	"\tSpecify the random number seed (default 1)",	"S", 1, "-S <num>"));    result.addElement(new Option(	"\tThe size of the output dataset, as a percentage of\n"	+"\tthe input dataset (default 100)",	"Z", 1, "-Z <num>"));    result.addElement(new Option(	"\tBias factor towards uniform class distribution.\n"	+"\t0 = distribution in input data -- 1 = uniform distribution.\n"	+"\t(default 0)",	"B", 1, "-B <num>"));    result.addElement(new Option(	"\tDisables replacement of instances\n"	+"\t(default: with replacement)",	"no-replacement", 0, "-no-replacement"));    result.addElement(new Option(	"\tInverts the selection - only available with '-no-replacement'.",	"V", 0, "-V"));    return result.elements();  }  /**   * Parses a given list of options. <p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -S &lt;num&gt;   *  Specify the random number seed (default 1)</pre>   *    * <pre> -Z &lt;num&gt;   *  The size of the output dataset, as a percentage of   *  the input dataset (default 100)</pre>   *    * <pre> -B &lt;num&gt;   *  Bias factor towards uniform class distribution.   *  0 = distribution in input data -- 1 = uniform distribution.   
*  (default 0)</pre>   *    * <pre> -no-replacement   *  Disables replacement of instances   *  (default: with replacement)</pre>   *    * <pre> -V   *  Inverts the selection - only available with '-no-replacement'.</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String	tmpStr;        tmpStr = Utils.getOption('S', options);    if (tmpStr.length() != 0)      setRandomSeed(Integer.parseInt(tmpStr));    else      setRandomSeed(1);    tmpStr = Utils.getOption('B', options);    if (tmpStr.length() != 0)      setBiasToUniformClass(Double.parseDouble(tmpStr));    else      setBiasToUniformClass(0);    tmpStr = Utils.getOption('Z', options);    if (tmpStr.length() != 0)      setSampleSizePercent(Double.parseDouble(tmpStr));    else      setSampleSizePercent(100);    setNoReplacement(Utils.getFlag("no-replacement", options));    if (getNoReplacement())      setInvertSelection(Utils.getFlag('V', options));    if (getInputFormat() != null) {      setInputFormat(getInputFormat());    }  }  /**   * Gets the current settings of the filter.   
*   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {    Vector<String>	result;    result = new Vector<String>();    result.add("-B");    result.add("" + getBiasToUniformClass());    result.add("-S");    result.add("" + getRandomSeed());    result.add("-Z");    result.add("" + getSampleSizePercent());    if (getNoReplacement()) {      result.add("-no-replacement");      if (getInvertSelection())	result.add("-V");    }        return result.toArray(new String[result.size()]);  }      /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String biasToUniformClassTipText() {    return "Whether to use bias towards a uniform class. A value of 0 leaves the class "      + "distribution as-is, a value of 1 ensures the class distribution is "      + "uniform in the output data.";  }      /**   * Gets the bias towards a uniform class. A value of 0 leaves the class   * distribution as-is, a value of 1 ensures the class distributions are   * uniform in the output data.   *   * @return the current bias   */  public double getBiasToUniformClass() {    return m_BiasToUniformClass;  }    /**   * Sets the bias towards a uniform class. A value of 0 leaves the class   * distribution as-is, a value of 1 ensures the class distributions are   * uniform in the output data.   *   * @param newBiasToUniformClass the new bias value, between 0 and 1.   */  public void setBiasToUniformClass(double newBiasToUniformClass) {    m_BiasToUniformClass = newBiasToUniformClass;  }      /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String randomSeedTipText() {    return "Sets the random number seed for subsampling.";  }    /**   * Gets the random number seed.   *   * @return the random number seed.   
*/
  public int getRandomSeed() {
    return this.m_RandomSeed;
  }

  /**
   * Stores the seed for the random number generator.
   *
   * @param newSeed the new random number seed.
   */
  public void setRandomSeed(int newSeed) {
    this.m_RandomSeed = newSeed;
  }

  /**
   * Supplies the tool tip shown for the sampleSizePercent property.
   *
   * @return the tip text, suitable for display in the
   * explorer/experimenter gui
   */
  public String sampleSizePercentTipText() {
    String tip = "The subsample size as a percentage of the original set.";
    return tip;
  }

  /**
   * Reports the subsample size, expressed as a percentage of the original
   * set.
   *
   * @return the subsample size
   */
  public double getSampleSizePercent() {
    return this.m_SampleSizePercent;
  }

  /**
   * Stores the subsample size, expressed as a percentage of the original
   * set. The value is stored as given; this method does not range-check it.
   *
   * @param newSampleSizePercent the subsample set size, between 0 and 100.
   */
  public void setSampleSizePercent(double newSampleSizePercent) {
    this.m_SampleSizePercent = newSampleSizePercent;
  }

  /**
   * Returns the tip text for this property
   *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?