⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 spreadsubsample.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    SpreadSubsample.java
 *    Copyright (C) 2002 University of Waikato
 *
 */


package weka.filters.supervised.instance;

import weka.filters.*;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.UnsupportedClassTypeException;

/** 
 * Produces a random subsample of a dataset. The original dataset must
 * fit entirely in memory. This filter allows you to specify the maximum
 * "spread" between the rarest and most common class. For example, you may
 * specify that there be at most a 2:1 difference in class frequencies.
 * When used in batch mode, subsequent batches are
 * <b>not</b> resampled.
 *
 * Valid options are:<p>
 *
 * -S num <br>
 * Specify the random number seed (default 1).<p>
 *
 * -M num <br>
 *  The maximum class distribution spread. <br>
 *  0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a
 *  10:1 ratio between the classes (default 0)
 *  <p>
 *
 * -X num <br>
 *  The maximum count for any class value. <br>
 *  (default 0 = unlimited)
 *  <p>
 *
 * -W <br>
 *  Adjust weights so that total weight per class is maintained. Individual
 *  instance weighting is not preserved. (default no weights adjustment)
 *  <p>
 *
 * @author Stuart Inglis (stuart@reeltwo.com)
 * @version $Revision$ 
 **/
public class SpreadSubsample extends Filter implements SupervisedFilter,
						       OptionHandler {

  /** The random number generator seed (initialised to 1) */
  private int m_RandomSeed = 1;

  /** The maximum count of any class value (0 is documented as unlimited) */
  private int m_MaxCount;
  
  /** True if the first batch has been done */
  private boolean m_FirstBatchDone = false;

  /**
   * The maximum class distribution spread
   * (0 = no maximum spread, 1 = uniform distribution).
   */
  private double m_DistributionSpread = 0;

  /**
   * True if instance weights will be adjusted to maintain
   * total weight per class (off unless explicitly enabled).
   */
  private boolean m_AdjustWeights = false;

  /**
   * Describes what this filter does.
   *
   * @return a human-readable description of the filter, suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    String description =
        "Produces a random subsample of a dataset. The original dataset must "
        + "fit entirely in memory. This filter allows you to specify the maximum "
        + "\"spread\" between the rarest and most common class. For example, you may "
        + "specify that there be at most a 2:1 difference in class frequencies. "
        + "When used in batch mode, subsequent batches are NOT resampled.";
    return description;
  }
    
  /**
   * Returns the tip text for the adjustWeights property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String adjustWeightsTipText() {
    // Spelling corrected ("Wether" -> "Whether"): this text is shown to users.
    return "Whether instance weights will be adjusted to maintain total weight per "
      + "class.";
  }
  
  /**
   * Reports whether weight adjustment is switched on.
   *
   * @return true if instance weights will be adjusted so that the
   * total weight per class is maintained, false otherwise
   */
  public boolean getAdjustWeights() {
    return this.m_AdjustWeights;
  }
  
  /**
   * Switches weight adjustment on or off.
   *
   * @param newAdjustWeights true if instance weights should be adjusted
   * so that the total weight per class is maintained
   */
  public void setAdjustWeights(boolean newAdjustWeights) {
    this.m_AdjustWeights = newAdjustWeights;
  }
  
  /**
   * Returns an enumeration describing the available options
   * (-S, -M, -W and -X).
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(4);

    newVector.addElement(new Option(
              "\tSpecify the random number seed (default 1)",
              "S", 1, "-S <num>"));
    newVector.addElement(new Option(
              "\tThe maximum class distribution spread.\n"
              + "\t0 = no maximum spread, 1 = uniform distribution, 10 = allow at most\n"
              + "\ta 10:1 ratio between the classes (default 0)",
              "M", 1, "-M <num>"));
    // Help text previously left the "(default ..." parenthesis unclosed.
    newVector.addElement(new Option(
              "\tAdjust weights so that total weight per class is maintained.\n"
              + "\tIndividual instance weighting is not preserved. (default no\n"
              + "\tweights adjustment)",
              "W", 0, "-W"));
    // -X takes a numeric value (see the synopsis and setOptions), so it
    // must be declared with one argument rather than zero.
    newVector.addElement(new Option(
              "\tThe maximum count for any class value (default 0 = unlimited).\n",
              "X", 1, "-X <num>"));

    return newVector.elements();
  }


  /**
   * Configures this filter from a list of command-line style options.
   * Valid options are:<p>
   *
   * -S num <br>
   * Specify the random number seed (default 1).<p>
   *
   * -M num <br>
   *  The maximum class distribution spread. <br>
   *  0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a
   *  10:1 ratio between the classes (default 0)
   *  <p>
   *
   * -X num <br>
   *  The maximum count for any class value. <br>
   *  (default 0 = unlimited)
   *  <p>
   *
   * -W <br>
   *  Adjust weights so that total weight per class is maintained. Individual
   *  instance weighting is not preserved. (default no weights adjustment)
   *  <p>
   *
   * An option whose value comes back empty from the option parser is
   * treated as "not given" and its default is applied. If an input format
   * has already been set, it is re-applied after the options are read.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // -S: random seed, falls back to 1
    String seedArg = Utils.getOption('S', options);
    if (seedArg.length() == 0) {
      setRandomSeed(1);
    } else {
      setRandomSeed(Integer.parseInt(seedArg));
    }

    // -M: maximum distribution spread, falls back to 0
    String spreadArg = Utils.getOption('M', options);
    if (spreadArg.length() == 0) {
      setDistributionSpread(0);
    } else {
      setDistributionSpread(Double.parseDouble(spreadArg));
    }

    // -X: maximum count per class value, falls back to 0
    String countArg = Utils.getOption('X', options);
    if (countArg.length() == 0) {
      setMaxCount(0);
    } else {
      setMaxCount(Double.parseDouble(countArg));
    }

    // -W: plain flag, no value
    boolean adjust = Utils.getFlag('W', options);
    setAdjustWeights(adjust);

    // Re-apply an already known input format after the settings changed.
    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }
  }

  /**
   * Gets the current settings of the filter.
   *
   * The result always has seven elements: -M, -X and -S, each followed by
   * its current value, and a final slot that holds "-W" when weight
   * adjustment is enabled and an empty string otherwise.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    return new String [] {
      "-M", "" + getDistributionSpread(),
      "-X", "" + getMaxCount(),
      "-S", "" + getRandomSeed(),
      getAdjustWeights() ? "-W" : ""
    };
  }
    
  /**
   * Returns the tip text for the distributionSpread property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String distributionSpreadTipText() {
    String tip =
        "The maximum class distribution spread. "
        + "(0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a "
        + "10:1 ratio between the classes).";
    return tip;
  }
  
  /**
   * Stores a new maximum class distribution spread.
   *
   * @param spread the new distribution spread
   */
  public void setDistributionSpread(double spread) {
    this.m_DistributionSpread = spread;
  }

  /**
   * Reports the currently configured maximum class distribution spread.
   *
   * @return the distribution spread
   */
  public double getDistributionSpread() {
    return this.m_DistributionSpread;
  }
    
  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String maxCountTipText() {
    return "The maximum count for any class value (0 = unlimited).";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -