📄 spreadsubsample.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SpreadSubsample.java * Copyright (C) 2002 University of Waikato * */package weka.filters.supervised.instance;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.UnassignedClassException;import weka.core.UnsupportedClassTypeException;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.SupervisedFilter;import java.util.Enumeration;import java.util.Hashtable;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Produces a random subsample of a dataset. The original dataset must fit entirely in memory. This filter allows you to specify the maximum "spread" between the rarest and most common class. For example, you may specify that there be at most a 2:1 difference in class frequencies. When used in batch mode, subsequent batches are NOT resampled. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the random number seed (default 1)</pre> * * <pre> -M <num> * The maximum class distribution spread. * 0 = no maximum spread, 1 = uniform distribution, 10 = allow at most * a 10:1 ratio between the classes (default 0)</pre> * * <pre> -W * Adjust weights so that total weight per class is maintained. * Individual instance weighting is not preserved. (default no * weights adjustment</pre> * * <pre> -X <num> * The maximum count for any class value (default 0 = unlimited). * </pre> * <!-- options-end --> * * @author Stuart Inglis (stuart@reeltwo.com) * @version $Revision: 1.6 $ **/public class SpreadSubsample extends Filter implements SupervisedFilter, OptionHandler { /** for serialization */ static final long serialVersionUID = -3947033795243930016L; /** The random number generator seed */ private int m_RandomSeed = 1; /** The maximum count of any class */ private int m_MaxCount; /** True if the first batch has been done */ private double m_DistributionSpread = 0; /** * True if instance weights will be adjusted to maintain * total weight per class. */ private boolean m_AdjustWeights = false; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Produces a random subsample of a dataset. The original dataset must " + "fit entirely in memory. This filter allows you to specify the maximum " + "\"spread\" between the rarest and most common class. For example, you may " + "specify that there be at most a 2:1 difference in class frequencies. " + "When used in batch mode, subsequent batches are NOT resampled."; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String adjustWeightsTipText() { return "Wether instance weights will be adjusted to maintain total weight per " + "class."; } /** * Returns true if instance weights will be adjusted to maintain * total weight per class. * * @return true if instance weights will be adjusted to maintain * total weight per class. */ public boolean getAdjustWeights() { return m_AdjustWeights; } /** * Sets whether the instance weights will be adjusted to maintain * total weight per class. * * @param newAdjustWeights whether to adjust weights */ public void setAdjustWeights(boolean newAdjustWeights) { m_AdjustWeights = newAdjustWeights; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(4); newVector.addElement(new Option( "\tSpecify the random number seed (default 1)", "S", 1, "-S <num>")); newVector.addElement(new Option( "\tThe maximum class distribution spread.\n" +"\t0 = no maximum spread, 1 = uniform distribution, 10 = allow at most\n" +"\ta 10:1 ratio between the classes (default 0)", "M", 1, "-M <num>")); newVector.addElement(new Option( "\tAdjust weights so that total weight per class is maintained.\n" +"\tIndividual instance weighting is not preserved. (default no\n" +"\tweights adjustment", "W", 0, "-W")); newVector.addElement(new Option( "\tThe maximum count for any class value (default 0 = unlimited).\n", "X", 0, "-X <num>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <num> * Specify the random number seed (default 1)</pre> * * <pre> -M <num> * The maximum class distribution spread. * 0 = no maximum spread, 1 = uniform distribution, 10 = allow at most * a 10:1 ratio between the classes (default 0)</pre> * * <pre> -W * Adjust weights so that total weight per class is maintained. * Individual instance weighting is not preserved. (default no * weights adjustment</pre> * * <pre> -X <num> * The maximum count for any class value (default 0 = unlimited). * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String seedString = Utils.getOption('S', options); if (seedString.length() != 0) { setRandomSeed(Integer.parseInt(seedString)); } else { setRandomSeed(1); } String maxString = Utils.getOption('M', options); if (maxString.length() != 0) { setDistributionSpread(Double.valueOf(maxString).doubleValue()); } else { setDistributionSpread(0); } String maxCount = Utils.getOption('X', options); if (maxCount.length() != 0) { setMaxCount(Double.valueOf(maxCount).doubleValue()); } else { setMaxCount(0); } setAdjustWeights(Utils.getFlag('W', options)); if (getInputFormat() != null) { setInputFormat(getInputFormat()); } } /** * Gets the current settings of the filter. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [7]; int current = 0; options[current++] = "-M"; options[current++] = "" + getDistributionSpread(); options[current++] = "-X"; options[current++] = "" + getMaxCount(); options[current++] = "-S"; options[current++] = "" + getRandomSeed(); if (getAdjustWeights()) { options[current++] = "-W"; } while (current < options.length) { options[current++] = ""; } return options; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String distributionSpreadTipText() { return "The maximum class distribution spread. " + "(0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a " + "10:1 ratio between the classes)."; } /** * Sets the value for the distribution spread * * @param spread the new distribution spread */ public void setDistributionSpread(double spread) { m_DistributionSpread = spread; } /** * Gets the value for the distribution spread * * @return the distribution spread */ public double getDistributionSpread() { return m_DistributionSpread; } /** * Returns the tip text for this property * * @return tip text for this property suitable for
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -