📄 spreadsubsamplefilter.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.filters;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
import org.agentacademy.modules.dataminer.core.*;
import org.apache.log4j.Logger;
/**
* Produces a random subsample of a dataset. The original dataset must
* fit entirely in memory. This filter allows you to specify the maximum
* "spread" between the rarest and most common class. For example, you may
* specify that there be at most a 2:1 difference in class frequencies.
* When used in batch mode, subsequent batches are
* <b>not</b> resampled.
*
* Valid options are:<p>
*
* -S num <br>
* Specify the random number seed (default 1).<p>
*
* -M num <br>
* The maximum class distribution spread. <br>
* 0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a
* 10:1 ratio between the classes (default 0)
* <p>
*
* -X num <br>
* The maximum count for any class value. <br>
* (default 0 = unlimited)
* <p>
*
* -W <br>
* Adjust weights so that total weight per class is maintained. Individual
* instance weighting is not preserved. (default no weights adjustment)
* <p>
*
* @author Stuart Inglis (stuart@intelligenesis.net)
* @version $Revision: 1.2 $
**/
public class SpreadSubsampleFilter extends Filter implements OptionHandler {
/** Log4j logger for this filter class. */
public static Logger log = Logger.getLogger(SpreadSubsampleFilter.class);
/** The random number generator seed (default 1) */
private int m_RandomSeed = 1;
/** The maximum count of any class (the -X option; 0 = unlimited) */
private int m_MaxCount;
/** True if the first batch has been done */
private boolean m_FirstBatchDone = false;
/**
* The maximum class distribution spread (the -M option):
* 0 = no maximum spread, 1 = uniform distribution,
* 10 = allow at most a 10:1 ratio between the classes.
*/
private double m_DistributionSpread = 0;
/**
* True if instance weights will be adjusted to maintain
* total weight per class.
*/
private boolean m_AdjustWeights = false;
/**
 * Reports whether weight adjustment is switched on, i.e. whether
 * instance weights are rescaled so that the total weight of each
 * class is maintained.
 *
 * @return the current value of the adjust-weights flag
 */
public boolean getAdjustWeights() {
  return this.m_AdjustWeights;
}
/**
 * Switches weight adjustment on or off. When on, instance weights are
 * adjusted so that the total weight per class is maintained.
 *
 * @param newAdjustWeights the new value of the adjust-weights flag
 */
public void setAdjustWeights(boolean newAdjustWeights) {
  this.m_AdjustWeights = newAdjustWeights;
}
/**
 * Returns an enumeration describing the available options.
 * <p>
 * Four options are described: -S (random seed), -M (maximum class
 * distribution spread), -W (adjust weights) and -X (maximum count for
 * any class value). Each {@code Option} is built from a description,
 * the option name, the number of arguments it takes and a synopsis.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Vector optionList = new Vector(4);

  optionList.addElement(new Option(
      "\tSpecify the random number seed (default 1)",
      "S", 1, "-S <num>"));

  optionList.addElement(new Option(
      "\tThe maximum class distribution spread.\n"
      + "\t0 = no maximum spread, 1 = uniform distribution, 10 = allow at most\n"
      + "\ta 10:1 ratio between the classes (default 0)",
      "M", 1, "-M <num>"));

  // -W is a pure flag, so it takes no argument.
  // The description previously lacked its closing parenthesis.
  optionList.addElement(new Option(
      "\tAdjust weights so that total weight per class is maintained.\n"
      + "\tIndividual instance weighting is not preserved. (default no\n"
      + "\tweights adjustment)",
      "W", 0, "-W"));

  // -X takes a numeric argument (setOptions reads it with
  // Utils.getOption('X', ...)), so its argument count is 1, not 0.
  optionList.addElement(new Option(
      "\tThe maximum count for any class value (default 0 = unlimited).\n",
      "X", 1, "-X <num>"));

  return optionList.elements();
}
/**
 * Configures this filter from a command-line style option array.
 * Recognised options:<p>
 *
 * -S num <br>
 * Random number seed (default 1).<p>
 *
 * -M num <br>
 * Maximum class distribution spread. <br>
 * 0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a
 * 10:1 ratio between the classes (default 0)
 * <p>
 *
 * -X num <br>
 * Maximum count for any class value. <br>
 * (default 0 = unlimited)
 * <p>
 *
 * -W <br>
 * Adjust weights so that total weight per class is maintained. Individual
 * instance weighting is not preserved. (default no weights adjustment)
 * <p>
 *
 * An option that is absent resets the corresponding setting to its
 * default. If an input format has already been set, it is set again
 * after the options have been applied.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  // -S: random seed
  String seedArg = Utils.getOption('S', options);
  if (seedArg.length() == 0) {
    setRandomSeed(1);
  } else {
    setRandomSeed(Integer.parseInt(seedArg));
  }

  // -M: maximum distribution spread
  String spreadArg = Utils.getOption('M', options);
  if (spreadArg.length() == 0) {
    setDistributionSpread(0);
  } else {
    setDistributionSpread(Double.parseDouble(spreadArg));
  }

  // -X: maximum count per class value
  String countArg = Utils.getOption('X', options);
  if (countArg.length() == 0) {
    setMaxCount(0);
  } else {
    setMaxCount(Double.parseDouble(countArg));
  }

  // -W: weight adjustment flag
  boolean adjust = Utils.getFlag('W', options);
  setAdjustWeights(adjust);

  // Nothing more to do until an input format is known.
  if (getInputFormat() == null) {
    return;
  }
  setInputFormat(getInputFormat());
}
/**
 * Reports the filter's current configuration as an option array.
 * The array always has 7 entries: -M, -X and -S each followed by
 * their value, then -W if weight adjustment is on. Any unused
 * trailing slot holds an empty string.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  String [] result = new String [7];
  int next = 0;

  result[next] = "-M";
  next++;
  result[next] = String.valueOf(getDistributionSpread());
  next++;

  result[next] = "-X";
  next++;
  result[next] = String.valueOf(getMaxCount());
  next++;

  result[next] = "-S";
  next++;
  result[next] = String.valueOf(getRandomSeed());
  next++;

  if (getAdjustWeights()) {
    result[next] = "-W";
    next++;
  }

  // Pad whatever is left with empty strings.
  for (; next < result.length; next++) {
    result[next] = "";
  }
  return result;
}
/**
 * Stores a new maximum class distribution spread.
 *
 * @param spread the new distribution spread
 */
public void setDistributionSpread(double spread) {
  this.m_DistributionSpread = spread;
}
/**
 * Reports the maximum class distribution spread currently in effect.
 *
 * @return the distribution spread
 */
public double getDistributionSpread() {
  return this.m_DistributionSpread;
}
/**
 * Stores a new maximum count for any class value. The value is kept
 * as an int, so the argument is narrowed with an int cast.
 *
 * @param maxcount the new max count
 */
public void setMaxCount(double maxcount) {
  this.m_MaxCount = (int) maxcount;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -