📄 rdg1.java
字号:
/*
* RDG1.java
* Copyright (C) 2000 Gabi Schmidberger.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package weka.datagenerators;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
/**
* Class to generate data randomly by producing a decision list.
* The decision list consists of rules.
* Instances are generated randomly one by one. If decision list fails
* to classify the current instance, a new rule according to this current
* instance is generated and added to the decision list.<p>
*
* The option -V switches on voting, which means that at the end
* of the generation all instances are
* reclassified to the class value that is supported by the most rules.<p>
*
* This data generator can generate 'boolean' attributes (= nominal with
* the values {true, false}) and numeric attributes. The rules can be
* 'A' or 'NOT A' for boolean values and 'B < random_value' or
* 'B >= random_value' for numeric values.<p>
*
* Valid options are:<p>
*
* -R num <br>
* The maximum number of attributes chosen to form a rule (default 10).<p>
*
* -M num <br>
* The minimum number of attributes chosen to form a rule (default 1).<p>
*
* -I num <br>
* The number of irrelevant attributes (default 0).<p>
*
* -N num <br>
* The number of numeric attributes (default 0).<p>
*
* -S seed <br>
* Random number seed for random function used (default 1). <p>
*
* -V <br>
* Flag to use voting. <p>
*
* The following is an example of a generated dataset: <br>
*
* %<br>
* % weka.datagenerators.RDG1 -r expl -a 2 -c 3 -n 4 -N 1 -I 0 -M 2 -R 10 -S 2<br>
* %<br>
* relation expl<br>
*<br>
* attribute a0 {false,true}<br>
* attribute a1 numeric<br>
* attribute class {c0,c1,c2}<br>
*<br>
* data<br>
*<br>
* true,0.496823,c0<br>
* false,0.743158,c1<br>
* false,0.408285,c1<br>
* false,0.993687,c2<br>
* %<br>
* % Number of attributes chosen as irrelevant = 0<br>
* %<br>
* % DECISIONLIST (number of rules = 3):<br>
* % RULE 0: c0 := a1 < 0.986, a0<br>
* % RULE 1: c1 := a1 < 0.95, not(a0)<br>
* % RULE 2: c2 := not(a0), a1 >= 0.562<br>
*<p>
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
* @version $Revision$
**/
public class RDG1 extends Generator implements OptionHandler,
Serializable {
/*
 * A single rule of the decision list: an ordered collection of tests plus
 * the class value that is returned when an instance passes every test.
 */
private class RuleList implements Serializable {

  /** @serial the tests of this rule; stays null until the first test is added */
  private FastVector m_RuleList = null;

  /** @serial class value returned for instances that pass all tests */
  double m_ClassValue = 0.0;

  /**
   * Gets the class value of this rule.
   *
   * @return the class value of this rule
   */
  public double getClassValue() {
    return m_ClassValue;
  }

  /**
   * Sets the class value of this rule.
   *
   * @param newClassValue the new class value
   */
  public void setClassValue(double newClassValue) {
    m_ClassValue = newClassValue;
  }

  /**
   * Appends a test to this rule, creating the test list on first use.
   *
   * @param newTest the test to append
   */
  private void addTest(Test newTest) {
    if (m_RuleList == null) {
      m_RuleList = new FastVector();
    }
    m_RuleList.addElement(newTest);
  }

  /**
   * Classifies an instance with this rule. Tests are evaluated in insertion
   * order and evaluation stops at the first test that is not passed.
   *
   * @param example the instance to classify
   * @return the class value of this rule if all tests are passed,
   * -1.0 otherwise
   * @exception Exception if a test throws one while being evaluated
   */
  private double classifyInstance(Instance example) throws Exception {
    // A rule without tests has nothing that can fail; treat it like an
    // empty test list instead of dereferencing the null list.
    if (m_RuleList == null) {
      return m_ClassValue;
    }
    for (Enumeration e = m_RuleList.elements(); e.hasMoreElements(); ) {
      Test test = (Test) e.nextElement();
      if (!test.passesTest(example)) {
        return -1.0;
      }
    }
    return m_ClassValue;
  }

  /**
   * Returns the rule in the form " c<class> := test1, test2, ...".
   *
   * @return the textual form of this rule
   */
  public String toString() {
    StringBuffer str = new StringBuffer();
    str.append(" c" + (int) m_ClassValue + " := ");
    // No tests added yet: only the rule head can be printed.
    if (m_RuleList == null) {
      return str.toString();
    }
    Enumeration e = m_RuleList.elements();
    if (e.hasMoreElements()) {
      Test test = (Test) e.nextElement();
      str.append(test.toPrologString());
    }
    while (e.hasMoreElements()) {
      Test test = (Test) e.nextElement();
      str.append(", " + test.toPrologString());
    }
    return str.toString();
  }
} /*end class RuleList ******/
/** @serial maximum number of tests in a rule (initially 10) */
private int m_MaxRuleSize = 10;
/** @serial minimum number of tests in a rule (initially 1) */
private int m_MinRuleSize = 1;
/** @serial number of irrelevant attributes (initially 0) */
private int m_NumIrrelevant = 0;
/** @serial number of numeric attributes (initially 0) */
private int m_NumNumeric = 0;
/** @serial seed for the random number generator (initially 1) */
private int m_Seed = 1;
/** @serial true if voting is switched on (initially off) */
private boolean m_VoteFlag = false;
/** @serial format of the dataset; null until it is set */
private Instances m_DatasetFormat = null;
/** @serial random number generator; null until it is set or requested through getRandom() */
private Random m_Random = null;
/** @serial the decision list; null until it is created */
private FastVector m_DecisionList = null;
/**
 * @serial array that marks the irrelevant attributes:
 * true = attribute is irrelevant; false = attribute is not irrelevant
 */
boolean [] m_AttList_Irr;
/** @serial debug setting, stored as an int (initially 0) */
private int m_Debug = 0;
/**
 * Returns a string describing this data generator.
 *
 * @return a description of the data generator suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // Every fragment but the last ends with a blank so that the words of
  // adjacent fragments do not run together in the concatenated text.
  return "A data generator that produces data randomly "
    + "with \'boolean\' (nominal with values {false,true}) and "
    + "numeric attributes by producing a decisionlist.";
}
/**
 * Returns an enumeration describing the available options:
 * -R, -M, -I, -N and -S each take one argument, -V is a flag.
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
  // Six options are registered below, so size the vector accordingly.
  Vector newVector = new Vector(6);

  newVector.addElement(new Option(
    "\tmaximum size for rules (default 10) ",
    "R", 1, "-R <num>"));
  newVector.addElement(new Option(
    "\tminimum size for rules (default 1) ",
    "M", 1, "-M <num>"));
  newVector.addElement(new Option(
    "\tnumber of irrelevant attributes (default 0)",
    "I", 1, "-I <num>"));
  // -N and -S expect a value, so their synopsis shows the placeholder too.
  newVector.addElement(new Option(
    "\tnumber of numeric attributes (default 0)",
    "N", 1, "-N <num>"));
  newVector.addElement(new Option(
    "\tseed for random function (default 1)",
    "S", 1, "-S <seed>"));
  // -V is read with Utils.getFlag in setOptions, i.e. it takes no argument.
  newVector.addElement(new Option(
    "\tswitch on voting (default is no voting)",
    "V", 0, "-V"));

  return newVector.elements();
}
/**
 * Parses a list of options for this object. <p>
 *
 * The options are read in the order -R, -M, -I, -N, -S, -V. An option
 * that is not given is reset to its default (10, 1, 0, 0, 1 and
 * "no voting" respectively). Directly after -I has been read, the number
 * of attributes minus the number of irrelevant attributes is compared
 * with the minimal rule size. <p>
 *
 * For list of valid options see class description.<p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported or if the number of
 * non-irrelevant attributes is below the minimal rule size
 */
public void setOptions(String[] options) throws Exception {
  String maxSizeText = Utils.getOption('R', options);
  setMaxRuleSize(maxSizeText.length() == 0
                 ? 10
                 : Double.valueOf(maxSizeText).intValue());

  String minSizeText = Utils.getOption('M', options);
  setMinRuleSize(minSizeText.length() == 0
                 ? 1
                 : Double.valueOf(minSizeText).intValue());

  String irrelevantText = Utils.getOption('I', options);
  setNumIrrelevant(irrelevantText.length() == 0
                   ? 0
                   : Double.valueOf(irrelevantText).intValue());

  // Enough non-irrelevant attributes must remain to build the smallest rule.
  if ((getNumAttributes() - getNumIrrelevant()) < getMinRuleSize()) {
    throw new Exception("Possible rule size is below minimal rule size.");
  }

  String numericText = Utils.getOption('N', options);
  setNumNumeric(numericText.length() == 0
                ? 0
                : Double.valueOf(numericText).intValue());

  String seedText = Utils.getOption('S', options);
  setSeed(seedText.length() == 0 ? 1 : Integer.parseInt(seedText));

  setVoteFlag(Utils.getFlag('V', options));
}
/**
 * Gets the current settings of the datagenerator RDG1.
 *
 * The result always has 12 entries: the five option/value pairs
 * -N, -I, -M, -R and -S, followed by "-V" if voting is switched on;
 * every remaining entry is the empty string.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  // The array initializer is evaluated left to right, so the getters are
  // queried in the same order in which their values appear in the result.
  return new String [] {
    "-N", String.valueOf(getNumNumeric()),
    "-I", String.valueOf(getNumIrrelevant()),
    "-M", String.valueOf(getMinRuleSize()),
    "-R", String.valueOf(getMaxRuleSize()),
    "-S", String.valueOf(getSeed()),
    getVoteFlag() ? "-V" : "",
    ""
  };
}
/**
 * Gets the random generator. If none is stored yet, a new one is
 * created from the current seed and stored before it is returned.
 *
 * @return the random generator
 */
public Random getRandom() {
  if (m_Random != null) {
    return m_Random;
  }
  m_Random = new Random (getSeed());
  return m_Random;
}
/**
 * Replaces the stored random generator.
 *
 * @param newRandom the random generator to use from now on
 */
public void setRandom(Random newRandom) {
  this.m_Random = newRandom;
}
/**
 * Returns the upper limit for the number of tests in a rule.
 *
 * @return the maximum number of tests allowed in rules
 */
public int getMaxRuleSize() {
  return this.m_MaxRuleSize;
}
/**
 * Stores the upper limit for the number of tests in a rule.
 *
 * @param newMaxRuleSize the new maximum number of tests allowed in rules
 */
public void setMaxRuleSize(int newMaxRuleSize) {
  this.m_MaxRuleSize = newMaxRuleSize;
}
/**
 * Returns the lower limit for the number of tests in a rule.
 *
 * @return the minimum number of tests allowed in rules
 */
public int getMinRuleSize() {
  return this.m_MinRuleSize;
}
/**
 * Stores the lower limit for the number of tests in a rule.
 *
 * @param newMinRuleSize the new minimum number of tests in rules
 */
public void setMinRuleSize(int newMinRuleSize) {
  this.m_MinRuleSize = newMinRuleSize;
}
/**
 * Returns how many attributes are irrelevant.
 *
 * @return the number of irrelevant attributes
 */
public int getNumIrrelevant() {
  return this.m_NumIrrelevant;
}
/**
 * Stores how many attributes are irrelevant.
 *
 * @param newNumIrrelevant the number of irrelevant attributes
 */
public void setNumIrrelevant(int newNumIrrelevant) {
  this.m_NumIrrelevant = newNumIrrelevant;
}
/**
 * Returns how many attributes are numeric.
 *
 * @return the number of numeric attributes
 */
public int getNumNumeric() {
  return this.m_NumNumeric;
}
/**
 * Stores how many attributes are numeric.
 *
 * @param newNumNumeric the number of numeric attributes
 */
public void setNumNumeric(int newNumNumeric) {
  this.m_NumNumeric = newNumNumeric;
}
/**
 * Tells whether voting is switched on.
 *
 * @return the current setting of the vote flag
 */
public boolean getVoteFlag() {
  return this.m_VoteFlag;
}
/**
 * Switches voting on or off.
 *
 * @param newVoteFlag the new setting of the vote flag
 */
public void setVoteFlag(boolean newVoteFlag) {
  this.m_VoteFlag = newVoteFlag;
}
/**
 * Gets the single mode flag, which is the negation of the vote flag.
 *
 * @return true if voting is switched off; the original documentation
 * describes this as the case in which the method generateExample
 * can be used
 */
public boolean getSingleModeFlag() {
  return !getVoteFlag();
}
/**
 * Returns the seed of the random number generator.
 *
 * @return the random number seed
 */
public int getSeed() {
  return this.m_Seed;
}
/**
 * Stores the seed of the random number generator.
 *
 * @param newSeed the new random number seed
 */
public void setSeed(int newSeed) {
  this.m_Seed = newSeed;
}
/**
 * Returns the stored dataset format.
 *
 * @return the dataset format
 */
public Instances getDatasetFormat() {
  return this.m_DatasetFormat;
}
/**
 * Stores the dataset format.
 *
 * @param newDatasetFormat the new dataset format
 */
public void setDatasetFormat(Instances newDatasetFormat) {
  this.m_DatasetFormat = newDatasetFormat;
}
/**
 * Returns the array that marks which attributes are regarded as
 * irrelevant. The stored array itself is returned, not a copy.
 *
 * @return the array that defines the irrelevant attributes
 */
public boolean [] getAttList_Irr() {
  return this.m_AttList_Irr;
}
/**
* Sets the array that defines which of the attributes
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -