📄 rdg1.java
字号:
* are seen to be irrelevant.
*
* @param newAttList_Irr array that defines the irrelevant attributes.
*/
public void setAttList_Irr(boolean [] newAttList_Irr) {
  // The caller's array reference is stored directly; no copy is made.
  this.m_AttList_Irr = newAttList_Irr;
}
/**
 * Initializes the format for the dataset produced.
 * Creates a random number generator from the configured seed and stores
 * it, resets the decision list to an empty list, copies the requested
 * number of examples into the actual number of examples, and builds the
 * dataset header.
 *
 * @return the output data format
 * @exception Exception data format could not be defined
 */
public Instances defineDataFormat() throws Exception {
  // seed a fresh generator and remember it for later example generation
  Random rng = new Random(getSeed());
  setRandom(rng);

  // every format definition starts with an empty decision list
  m_DecisionList = new FastVector();

  // number of examples is the same as given per option
  setNumExamplesAct(getNumExamples());

  // define dataset
  return defineDataset(rng);
}
/**
 * Generates one example of the dataset.
 * The example is appended to the Instances object returned by
 * getDatasetFormat() and then read back as its last instance.
 *
 * @return the instance generated
 * @exception Exception if format not defined or generating
 * examples one by one is not possible, because voting is chosen
 */
public Instance generateExample() throws Exception {
  Random rng = getRandom();
  Instances header = getDatasetFormat();

  if (header == null) {
    throw new Exception("Dataset format not defined.");
  }
  if (getVoteFlag()) {
    throw new Exception("Examples cannot be generated" +
                        " one by one.");
  }

  // generate values for all attributes of a single example
  Instances withExample = generateExamples(1, rng, header);
  return withExample.lastInstance();
}
/**
 * Generates all examples of the dataset.
 * The number of examples is taken from getNumExamplesAct(). When the
 * vote flag is set, the class values of the generated examples are
 * reassigned by voting before the dataset is returned.
 *
 * @return the generated instances
 * @exception Exception if the dataset format is not defined, or if
 * generating or voting the examples fails
 */
public Instances generateExamples() throws Exception {
  Random rng = getRandom();
  Instances header = getDatasetFormat();
  if (header == null) {
    throw new Exception("Dataset format not defined.");
  }

  // generate values for all attributes
  Instances generated = generateExamples(getNumExamplesAct(), rng, header);

  // vote all examples, and set new class value
  return getVoteFlag() ? voteDataset(generated) : generated;
}
/**
 * Generates the given number of examples and appends them to the
 * given dataset.
 * Each example is first classified with the current decision list; if
 * no class could be assigned, the decision list is extended with a new
 * rule that classifies the example.
 *
 * @param num number of examples to generate
 * @param random random number generator
 * @param format dataset the examples are added to
 * @return the dataset passed in, with the new examples appended
 * @exception Exception if format is null or an example cannot be
 * generated or classified
 */
public Instances generateExamples(int num,
                                  Random random,
                                  Instances format) throws Exception {
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }

  int produced = 0;
  while (produced < num) {
    // draw attribute values for the next example
    Instance candidate = generateExample(random, format);

    // try the existing decision list first; only when no class was
    // assigned is a new rule generated for this example
    if (!classifyExample(candidate)) {
      candidate = updateDecisionList(random, candidate);
    }

    candidate.setDataset(format);
    format.add(candidate);
    produced++;
  }
  return format;
}
/**
 * Generates a new rule for the decision list
 * and classifies the new example with it.
 * The rule is built from tests drawn at random, without replacement,
 * from the candidate tests generated for the example. The class value
 * of the new rule is the class value of the last rule in the decision
 * list plus one, modulo the number of classes, or 0 if the decision
 * list is still empty.
 *
 * @param random random number generator
 * @param example example used to update decision list
 * @return a copy of the example, bound to the dataset format and with
 * its class value set to the class of the new rule
 * @exception Exception if the dataset format is not defined
 */
private Instance updateDecisionList(Random random, Instance example)
  throws Exception {
  Instances format = getDatasetFormat();
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }

  FastVector testList = generateTestList(random, example);

  // a rule can never hold more tests than there are candidates
  int maxSize = Math.min(getMaxRuleSize(), testList.size());
  int ruleSize = ((int) (random.nextDouble() *
                         (double) (maxSize - getMinRuleSize())))
                 + getMinRuleSize();
  // When the minimum rule size exceeds the number of candidate tests the
  // formula above can ask for more tests than exist; clamp so the loop
  // below never draws from an exhausted list.
  if (ruleSize > testList.size()) {
    ruleSize = testList.size();
  }

  RuleList newRule = new RuleList();
  for (int i = 0; i < ruleSize; i++) {
    // pick one of the remaining tests and remove it so it is used once
    int testIndex = (int) (random.nextDouble() * (double) testList.size());
    Test test = (Test) testList.elementAt(testIndex);
    newRule.addTest(test);
    testList.removeElementAt(testIndex);
  }

  double newClassValue = 0.0;
  if (m_DecisionList.size() > 0) {
    RuleList lastRule = (RuleList) (m_DecisionList.lastElement());
    double oldClassValue = (double) (lastRule.getClassValue());
    newClassValue = (double) ((int) oldClassValue + 1)
                    % getNumClasses();
  }
  newRule.setClassValue(newClassValue);
  m_DecisionList.addElement(newRule);

  // hand back a copy carrying the class of the freshly created rule
  example = new Instance(example);
  example.setDataset(format);
  example.setClassValue(newClassValue);
  return example;
}
/**
 * Generates the list of candidate tests for an example: one test for
 * each attribute that is not marked as irrelevant.
 * For a numeric attribute a random split point is drawn and the test is
 * created with a flag that is true when the split point lies below the
 * example's value (presumably a negation flag; see class Test). For any
 * other attribute the test uses the example's own value and the flag is
 * false.
 *
 * @param random random number generator
 * @param example example the tests are derived from
 * @return list of the generated tests
 * @exception Exception if the dataset format is not defined
 */
private FastVector generateTestList(Random random, Instance example)
  throws Exception {
  Instances header = getDatasetFormat();
  if (header == null) {
    throw new Exception("Dataset format not defined.");
  }

  int relevantCount = getNumAttributes() - getNumIrrelevant();
  FastVector tests = new FastVector(relevantCount);
  boolean [] skip = getAttList_Irr();

  for (int attIndex = 0; attIndex < getNumAttributes(); attIndex++) {
    if (skip[attIndex]) {
      // irrelevant attributes never appear in a rule
      continue;
    }

    Test candidate;
    Attribute current = example.attribute(attIndex);
    if (current.isNumeric()) {
      double split = random.nextDouble();
      boolean splitBelowValue = split < example.value(attIndex);
      candidate = new Test(attIndex, split, header, splitBelowValue);
    } else {
      candidate = new Test(attIndex, example.value(attIndex), header, false);
    }
    tests.addElement(candidate);
  }
  return tests;
}
/**
 * Generates an example with its class value set to missing
 * and binds it to the given dataset.
 * One random number is drawn per attribute. Numeric attributes take the
 * drawn value directly; nominal attributes get 1.0 if the drawn value is
 * greater than 0.5 and 0.0 otherwise.
 *
 * @param random random number generator
 * @param format dataset the example gets bound to
 * @return the generated example, with weight 1.0 and missing class
 * @exception Exception if an attribute is neither numeric nor nominal
 */
private Instance generateExample(Random random, Instances format)
  throws Exception {
  // one extra slot beyond the regular attributes (presumably the class
  // attribute); it is marked missing below
  double [] attributes = new double[getNumAttributes() + 1];

  for (int i = 0; i < getNumAttributes(); i++) {
    // the value is drawn before the type check so that exactly one
    // random number is consumed per attribute
    double value = random.nextDouble();
    if (format.attribute(i).isNumeric()) {
      attributes[i] = value;
    } else if (format.attribute(i).isNominal()) {
      attributes[i] = (value > 0.5) ? 1.0 : 0.0;
    } else {
      throw new Exception ("Attribute type is not supported.");
    }
  }

  // The first constructor argument is the instance weight. It used to be
  // 0, which gives every generated example a weight of zero; use the
  // regular weight of 1.0 instead.
  Instance example = new Instance(1.0, attributes);
  example.setDataset(format);
  example.setClassMissing();
  return example;
}
/**
 * Tries to classify an example with the decision list.
 * The rules are asked in list order until one returns a class value
 * that is not negative; that value is then set as the class of the
 * example.
 *
 * @param example the example to classify; its class value is set on
 * success
 * @return true if a class value was assigned, false otherwise
 * @exception Exception if a rule fails to classify the example
 */
private boolean classifyExample(Instance example) throws Exception {
  double assigned = -1.0;
  Enumeration rules = m_DecisionList.elements();

  // stop at the first rule that yields a non-negative class value
  while (rules.hasMoreElements() && assigned < 0.0) {
    RuleList rule = (RuleList) rules.nextElement();
    assigned = rule.classifyInstance(example);
  }

  if (!(assigned >= 0.0)) {
    return false;
  }
  example.setClassValue(assigned);
  return true;
}
/**
 * Classify example with maximum vote the following way.
 * Every rule in the decision list is asked to classify the example;
 * each non-negative class value returned counts as one vote for that
 * class. Finally the class value that receives the highest number of
 * votes is assigned to the example. On a tie the class with the lowest
 * index wins.
 *
 * @param example example to be reclassified
 * @return the same instance with its new class value
 * @exception Exception if no rule voted for any class
 */
private Instance votedReclassifyExample(Instance example) throws Exception {
  // a new int array is already filled with zeros
  int [] classVotes = new int [getNumClasses()];

  for (Enumeration e = m_DecisionList.elements();
       e.hasMoreElements();) {
    RuleList rl = (RuleList) e.nextElement();
    int classValue = (int) rl.classifyInstance(example);
    if (classValue >= 0) {
      classVotes[classValue]++;
    }
  }

  // strict comparison keeps the first class that reaches the maximum
  int maxVote = 0;
  int vote = -1;
  for (int i = 0; i < classVotes.length; i++) {
    if (classVotes[i] > maxVote) {
      maxVote = classVotes[i];
      vote = i;
    }
  }

  if (vote < 0) {
    throw new Exception ("Error in instance classification.");
  }
  example.setClassValue((double) vote);
  return example;
}
/**
 * Returns a dataset header.
 * Randomly chooses the irrelevant and the numeric attributes, creates
 * the attributes "a0", "a1", ... (numeric, or nominal with the values
 * "false" and "true"), appends a nominal class attribute "class" with
 * the values "c0", "c1", ..., and stores an empty copy of the header as
 * the dataset format of this generator.
 *
 * @param random random number generator
 * @return dataset header
 * @exception Exception if the header cannot be built
 */
private Instances defineDataset(Random random) throws Exception {
  FastVector attributes = new FastVector();
  Attribute attribute;

  FastVector nominalValues = new FastVector (2);
  nominalValues.addElement("false");
  nominalValues.addElement("true");

  int numClasses = getNumClasses();
  FastVector classValues = new FastVector (numClasses);

  // set randomly those attributes that are irrelevant
  boolean [] attList_Irr = defineIrrelevant(random);
  setAttList_Irr(attList_Irr);

  // set randomly those attributes that are numeric
  int [] attList_Num = defineNumeric(random);

  // define dataset
  for (int i = 0; i < getNumAttributes(); i++) {
    if (attList_Num[i] == Attribute.NUMERIC) {
      attribute = new Attribute("a" + i);
    } else {
      attribute = new Attribute("a" + i, nominalValues);
    }
    attributes.addElement(attribute);
  }

  // one label per class; the bound is the number of classes itself, not
  // the capacity of the vector that is being filled
  for (int i = 0; i < numClasses; i++) {
    classValues.addElement("c" + i);
  }
  attribute = new Attribute ("class", classValues);
  attributes.addElement(attribute);

  Instances dataset = new Instances(getRelationName(), attributes,
                                    getNumExamplesAct());
  // the class attribute was appended last, right after the a<i> attributes
  dataset.setClassIndex(getNumAttributes());

  // set dataset format of this class
  Instances format = new Instances(dataset, 0);
  setDatasetFormat(format);
  return dataset;
}
/**
 * Defines randomly the attributes as irrelevant.
 * Number of attributes to be set as irrelevant is read from
 * getNumIrrelevant(). At most five times the number of attributes
 * random picks are made, so fewer attributes than requested may end up
 * marked when picks repeatedly hit attributes that are already marked.
 *
 * @param random random number generator
 * @return list of boolean values with one value for each attribute,
 * and each value set true or false according to if the corresponding
 * attribute was defined irrelevant or not
 */
private boolean [] defineIrrelevant(Random random) {
  // a new boolean array starts out all false, i.e. nothing irrelevant yet
  boolean [] flags = new boolean [getNumAttributes()];

  int chosen = 0;
  int attempts = 0;
  while ((chosen < getNumIrrelevant())
         && (attempts < getNumAttributes() * 5)) {
    int pick = (int) (random.nextDouble() * (double) flags.length);
    if (!flags[pick]) {
      flags[pick] = true;
      chosen++;
    }
    attempts++;
  }
  return flags;
}
/**
 * Chooses randomly the attributes that get datatype numeric.
 * Number of attributes to be set as numeric is read from
 * getNumNumeric(). At most five times the number of attributes random
 * picks are made, so fewer attributes than requested may end up numeric
 * when picks repeatedly hit attributes that are already numeric.
 *
 * @param random random number generator
 * @return list of integer values, with one value for each attribute,
 * and each value set to Attribute.NOMINAL or Attribute.NUMERIC
 */
private int [] defineNumeric(Random random) {
  int [] types = new int [getNumAttributes()];

  // every attribute is nominal unless picked below
  for (int k = 0; k < types.length; k++) {
    types[k] = Attribute.NOMINAL;
  }

  int chosen = 0;
  int attempts = 0;
  while ((chosen < getNumNumeric())
         && (attempts < getNumAttributes() * 5)) {
    int pick = (int) (random.nextDouble() * (double) types.length);
    if (types[pick] != Attribute.NUMERIC) {
      types[pick] = Attribute.NUMERIC;
      chosen++;
    }
    attempts++;
  }
  return types;
}
/**
 * Compiles documentation about the data generation. This is the number of
 * irrelevant attributes, their names, and the decision list with all
 * rules. Every line of the text starts with "%".
 * Considering that the decision list might get enhanced until
 * the last instance is generated, this method should be called at the
 * end of the data generation process.
 *
 * @return string with additional information about generated dataset
 * @exception Exception no input structure has been defined
 */
public String generateFinished() throws Exception {
  // text for output at end of ARFF-File
  StringBuffer report = new StringBuffer();
  boolean [] irrelevantFlags = getAttList_Irr();
  Instances header = getDatasetFormat();

  report.append("\n%\n% Number of attributes chosen as irrelevant = ");
  report.append(getNumIrrelevant());
  report.append("\n");

  // list the names of the irrelevant attributes
  for (int att = 0; att < irrelevantFlags.length; att++) {
    if (!irrelevantFlags[att]) {
      continue;
    }
    report.append("% ").append(header.attribute(att).name()).append("\n");
  }

  report.append("%\n% DECISIONLIST (number of rules = ");
  report.append(m_DecisionList.size());
  report.append("):\n");

  // one line per rule, in decision list order
  for (int ruleNo = 0; ruleNo < m_DecisionList.size(); ruleNo++) {
    RuleList rule = (RuleList) m_DecisionList.elementAt(ruleNo);
    report.append("% RULE ").append(ruleNo).append(": ");
    report.append(rule.toString()).append("\n");
  }
  return report.toString();
}
/**
 * Resets the class values of all instances using voting.
 * For each instance the class value that satisfies the most rules
 * is chosen as new class value. The dataset is processed by repeatedly
 * taking its first instance, reclassifying it, appending it at the end
 * and deleting the first position, once per instance.
 *
 * @param dataset the instances to reclassify
 * @return the changed instances
 * @exception Exception if an instance cannot be reclassified
 */
private Instances voteDataset(Instances dataset) throws Exception {
  // each pass adds one instance and deletes one, so the size is constant
  for (int pass = dataset.numInstances(); pass > 0; pass--) {
    Instance revoted = votedReclassifyExample(dataset.firstInstance());
    dataset.add(revoted);
    dataset.delete(0);
  }
  return dataset;
}
/**
 * Main method for testing this class.
 * Passes a new RDG1 generator and the arguments to Generator.makeData;
 * if that fails, the message of the exception is printed to standard
 * output.
 *
 * @param argv should contain arguments for the data producer
 */
public static void main(String [] argv) {
  try {
    RDG1 generator = new RDG1();
    Generator.makeData(generator, argv);
  } catch (Exception failure) {
    System.out.println(failure.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -