📄 rdg1.java
字号:
return format; } /** * Generate all examples of the dataset. * @param num the number of examples to generate * @param random the random number generator to use * @param format the dataset format * @return the instance generated * @throws Exception if format not defined or generating <br/> * examples one by one is not possible, because voting is chosen */ public Instances generateExamples(int num, Random random, Instances format) throws Exception { if (format == null) throw new Exception("Dataset format not defined."); // generate values for all attributes for (int i = 0; i < num; i++) { // over all examples to be produced Instance example = generateExample(random, format); // set class of example using decision list boolean classDefined = classifyExample(example); if (!classDefined) { // set class with newly generated rule example = updateDecisionList(random, example); } example.setDataset(format); format.add(example); } return (format); } /** * Generates a new rule for the decision list. * and classifies the new example * @param random random number generator * @param example example used to update decision list * @return the classified example * @throws Exception if dataset format not defined */ private Instance updateDecisionList(Random random, Instance example) throws Exception { FastVector TestList; Instances format = getDatasetFormat(); if (format == null) throw new Exception("Dataset format not defined."); TestList = generateTestList(random, example); int maxSize = getMaxRuleSize() < TestList.size() ? 
getMaxRuleSize() : TestList.size(); int ruleSize = ((int) (random.nextDouble() * (double) (maxSize - getMinRuleSize()))) + getMinRuleSize(); RuleList newRule = new RuleList(); for (int i=0; i < ruleSize; i++) { int testIndex = (int) (random.nextDouble() * (double) TestList.size()); Test test = (Test) TestList.elementAt(testIndex); newRule.addTest(test); TestList.removeElementAt(testIndex); } double newClassValue = 0.0; if (m_DecisionList.size() > 0) { RuleList r = (RuleList)(m_DecisionList.lastElement()); double oldClassValue = (double) (r.getClassValue()); newClassValue = (double)((int)oldClassValue + 1) % getNumClasses(); } newRule.setClassValue(newClassValue); m_DecisionList.addElement(newRule); example = (Instance)example.copy(); example.setDataset(format); example.setClassValue(newClassValue); return example; } /** * Generates a new rule for the decision list * and classifies the new example. * * @param random random number generator * @param example the instance to classify * @return a list of tests * @throws Exception if dataset format not defined */ private FastVector generateTestList(Random random, Instance example) throws Exception { Instances format = getDatasetFormat(); if (format == null) throw new Exception("Dataset format not defined."); int numTests = getNumAttributes() - getNumIrrelevant(); FastVector TestList = new FastVector(numTests); boolean[] irrelevant = getAttList_Irr(); for (int i = 0; i < getNumAttributes(); i++) { if (!irrelevant[i]) { Test newTest = null; Attribute att = example.attribute(i); if (att.isNumeric()) { double newSplit = random.nextDouble(); boolean newNot = newSplit < example.value(i); newTest = new Test(i, newSplit, format, newNot); } else { newTest = new Test(i, example.value(i), format, false); } TestList.addElement (newTest); } } return TestList; } /** * Generates an example with its classvalue set to missing * and binds it to the datasets. 
* * @param random random number generator * @param format dataset the example gets bind to * @return the generated example * @throws Exception if attribute type not supported */ private Instance generateExample(Random random, Instances format) throws Exception { double[] attributes; Instance example; attributes = new double[getNumAttributes() + 1]; for (int i = 0; i < getNumAttributes(); i++) { double value = random.nextDouble(); if (format.attribute(i).isNumeric()) { attributes[i] = value; } else { if (format.attribute(i).isNominal()) attributes[i] = (value > 0.5) ? 1.0 : 0.0; else throw new Exception ("Attribute type is not supported."); } } example = new Instance(0, attributes); example.setDataset(format); example.setClassMissing(); return example; } /** * Tries to classify an example. * * @param example the example to classify * @return true if it could be classified * @throws Exception if something goes wrong */ private boolean classifyExample(Instance example) throws Exception { double classValue = -1.0; for (Enumeration e = m_DecisionList.elements(); e.hasMoreElements() && classValue < 0.0;) { RuleList rl = (RuleList) e.nextElement(); classValue = rl.classifyInstance(example); } if (classValue >= 0.0) { example.setClassValue(classValue); return true; } else { return false; } } /** * Classify example with maximum vote the following way. * With every rule in the decisionlist, it is evaluated if * the given instance could be the class of the rule. * Finally the class value that receives the highest number of votes * is assigned to the example. 
* * @param example example to be reclassified * @return instance with new class value * @throws Exception if classification fails */ private Instance votedReclassifyExample(Instance example) throws Exception { int classVotes[] = new int [getNumClasses()]; for (int i = 0; i < classVotes.length; i++) classVotes[i] = 0; for (Enumeration e = m_DecisionList.elements(); e.hasMoreElements();) { RuleList rl = (RuleList) e.nextElement(); int classValue = (int) rl.classifyInstance(example); if (classValue >= 0) classVotes[classValue]++; } int maxVote = 0; int vote = -1; for (int i = 0; i < classVotes.length; i++) { if (classVotes[i] > maxVote) { maxVote = classVotes[i]; vote = i; } } if (vote >= 0) example.setClassValue((double) vote); else throw new Exception ("Error in instance classification."); return example; } /** * Returns a dataset header. * @param random random number generator * @return dataset header * @throws Exception if something goes wrong */ private Instances defineDataset(Random random) throws Exception { boolean[] attList_Irr; int[] attList_Num; FastVector attributes = new FastVector(); Attribute attribute; FastVector nominalValues = new FastVector (2); nominalValues.addElement("false"); nominalValues.addElement("true"); FastVector classValues = new FastVector (getNumClasses()); Instances dataset; // set randomly those attributes that are irrelevant attList_Irr = defineIrrelevant(random); setAttList_Irr(attList_Irr); // set randomly those attributes that are numeric attList_Num = defineNumeric(random); // define dataset for (int i = 0; i < getNumAttributes(); i++) { if (attList_Num[i] == Attribute.NUMERIC) attribute = new Attribute("a" + i); else attribute = new Attribute("a" + i, nominalValues); attributes.addElement(attribute); } for (int i = 0; i < classValues.capacity(); i++) classValues.addElement("c" + i); attribute = new Attribute ("class", classValues); attributes.addElement(attribute); dataset = new Instances(getRelationNameToUse(), attributes, 
getNumExamplesAct()); dataset.setClassIndex(getNumAttributes()); // set dataset format of this class Instances format = new Instances(dataset, 0); setDatasetFormat(format); return dataset; } /** * Defines randomly the attributes as irrelevant. * Number of attributes to be set as irrelevant is either set * with a preceeding call of setNumIrrelevant() or is per default 0. * * @param random the random number generator to use * @return list of boolean values with one value for each attribute, * and each value set true or false according to if the corresponding * attribute was defined irrelevant or not */ private boolean[] defineIrrelevant(Random random) { boolean[] irr = new boolean [getNumAttributes()]; // initialize for (int i = 0; i < irr.length; i++) irr[i] = false; // set randomly int numIrr = 0; for (int i = 0; (numIrr < getNumIrrelevant()) && (i < getNumAttributes() * 5); i++) { int maybeNext = (int) (random.nextDouble() * (double) irr.length); if (irr[maybeNext] == false) { irr [maybeNext] = true; numIrr++; } } return irr; } /** * Chooses randomly the attributes that get datatyp numeric. * @param random the random number generator to use * @return list of integer values, with one value for each attribute, * and each value set to Attribut.NOMINAL or Attribut.NUMERIC */ private int[] defineNumeric(Random random) { int[] num = new int [getNumAttributes()]; // initialize for (int i = 0; i < num.length; i++) num[i] = Attribute.NOMINAL; int numNum = 0; for (int i = 0; (numNum < getNumNumeric()) && (i < getNumAttributes() * 5); i++) { int maybeNext = (int) (random.nextDouble() * (double) num.length); if (num[maybeNext] != Attribute.NUMERIC) { num[maybeNext] = Attribute.NUMERIC; numNum++; } } return num; } /** * Generates a comment string that documentates the data generator. * By default this string is added at the beginning of the produced output * as ARFF file type, next after the options. 
* * @return string contains info about the generated rules */ public String generateStart () { return ""; } /** * Compiles documentation about the data generation. This is the number of * irrelevant attributes and the decisionlist with all rules. * Considering that the decisionlist might get enhanced until * the last instance is generated, this method should be called at the * end of the data generation process. * * @return string with additional information about generated dataset * @throws Exception no input structure has been defined */ public String generateFinished() throws Exception { StringBuffer dLString = new StringBuffer(); // string for output at end of ARFF-File boolean[] attList_Irr = getAttList_Irr(); Instances format = getDatasetFormat(); dLString.append("%\n% Number of attributes chosen as irrelevant = " + getNumIrrelevant() + "\n"); for (int i = 0; i < attList_Irr.length; i++) { if (attList_Irr[i]) dLString.append("% " + format.attribute(i).name() + "\n"); } dLString.append("%\n% DECISIONLIST (number of rules = " + m_DecisionList.size() + "):\n"); for (int i = 0; i < m_DecisionList.size(); i++) { RuleList rl = (RuleList) m_DecisionList.elementAt(i); dLString.append("% RULE " + i + ": " + rl.toString() + "\n"); } return dLString.toString(); } /** * Resets the class values of all instances using voting. * For each instance the class value that satisfies the most rules * is choosen as new class value. * * @param dataset the dataset to work on * @return the changed instances * @throws Exception if something goes wrong */ private Instances voteDataset(Instances dataset) throws Exception { for (int i = 0; i < dataset.numInstances(); i++) { Instance inst = dataset.firstInstance(); inst = votedReclassifyExample(inst); dataset.add(inst); dataset.delete(0); } return dataset; } /** * Main method for testing this class. 
*
   * @param args command-line arguments, handed on unchanged to
   *        runDataGenerator together with a new RDG1 instance
   */
  public static void main(String[] args) {
    RDG1 generator = new RDG1();
    runDataGenerator(generator, args);
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -