📄 oner.java
字号:
data.deleteWithMissingClass(); // for each attribute ... Enumeration enu = instances.enumerateAttributes(); while (enu.hasMoreElements()) { try { OneRRule r = newRule((Attribute) enu.nextElement(), data); // if this attribute is the best so far, replace the rule if (noRule || r.m_correct > m_rule.m_correct) { m_rule = r; } noRule = false; } catch (Exception ex) { } } } /** * Create a rule branching on this attribute. * * @param attr the attribute to branch on * @param data the data to be used for creating the rule * @return the generated rule * @throws Exception if the rule can't be built successfully */ public OneRRule newRule(Attribute attr, Instances data) throws Exception { OneRRule r; // ... create array to hold the missing value counts int[] missingValueCounts = new int [data.classAttribute().numValues()]; if (attr.isNominal()) { r = newNominalRule(attr, data, missingValueCounts); } else { r = newNumericRule(attr, data, missingValueCounts); } r.m_missingValueClass = Utils.maxIndex(missingValueCounts); if (missingValueCounts[r.m_missingValueClass] == 0) { r.m_missingValueClass = -1; // signal for no missing value class } else { r.m_correct += missingValueCounts[r.m_missingValueClass]; } return r; } /** * Create a rule branching on this nominal attribute. * * @param attr the attribute to branch on * @param data the data to be used for creating the rule * @param missingValueCounts to be filled in * @return the generated rule * @throws Exception if the rule can't be built successfully */ public OneRRule newNominalRule(Attribute attr, Instances data, int[] missingValueCounts) throws Exception { // ... create arrays to hold the counts int[][] counts = new int [attr.numValues()] [data.classAttribute().numValues()]; // ... calculate the counts Enumeration enu = data.enumerateInstances(); while (enu.hasMoreElements()) { Instance i = (Instance) enu.nextElement(); if (i.isMissing(attr)) { missingValueCounts[(int) i.classValue()]++; } else { counts[(int) i.value(attr)][(int) i.classValue()]++; } } OneRRule r = new OneRRule(data, attr); // create a new rule for (int value = 0; value < attr.numValues(); value++) { int best = Utils.maxIndex(counts[value]); r.m_classifications[value] = best; r.m_correct += counts[value][best]; } return r; } /** * Create a rule branching on this numeric attribute * * @param attr the attribute to branch on * @param data the data to be used for creating the rule * @param missingValueCounts to be filled in * @return the generated rule * @throws Exception if the rule can't be built successfully */ public OneRRule newNumericRule(Attribute attr, Instances data, int[] missingValueCounts) throws Exception { // ... can't be more than numInstances buckets int [] classifications = new int[data.numInstances()]; double [] breakpoints = new double[data.numInstances()]; // create array to hold the counts int [] counts = new int[data.classAttribute().numValues()]; int correct = 0; int lastInstance = data.numInstances(); // missing values get sorted to the end of the instances data.sort(attr); while (lastInstance > 0 && data.instance(lastInstance-1).isMissing(attr)) { lastInstance--; missingValueCounts[(int) data.instance(lastInstance). classValue()]++; } int i = 0; int cl = 0; // index of next bucket to create int it; while (i < lastInstance) { // start a new bucket for (int j = 0; j < counts.length; j++) counts[j] = 0; do { // fill it until it has enough of the majority class it = (int) data.instance(i++).classValue(); counts[it]++; } while (counts[it] < m_minBucketSize && i < lastInstance); // while class remains the same, keep on filling while (i < lastInstance && (int) data.instance(i).classValue() == it) { counts[it]++; i++; } while (i < lastInstance && // keep on while attr value is the same (data.instance(i - 1).value(attr) == data.instance(i).value(attr))) { counts[(int) data.instance(i++).classValue()]++; } for (int j = 0; j < counts.length; j++) { if (counts[j] > counts[it]) { it = j; } } if (cl > 0) { // can we coalesce with previous class? if (counts[classifications[cl - 1]] == counts[it]) { it = classifications[cl - 1]; } if (it == classifications[cl - 1]) { cl--; // yes! } } correct += counts[it]; classifications[cl] = it; if (i < lastInstance) { breakpoints[cl] = (data.instance(i - 1).value(attr) + data.instance(i).value(attr)) / 2; } cl++; } if (cl == 0) { throw new Exception("Only missing values in the training data!"); } OneRRule r = new OneRRule(data, attr, cl); // new rule with cl branches r.m_correct = correct; for (int v = 0; v < cl; v++) { r.m_classifications[v] = classifications[v]; if (v < cl-1) { r.m_breakpoints[v] = breakpoints[v]; } } return r; } /** * Returns an enumeration describing the available options.. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { String string = "\tThe minimum number of objects in a bucket (default: 6)."; Vector newVector = new Vector(1); newVector.addElement(new Option(string, "B", 1, "-B <minimum bucket size>")); return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -B <minimum bucket size> * The minimum number of objects in a bucket (default: 6).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String bucketSizeString = Utils.getOption('B', options); if (bucketSizeString.length() != 0) { m_minBucketSize = Integer.parseInt(bucketSizeString); } else { m_minBucketSize = 6; } } /** * Gets the current settings of the OneR classifier. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [2]; int current = 0; options[current++] = "-B"; options[current++] = "" + m_minBucketSize; while (current < options.length) { options[current++] = ""; } return options; } /** * Returns a description of the classifier * * @return a string representation of the classifier */ public String toString() { if (m_rule == null) { return "OneR: No model built yet."; } return m_rule.toString(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String minBucketSizeTipText() { return "The minimum bucket size used for discretizing numeric " + "attributes."; } /** * Get the value of minBucketSize. * @return Value of minBucketSize. */ public int getMinBucketSize() { return m_minBucketSize; } /** * Set the value of minBucketSize. * @param v Value to assign to minBucketSize. */ public void setMinBucketSize(int v) { m_minBucketSize = v; } /** * Main method for testing this class * * @param argv the commandline options */ public static void main(String [] argv) { try { System.out.println(Evaluation.evaluateModel(new OneR(), argv)); } catch (Exception e) { System.err.println(e.getMessage()); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -