📄 oner.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * OneR.java * Copyright (C) 1999 Ian H. Witten * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Utils;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Capabilities.Capability;import java.io.Serializable;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for building and using a 1R classifier; in other words, uses the minimum-error attribute for prediction, discretizing numeric attributes. For more information, see:<br/> * <br/> * R.C. Holte (1993). Very simple classification rules perform well on most commonly used datasets. Machine Learning. 11:63-91. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Holte1993, * author = {R.C. 
Holte}, * journal = {Machine Learning}, * pages = {63-91}, * title = {Very simple classification rules perform well on most commonly used datasets}, * volume = {11}, * year = {1993} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -B <minimum bucket size> * The minimum number of objects in a bucket (default: 6).</pre> * <!-- options-end --> * * @author Ian H. Witten (ihw@cs.waikato.ac.nz) * @version $Revision: 1.20 $ */public class OneR extends Classifier implements OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -2459427002147861445L; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for building and using a 1R classifier; in other words, uses " + "the minimum-error attribute for prediction, discretizing numeric " + "attributes. For more information, see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "R.C. Holte"); result.setValue(Field.YEAR, "1993"); result.setValue(Field.TITLE, "Very simple classification rules perform well on most commonly used datasets"); result.setValue(Field.JOURNAL, "Machine Learning"); result.setValue(Field.VOLUME, "11"); result.setValue(Field.PAGES, "63-91"); return result; } /** * Class for storing store a 1R rule. */ private class OneRRule implements Serializable { /** for serialization */ static final long serialVersionUID = 1152814630957092281L; /** The class attribute. 
*/ private Attribute m_class; /** The number of instances used for building the rule. */ private int m_numInst; /** Attribute to test */ private Attribute m_attr; /** Training set examples this rule gets right */ private int m_correct; /** Predicted class for each value of attr */ private int[] m_classifications; /** Predicted class for missing values */ private int m_missingValueClass = -1; /** Breakpoints (numeric attributes only) */ private double[] m_breakpoints; /** * Constructor for nominal attribute. * * @param data the data to work with * @param attribute the attribute to use * @throws Exception if something goes wrong */ public OneRRule(Instances data, Attribute attribute) throws Exception { m_class = data.classAttribute(); m_numInst = data.numInstances(); m_attr = attribute; m_correct = 0; m_classifications = new int[m_attr.numValues()]; } /** * Constructor for numeric attribute. * * @param data the data to work with * @param attribute the attribute to use * @param nBreaks the break point * @throws Exception if something goes wrong */ public OneRRule(Instances data, Attribute attribute, int nBreaks) throws Exception { m_class = data.classAttribute(); m_numInst = data.numInstances(); m_attr = attribute; m_correct = 0; m_classifications = new int[nBreaks]; m_breakpoints = new double[nBreaks - 1]; // last breakpoint is infinity } /** * Returns a description of the rule. 
* * @return a string representation of the rule */ public String toString() { try { StringBuffer text = new StringBuffer(); text.append(m_attr.name() + ":\n"); for (int v = 0; v < m_classifications.length; v++) { text.append("\t"); if (m_attr.isNominal()) { text.append(m_attr.value(v)); } else if (v < m_breakpoints.length) { text.append("< " + m_breakpoints[v]); } else if (v > 0) { text.append(">= " + m_breakpoints[v - 1]); } else { text.append("not ?"); } text.append("\t-> " + m_class.value(m_classifications[v]) + "\n"); } if (m_missingValueClass != -1) { text.append("\t?\t-> " + m_class.value(m_missingValueClass) + "\n"); } text.append("(" + m_correct + "/" + m_numInst + " instances correct)\n"); return text.toString(); } catch (Exception e) { return "Can't print OneR classifier!"; } } } /** A 1-R rule */ private OneRRule m_rule; /** The minimum bucket size */ private int m_minBucketSize = 6; /** * Classifies a given instance. * * @param inst the instance to be classified * @return the classification of the instance */ public double classifyInstance(Instance inst) { int v = 0; if (inst.isMissing(m_rule.m_attr)) { if (m_rule.m_missingValueClass != -1) { return m_rule.m_missingValueClass; } else { return 0; // missing values occur in test but not training set } } if (m_rule.m_attr.isNominal()) { v = (int) inst.value(m_rule.m_attr); } else { while (v < m_rule.m_breakpoints.length && inst.value(m_rule.m_attr) >= m_rule.m_breakpoints[v]) { v++; } } return m_rule.m_classifications[v]; } /** * Returns default capabilities of the classifier. 
*
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Generates the classifier.
   *
   * @param instances the instances to be used for building the classifier
   * @throws Exception if the classifier can't be built successfully
   */
  public void buildClassifier(Instances instances)
    throws Exception {

    boolean noRule = true;

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    Instances data = new Instances(instances);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -