j48graft.java

来自「Weka」· Java 代码 · 共 823 行 · 第 1/2 页

JAVA
823
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    J48graft.java *    Copyright (C) 2007 Geoff Webb & Janice Boughton *    (adapted from code written by Eibe Frank). */package weka.classifiers.trees;import weka.classifiers.Classifier;import weka.classifiers.Sourcable;import weka.classifiers.trees.j48.BinC45ModelSelection;import weka.classifiers.trees.j48.C45ModelSelection;import weka.classifiers.trees.j48.C45PruneableClassifierTreeG;import weka.classifiers.trees.j48.ClassifierTree;import weka.classifiers.trees.j48.ModelSelection;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.Drawable;import weka.core.Instance;import weka.core.Instances;import weka.core.Matchable;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Summarizable;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for generating a grafted (pruned or unpruned) C4.5 decision tree. For more information, see:<br/> * <br/> * Geoff Webb (1999). Decision Tree Grafting From the All-Tests-But-One Partition. 
Morgan Kaufmann, San Francisco, CA. * <br/> *  also:<br/> * Webb, G. I. (1996). Further Experimental Evidence Against The Utility Of Occams Razor. Journal of Artificial Intelligence Research 4. Menlo Park, CA: AAAI Press, pages 397-417. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;INPROCEEDINGS{Webb99, *   year = {1999}, *   title = {Decision Tree Grafting From The All Tests But One Partition}, *   booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI 99)}, *   publisher = {Morgan Kaufmann}, *   editor = {T. Dean}, *   address = {San Francisco}, *   author = {G. I. Webb}, *   location = {Stockholm, Sweden}, *   pages = {702-707}, * } * &#64;article{Webb96b, *   year = {1996}, *   title = {Further Experimental Evidence Against The Utility Of Occams Razor}, *   journal = {Journal of Artificial Intelligence Research}, *   volume = {4}, *   pages = {397-417}, *   publisher = {AAAI Press}, *   address = {Menlo Park, CA}, *   author = {G. I. Webb} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -U *  Use unpruned tree.</pre> * * <pre> -C &lt;pruning confidence&gt; *  Set confidence threshold for pruning. *  (default 0.25)</pre> * * <pre> -M &lt;minimum number of instances&gt; *  Set minimum number of instances per leaf. *  (default 2)</pre> * * <pre> -B *  Use binary splits only.</pre> * * <pre> -S *  Don't perform subtree raising.</pre> * * <pre> -L *  Do not clean up after the tree has been built.</pre> * * <pre> -A *  Laplace smoothing for predicted probabilities. 
*  (note: this option only affects initial tree; grafting process always uses laplace).</pre> * * <pre> -E * Option to allow relabeling during grafting.</pre> * <!-- options-end --> * * @author Janice Boughton (jrbought@csse.monash.edu.au) *  (based on J48.java written by Eibe Frank) * @version $Revision: 1.1 $ */public class J48graft extends Classifier implements OptionHandler,   Drawable, Matchable, Sourcable, WeightedInstancesHandler, Summarizable,  AdditionalMeasureProducer, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 8823716098042427799L;  /** The decision tree */  private ClassifierTree m_root;  /** Unpruned tree? */  private boolean m_unpruned = false;  /** Confidence level */  private float m_CF = 0.25f;  /** Minimum number of instances */  private int m_minNumObj = 2;  /** Determines whether probabilities are smoothed using      Laplace correction when predictions are generated */  private boolean m_useLaplace = false;  /** Number of folds for reduced error pruning. */  private int m_numFolds = 3;  /** Binary splits on nominal attributes? */  private boolean m_binarySplits = false;  /** Subtree raising to be performed? */  private boolean m_subtreeRaising = true;  /** Cleanup after the tree has been built. */  private boolean m_noCleanup = false;  /** relabel instances when grafting */  private boolean m_relabel = false;  /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return  "Class for generating a grafted (pruned or unpruned) C4.5 "      + "decision tree. For more information, see\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing   * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*   * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation        result;    result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Geoff Webb");    result.setValue(Field.YEAR, "1999");    result.setValue(Field.TITLE, "Decision Tree Grafting From the All-Tests-But-One Partition");    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");    result.setValue(Field.ADDRESS, "San Francisco, CA");    return result;  }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities      result;    try {     result = new C45PruneableClassifierTreeG(null, !m_unpruned, m_CF, m_subtreeRaising, m_relabel, !m_noCleanup).getCapabilities();    }    catch (Exception e) {      result = new Capabilities(this);    }    result.setOwner(this);    return result;  }  /**   * Generates the classifier.   *   * @param instances the data to train the classifier with   * @throws Exception if classifier can't be built successfully   */  public void buildClassifier(Instances instances)       throws Exception {    ModelSelection modSelection;    if (m_binarySplits)      modSelection = new BinC45ModelSelection(m_minNumObj, instances);    else      modSelection = new C45ModelSelection(m_minNumObj, instances);      m_root = new C45PruneableClassifierTreeG(modSelection,                               !m_unpruned, m_CF, m_subtreeRaising,                                m_relabel, !m_noCleanup);    m_root.buildClassifier(instances);    if (m_binarySplits) {      ((BinC45ModelSelection)modSelection).cleanup();    } else {      ((C45ModelSelection)modSelection).cleanup();    }  }  /**   * Classifies an instance.   
*   * @param instance the instance to classify   * @return the classification for the instance   * @throws Exception if instance can't be classified successfully   */  public double classifyInstance(Instance instance) throws Exception {    return m_root.classifyInstance(instance);  }  /**    * Returns class probabilities for an instance.   *   * @param instance the instance to calculate the class probabilities for   * @return the class probabilities   * @throws Exception if distribution can't be computed successfully   */  public final double [] distributionForInstance(Instance instance)        throws Exception {    return m_root.distributionForInstance(instance, m_useLaplace);  }  /**   *  Returns the type of graph this classifier   *  represents.   *  @return Drawable.TREE   */     public int graphType() {      return Drawable.TREE;  }  /**   * Returns graph describing the tree.   *   * @return the graph describing the tree   * @throws Exception if graph can't be computed   */  public String graph() throws Exception {    return m_root.graph();  }  /**   * Returns tree in prefix order.   *   * @return the tree in prefix order   * @throws Exception if something goes wrong   */  public String prefix() throws Exception {        return m_root.prefix();  }  /**   * Returns tree as an if-then statement.   *   * @param className the name of the Java class   * @return the tree as a Java if-then type statement   * @throws Exception if something goes wrong   */  public String toSource(String className) throws Exception {    StringBuffer [] source = m_root.toSource(className);    return     "class " + className + " {\n\n"    +"  public static double classify(Object [] i)\n"    +"    throws Exception {\n\n"    +"    double p = Double.NaN;\n"    + source[0]  // Assignment code    +"    return p;\n"    +"  }\n"    + source[1]  // Support code    +"}\n";  }  /**   * Returns an enumeration describing the available options.   
*   * Valid options are: <p>   *   * -U <br>   * Use unpruned tree.<p>   *   * -C confidence <br>   * Set confidence threshold for pruning. (Default: 0.25) <p>   *   * -M number <br>   * Set minimum number of instances per leaf. (Default: 2) <p>   *   * -B <br>   * Use binary splits for nominal attributes. <p>   *   * -S <br>   * Don't perform subtree raising. <p>   *   * -L <br>   * Do not clean up after the tree has been built.   *   * -A <br>   * If set, Laplace smoothing is used for predicted probabilites.    *  (note: this option only affects initial tree; grafting process always uses laplace). <p>   *   * -E <br>   * Allow relabelling when grafting. <p>   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector(9);    newVector.       addElement(new Option("\tUse unpruned tree.",			      "U", 0, "-U"));    newVector.       addElement(new Option("\tSet confidence threshold for pruning.\n" +                             "\t(default 0.25)",			     "C", 1, "-C <pruning confidence>"));    newVector.       addElement(new Option("\tSet minimum number of instances per leaf.\n" +			      "\t(default 2)",			      "M", 1, "-M <minimum number of instances>"));    newVector.       addElement(new Option("\tUse binary splits only.",			      "B", 0, "-B"));    newVector.       addElement(new Option("\tDon't perform subtree raising.",			      "S", 0, "-S"));    newVector.       addElement(new Option("\tDo not clean up after the tree has been built.",			      "L", 0, "-L"));    newVector.       addElement(new Option("\tLaplace smoothing for predicted probabilities.  (note: this option only affects initial tree; grafting process always uses laplace).", 			      "A", 0, "-A"));    newVector.       addElement(new Option("\tRelabel when grafting.",                             "E", 0, "-E"));    return newVector.elements();  }  /**   * Parses a given list of options.   
*   <!-- options-start -->   * Valid options are: <p/>   *   * <pre> -U   *  Use unpruned tree.</pre>   *   * <pre> -C &lt;pruning confidence&gt;   *  Set confidence threshold for pruning.   *  (default 0.25)</pre>   *   * <pre> -M &lt;minimum number of instances&gt;   *  Set minimum number of instances per leaf.   *  (default 2)</pre>   *   * <pre> -B   *  Use binary splits only.</pre>   *   * <pre> -S   *  Don't perform subtree raising.</pre>   *   * <pre> -L   *  Do not clean up after the tree has been built.</pre>   *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?