id3.java

来自「决策树在Weka中的设计与实现,数据的属性要是名词性的。」· Java 代码 · 共 337 行
JAVA
337 行
package weka.classifiers.trees;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.NoSupportForMissingValuesException;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import java.util.Enumeration;public class Id3   extends Classifier   implements TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = -2693678647096322561L;    /** The node's successors. */   private Id3[] m_Successors;  /** Attribute used for splitting. */  private Attribute m_Attribute;  /** Class value if node is leaf. */  private double m_ClassValue;  /** Class distribution if node is leaf. */  private double[] m_Distribution;  /** Class attribute of dataset. */  private Attribute m_ClassAttribute;  /**   * Returns a string describing the classifier.   * @return a description suitable for the GUI.   */  public String globalInfo() {    return  "Class for constructing an unpruned decision tree based on the ID3 "      + "algorithm. Can only deal with nominal attributes. No missing values "      + "allowed. Empty leaves may result in unclassified instances. For more "      + "information see: \n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "R. Quinlan");    result.setValue(Field.YEAR, "1986");    result.setValue(Field.TITLE, "Induction of decision trees");    result.setValue(Field.JOURNAL, "Machine Learning");    result.setValue(Field.VOLUME, "1");    result.setValue(Field.NUMBER, "1");    result.setValue(Field.PAGES, "81-106");        return result;  }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);        return result;  }  /**   * Builds Id3 decision tree classifier.   *   * @param data the training data   * @exception Exception if classifier can't be built successfully   */  public void buildClassifier(Instances data) throws Exception {    // can classifier handle the data?    getCapabilities().testWithFail(data);    // remove instances with missing class    data = new Instances(data);    data.deleteWithMissingClass();        makeTree(data);  }  /**   * Method for building an Id3 tree.   *   * @param data the training data   * @exception Exception if decision tree can't be built successfully   */  private void makeTree(Instances data) throws Exception {    // Check if no instances have reached this node.    if (data.numInstances() == 0) {      m_Attribute = null;      m_ClassValue = Instance.missingValue();      m_Distribution = new double[data.numClasses()];      return;    }    // Compute attribute with maximum information gain.    double[] infoGains = new double[data.numAttributes()];    Enumeration attEnum = data.enumerateAttributes();    while (attEnum.hasMoreElements()) {      Attribute att = (Attribute) attEnum.nextElement();      infoGains[att.index()] = computeInfoGain(data, att);    }    m_Attribute = data.attribute(Utils.maxIndex(infoGains));        // Make leaf if information gain is zero.     // Otherwise create successors.    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {      m_Attribute = null;      m_Distribution = new double[data.numClasses()];      Enumeration instEnum = data.enumerateInstances();      while (instEnum.hasMoreElements()) {        Instance inst = (Instance) instEnum.nextElement();        m_Distribution[(int) inst.classValue()]++;      }      Utils.normalize(m_Distribution);      m_ClassValue = Utils.maxIndex(m_Distribution);      m_ClassAttribute = data.classAttribute();    } else {      Instances[] splitData = splitData(data, m_Attribute);      m_Successors = new Id3[m_Attribute.numValues()];      for (int j = 0; j < m_Attribute.numValues(); j++) {        m_Successors[j] = new Id3();        m_Successors[j].makeTree(splitData[j]);      }    }  }  /**   * Classifies a given test instance using the decision tree.   *   * @param instance the instance to be classified   * @return the classification   * @throws NoSupportForMissingValuesException if instance has missing values   */  public double classifyInstance(Instance instance)     throws NoSupportForMissingValuesException {    if (instance.hasMissingValue()) {      throw new NoSupportForMissingValuesException("Id3: no missing values, "                                                   + "please.");    }    if (m_Attribute == null) {      return m_ClassValue;    } else {      return m_Successors[(int) instance.value(m_Attribute)].        classifyInstance(instance);    }  }  /**   * Computes class distribution for instance using decision tree.   *   * @param instance the instance for which distribution is to be computed   * @return the class distribution for the given instance   * @throws NoSupportForMissingValuesException if instance has missing values   */  public double[] distributionForInstance(Instance instance)     throws NoSupportForMissingValuesException {    if (instance.hasMissingValue()) {      throw new NoSupportForMissingValuesException("Id3: no missing values, "                                                   + "please.");    }    if (m_Attribute == null) {      return m_Distribution;    } else {       return m_Successors[(int) instance.value(m_Attribute)].        distributionForInstance(instance);    }  }  /**   * Prints the decision tree using the private toString method from below.   *   * @return a textual description of the classifier   */  public String toString() {    if ((m_Distribution == null) && (m_Successors == null)) {      return "Id3: No model built yet.";    }    return "Id3\n\n" + toString(0);  }  /**   * Computes information gain for an attribute.   *   * @param data the data for which info gain is to be computed   * @param att the attribute   * @return the information gain for the given attribute and data   * @throws Exception if computation fails   */  private double computeInfoGain(Instances data, Attribute att)     throws Exception {    double infoGain = computeEntropy(data);    Instances[] splitData = splitData(data, att);    for (int j = 0; j < att.numValues(); j++) {      if (splitData[j].numInstances() > 0) {        infoGain -= ((double) splitData[j].numInstances() /                     (double) data.numInstances()) *          computeEntropy(splitData[j]);      }    }    return infoGain;  }  /**   * Computes the entropy of a dataset.   *    * @param data the data for which entropy is to be computed   * @return the entropy of the data's class distribution   * @throws Exception if computation fails   */  private double computeEntropy(Instances data) throws Exception {    double [] classCounts = new double[data.numClasses()];    Enumeration instEnum = data.enumerateInstances();    while (instEnum.hasMoreElements()) {      Instance inst = (Instance) instEnum.nextElement();      classCounts[(int) inst.classValue()]++;    }    double entropy = 0;    for (int j = 0; j < data.numClasses(); j++) {      if (classCounts[j] > 0) {        entropy -= classCounts[j] * Utils.log2(classCounts[j]);      }    }    entropy /= (double) data.numInstances();    return entropy + Utils.log2(data.numInstances());  }  /**   * Splits a dataset according to the values of a nominal attribute.   *   * @param data the data which is to be split   * @param att the attribute to be used for splitting   * @return the sets of instances produced by the split   */  private Instances[] splitData(Instances data, Attribute att) {    Instances[] splitData = new Instances[att.numValues()];    for (int j = 0; j < att.numValues(); j++) {      splitData[j] = new Instances(data, data.numInstances());    }    Enumeration instEnum = data.enumerateInstances();    while (instEnum.hasMoreElements()) {      Instance inst = (Instance) instEnum.nextElement();      splitData[(int) inst.value(att)].add(inst);    }    for (int i = 0; i < splitData.length; i++) {      splitData[i].compactify();    }    return splitData;  }  /**   * Outputs a tree at a certain level.   *   * @param level the level at which the tree is to be printed   * @return the tree as string at the given level   */  private String toString(int level) {    StringBuffer text = new StringBuffer();        if (m_Attribute == null) {      if (Instance.isMissingValue(m_ClassValue)) {        text.append(": null");      } else {        text.append(": " + m_ClassAttribute.value((int) m_ClassValue));      }     } else {      for (int j = 0; j < m_Attribute.numValues(); j++) {        text.append("\n");        for (int i = 0; i < level; i++) {          text.append("|  ");        }        text.append(m_Attribute.name() + " = " + m_Attribute.value(j));        text.append(m_Successors[j].toString(level + 1));      }    }    return text.toString();  }  /**   * Main method.   *   * @param args the options for the classifier   */  public static void main(String[] args) {    try {      System.out.println(Evaluation.evaluateModel(new Id3(), args));    } catch (Exception e) {      System.err.println(e.getMessage());    }  }}
id3.java - 源码说明

本页面展示了「决策树在Weka中的设计与实现,数据的属性要是名词性的。」中的 id3.java 源码文件，采用 Java 编程语言编写，共 337 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Weka相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?