⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 randomtree.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    RandomTree.java *    Copyright (C) 2001 Richard Kirkby, Eibe Frank * */package weka.classifiers.trees;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.ContingencyTables;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Randomizable;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for constructing a tree that considers K randomly  chosen attributes at each node. Performs no pruning. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -K &lt;number of attributes&gt; *  Number of attributes to randomly investigate *  (&lt;1 = int(log(#attributes)+1)).</pre> *  * <pre> -M &lt;minimum number of instances&gt; *  Set minimum number of instances per leaf.</pre> *  * <pre> -S &lt;num&gt; *  Seed for random number generator. *  (default 1)</pre> *  * <pre> -depth &lt;num&gt; *  The maximum depth of the tree, 0 for unlimited. 
 *  (default 0)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 1.14 $
 */
public class RandomTree
  extends Classifier
  implements OptionHandler, WeightedInstancesHandler, Randomizable {

  /** for serialization */
  static final long serialVersionUID = 8934314652175299374L;

  /** The subtrees appended to this tree (one per branch of the split). */
  protected RandomTree[] m_Successors;

  /** The index of the attribute to split on; -1 means this node is a leaf. */
  protected int m_Attribute = -1;

  /** The split point (for numeric attributes; NaN until a split is chosen). */
  protected double m_SplitPoint = Double.NaN;

  /** The class distribution from the training data. */
  protected double[][] m_Distribution = null;

  /** The header information (dataset structure, no instances). */
  protected Instances m_Info = null;

  /** The proportions of training instances going down each branch. */
  protected double[] m_Prop = null;

  /** Class probabilities from the training data. */
  protected double[] m_ClassProbs = null;

  /** Minimum number of instances (total weight) required at a leaf. */
  protected double m_MinNum = 1.0;

  /** The number of attributes considered for a split. */
  protected int m_KValue = 1;

  /** The random seed to use. */
  protected int m_randomSeed = 1;

  /** The maximum depth of the tree (0 = unlimited). */
  protected int m_MaxDepth = 0;

  /**
   * Returns a string describing classifier.
   *
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return  "Class for constructing a tree that considers K randomly " +
      " chosen attributes at each node. Performs no pruning.";
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNumTipText() {
    return "The minimum total weight of the instances in a leaf.";
  }

  /**
   * Get the value of MinNum.
   *
   * @return Value of MinNum.
   */
  public double getMinNum() {

    return m_MinNum;
  }

  /**
   * Set the value of MinNum.
   *
   * @param newMinNum Value to assign to MinNum.
   */
  public void setMinNum(double newMinNum) {

    m_MinNum = newMinNum;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String KValueTipText() {
    return "Sets the number of randomly chosen attributes.";
  }

  /**
   * Get the value of K.
   *
   * @return Value of K.
   */
  public int getKValue() {

    return m_KValue;
  }

  /**
   * Set the value of K.
   *
   * @param k Value to assign to K.
   */
  public void setKValue(int k) {

    m_KValue = k;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The random number seed used for selecting attributes.";
  }

  /**
   * Set the seed for random number generation.
   *
   * @param seed the seed
   */
  public void setSeed(int seed) {
    m_randomSeed = seed;
  }

  /**
   * Gets the seed for the random number generations.
   *
   * @return the seed for the random number generation
   */
  public int getSeed() {
    return m_randomSeed;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String maxDepthTipText() {
    return "The maximum depth of the tree, 0 for unlimited.";
  }

  /**
   * Get the maximum depth of the tree, 0 for unlimited.
*   * @return 		the maximum depth.   */  public int getMaxDepth() {    return m_MaxDepth;  }    /**   * Set the maximum depth of the tree, 0 for unlimited.   *   * @param value 	the maximum depth.   */  public void setMaxDepth(int value) {    m_MaxDepth = value;  }    /**   * Lists the command-line options for this classifier.   *    * @return an enumeration over all possible options   */  public Enumeration listOptions() {        Vector newVector = new Vector();    newVector.addElement(new Option(	"\tNumber of attributes to randomly investigate\n"	+"\t(<1 = int(log(#attributes)+1)).",	"K", 1, "-K <number of attributes>"));    newVector.addElement(new Option(	"\tSet minimum number of instances per leaf.",	"M", 1, "-M <minimum number of instances>"));    newVector.addElement(new Option(	"\tSeed for random number generator.\n"	+ "\t(default 1)",	"S", 1, "-S <num>"));    newVector.addElement(new Option(	"\tThe maximum depth of the tree, 0 for unlimited.\n"	+ "\t(default 0)",	"depth", 1, "-depth <num>"));    Enumeration enu = super.listOptions();    while (enu.hasMoreElements()) {      newVector.addElement(enu.nextElement());    }    return newVector.elements();  }   /**   * Gets options from this classifier.   *    * @return the options for the current setup   */  public String[] getOptions() {    Vector        result;    String[]      options;    int           i;        result = new Vector();        result.add("-K");    result.add("" + getKValue());        result.add("-M");    result.add("" + getMinNum());        result.add("-S");    result.add("" + getSeed());        if (getMaxDepth() > 0) {      result.add("-depth");      result.add("" + getMaxDepth());    }        options = super.getOptions();    for (i = 0; i < options.length; i++)      result.add(options[i]);        return (String[]) result.toArray(new String[result.size()]);  }  /**   * Parses a given list of options. 
<p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -K &lt;number of attributes&gt;   *  Number of attributes to randomly investigate   *  (&lt;1 = int(log(#attributes)+1)).</pre>   *    * <pre> -M &lt;minimum number of instances&gt;   *  Set minimum number of instances per leaf.</pre>   *    * <pre> -S &lt;num&gt;   *  Seed for random number generator.   *  (default 1)</pre>   *    * <pre> -depth &lt;num&gt;   *  The maximum depth of the tree, 0 for unlimited.   *  (default 0)</pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    <!-- options-end -->   *    * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception{    String	tmpStr;        tmpStr = Utils.getOption('K', options);    if (tmpStr.length() != 0) {      m_KValue = Integer.parseInt(tmpStr);    } else {      m_KValue = 1;    }        tmpStr = Utils.getOption('M', options);    if (tmpStr.length() != 0) {      m_MinNum = Double.parseDouble(tmpStr);    } else {      m_MinNum = 1;    }        tmpStr = Utils.getOption('S', options);    if (tmpStr.length() != 0) {      setSeed(Integer.parseInt(tmpStr));    } else {      setSeed(1);    }        tmpStr = Utils.getOption("depth", options);    if (tmpStr.length() != 0) {      setMaxDepth(Integer.parseInt(tmpStr));    } else {      setMaxDepth(0);    }        super.setOptions(options);        Utils.checkForRemainingOptions(options);  }  /**   * Returns default capabilities of the classifier.   
 *
 * @return      the capabilities of this classifier
 */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes: nominal, numeric and date attributes are supported,
    // and missing attribute values are tolerated
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class: only nominal (classification) class attributes are supported
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Builds classifier.
   *
   * @param data the data to train with
   * @throws Exception if something goes wrong or the data doesn't fit
   */
  public void buildClassifier(Instances data) throws Exception {

    // Make sure K value is in range: at most the number of predictor
    // attributes; if < 1, default to int(log2(#attributes)) + 1
    if (m_KValue > data.numAttributes()-1) m_KValue = data.numAttributes()-1;
    if (m_KValue < 1) m_KValue = (int) Utils.log2(data.numAttributes())+1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class (work on a copy so the
    // caller's dataset is left untouched)
    data = new Instances(data);
    data.deleteWithMissingClass();

    Instances train = data;

    // Create array of sorted indices and weights, one row per attribute
    int[][] sortedIndices = new int[train.numAttributes()][0];
    double[][] weights = new double[train.numAttributes()][0];
    double[] vals = new double[train.numInstances()];
    for (int j = 0; j < train.numAttributes(); j++) {
      if (j != train.classIndex()) {
	weights[j] = new double[train.numInstances()];
	if (train.attribute(j).isNominal()) {

	  // Handling nominal attributes. Putting indices of
	  // instances with missing values at the end.
	  sortedIndices[j] = new int[train.numInstances()];
	  int count = 0;
	  // first pass: instances with a value for attribute j
	  for (int i = 0; i < train.numInstances(); i++) {
	    Instance inst = train.instance(i);
	    if (!inst.isMissing(j)) {
	      sortedIndices[j][count] = i;
	      weights[j][count] = inst.weight();
	      count++;
	    }
	  }
	  // second pass: append the instances with missing values
	  for (int i = 0; i < train.numInstances(); i++) {
	    Instance inst = train.instance(i);
	    if (inst.isMissing(j)) {
	      sortedIndices[j][count] = i;
	      weights[j][count] = inst.weight();
	      count++;
	    }
	  }
	} else {

	  // Sorted indices are computed for numeric attributes;
	  // weights are stored in the same sorted order
	  for (int i = 0; i < train.numInstances(); i++) {
	    Instance inst = train.instance(i);
	    vals[i] = inst.value(j);
	  }
	  sortedIndices[j] = Utils.sort(vals);
	  for (int i = 0; i < train.numInstances(); i++) {
	    weights[j][i] = train.instance(sortedIndices[j][i]).weight();
	  }
	}
      }
    }

    // Compute initial class counts (weighted by instance weights)
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
      Instance inst = train.instance(i);
      classProbs[(int)inst.classValue()] += inst.weight();
    }

    // Create the attribute indices window: every attribute index
    // except the class index
    int[] attIndicesWindow = new int[data.numAttributes()-1];
    int j=0;
    for (int i=0; i<attIndicesWindow.length; i++) {
      if (j == data.classIndex()) j++; // do not include the class
      attIndicesWindow[i] = j++;
    }

    // Build tree
    buildTree(sortedIndices, weights, train, classProbs,
	      new Instances(train, 0), m_MinNum, m_Debug,
	      attIndicesWindow, data.getRandomNumberGenerator(m_randomSeed), 0);
  }

  /**
   * Computes class distribution of an instance using the decision tree.
*    * @param instance the instance to compute the distribution for   * @return the computed class distribution   * @throws Exception if computation fails   */  public double[] distributionForInstance(Instance instance) throws Exception {        double[] returnedDist = null;        if (m_Attribute > -1) {            // Node is not a leaf      if (instance.isMissing(m_Attribute)) {	// Value is missing	returnedDist = new double[m_Info.numClasses()];	// Split instance up	for (int i = 0; i < m_Successors.length; i++) {	  double[] help = m_Successors[i].distributionForInstance(instance);	  if (help != null) {	    for (int j = 0; j < help.length; j++) {	      returnedDist[j] += m_Prop[i] * help[j];	    }	  }	}      } else if (m_Info.attribute(m_Attribute).isNominal()) {	  	// For nominal attributes	returnedDist =  m_Successors[(int)instance.value(m_Attribute)].	  distributionForInstance(instance);      } else {		// For numeric attributes	if (Utils.sm(instance.value(m_Attribute), m_SplitPoint)) {	  returnedDist = m_Successors[0].distributionForInstance(instance);	} else {	  returnedDist = m_Successors[1].distributionForInstance(instance);	}      }    }    if ((m_Attribute == -1) || (returnedDist == null)) {      // Node is a leaf or successor is empty      return m_ClassProbs;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -