📄 randomtree.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * RandomTree.java * Copyright (C) 2001 Richard Kirkby, Eibe Frank * */package weka.classifiers.trees;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.ContingencyTables;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Randomizable;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for constructing a tree that considers K randomly chosen attributes at each node. Performs no pruning. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -K <number of attributes> * Number of attributes to randomly investigate * (<1 = int(log(#attributes)+1)).</pre> * * <pre> -M <minimum number of instances> * Set minimum number of instances per leaf.</pre> * * <pre> -S <num> * Seed for random number generator. * (default 1)</pre> * * <pre> -depth <num> * The maximum depth of the tree, 0 for unlimited. 
* (default 0)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 1.14 $
 */
public class RandomTree
  extends Classifier
  implements OptionHandler, WeightedInstancesHandler, Randomizable {

  /** for serialization */
  static final long serialVersionUID = 8934314652175299374L;

  /** The subtrees appended to this tree. */
  protected RandomTree[] m_Successors;

  /**
   * The attribute to split on. The value -1 marks the node as a leaf:
   * distributionForInstance only descends when this is greater than -1.
   */
  protected int m_Attribute = -1;

  /**
   * The split point. For a non-nominal split attribute,
   * distributionForInstance descends into successor 0 when
   * Utils.sm(value, m_SplitPoint) holds and into successor 1 otherwise.
   */
  protected double m_SplitPoint = Double.NaN;

  /** The class distribution from the training data. */
  protected double[][] m_Distribution = null;

  /** The header information. */
  protected Instances m_Info = null;

  /**
   * The proportions of training instances going down each branch. Used as
   * weights when the distributions of all successors are combined for an
   * instance whose split attribute value is missing.
   */
  protected double[] m_Prop = null;

  /**
   * Class probabilities from the training data. Returned by
   * distributionForInstance for a leaf, or when no successor produced a
   * distribution.
   */
  protected double[] m_ClassProbs = null;

  /**
   * Minimum number of instances for leaf (described by minNumTipText as the
   * minimum total weight of the instances in a leaf).
   */
  protected double m_MinNum = 1.0;

  /**
   * The number of attributes considered for a split. buildClassifier
   * overwrites this field: values above #attributes - 1 are clamped, values
   * below 1 are replaced by (int) log2(#attributes) + 1.
   */
  protected int m_KValue = 1;

  /** The random seed to use. */
  protected int m_randomSeed = 1;

  /** The maximum depth of the tree (0 = unlimited) */
  protected int m_MaxDepth = 0;

  /**
   * Returns a string describing classifier
   *
   * NOTE(review): the two concatenated literals produce two consecutive
   * spaces between "randomly" and "chosen" in the returned text.
   *
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "Class for constructing a tree that considers K randomly "
      + " chosen attributes at each node. Performs no pruning.";
  }

  /**
   * Returns the tip text for the minNum property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String minNumTipText() {
    return "The minimum total weight of the instances in a leaf.";
  }

  /**
   * Get the value of MinNum.
   *
   * @return Value of MinNum.
*/
  public double getMinNum() {

    return m_MinNum;
  }

  /**
   * Set the value of MinNum. The value is stored as given; no range check
   * is performed here.
   *
   * @param newMinNum Value to assign to MinNum.
   */
  public void setMinNum(double newMinNum) {

    m_MinNum = newMinNum;
  }

  /**
   * Returns the tip text for the KValue property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String KValueTipText() {
    return "Sets the number of randomly chosen attributes.";
  }

  /**
   * Get the value of K. Note that buildClassifier may overwrite the stored
   * value (it clamps it to #attributes - 1 and replaces values below 1).
   *
   * @return Value of K.
   */
  public int getKValue() {

    return m_KValue;
  }

  /**
   * Set the value of K. The value is stored as given; buildClassifier
   * replaces a value below 1 by (int) log2(#attributes) + 1.
   *
   * @param k Value to assign to K.
   */
  public void setKValue(int k) {

    m_KValue = k;
  }

  /**
   * Returns the tip text for the seed property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    return "The random number seed used for selecting attributes.";
  }

  /**
   * Set the seed for random number generation.
   *
   * @param seed the seed
   */
  public void setSeed(int seed) {

    m_randomSeed = seed;
  }

  /**
   * Gets the seed for the random number generation
   *
   * @return the seed for the random number generation
   */
  public int getSeed() {

    return m_randomSeed;
  }

  /**
   * Returns the tip text for the maxDepth property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String maxDepthTipText() {
    return "The maximum depth of the tree, 0 for unlimited.";
  }

  /**
   * Get the maximum depth of the tree, 0 for unlimited.
   *
   * @return the maximum depth.
   */
  public int getMaxDepth() {
    return m_MaxDepth;
  }

  /**
   * Set the maximum depth of the tree, 0 for unlimited. The value is stored
   * as given; no range check is performed here.
   *
   * @param value the maximum depth.
   */
  public void setMaxDepth(int value) {
    m_MaxDepth = value;
  }

  /**
   * Lists the command-line options for this classifier.
* * @return an enumeration over all possible options */ public Enumeration listOptions() { Vector newVector = new Vector(); newVector.addElement(new Option( "\tNumber of attributes to randomly investigate\n" +"\t(<1 = int(log(#attributes)+1)).", "K", 1, "-K <number of attributes>")); newVector.addElement(new Option( "\tSet minimum number of instances per leaf.", "M", 1, "-M <minimum number of instances>")); newVector.addElement(new Option( "\tSeed for random number generator.\n" + "\t(default 1)", "S", 1, "-S <num>")); newVector.addElement(new Option( "\tThe maximum depth of the tree, 0 for unlimited.\n" + "\t(default 0)", "depth", 1, "-depth <num>")); Enumeration enu = super.listOptions(); while (enu.hasMoreElements()) { newVector.addElement(enu.nextElement()); } return newVector.elements(); } /** * Gets options from this classifier. * * @return the options for the current setup */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); result.add("-K"); result.add("" + getKValue()); result.add("-M"); result.add("" + getMinNum()); result.add("-S"); result.add("" + getSeed()); if (getMaxDepth() > 0) { result.add("-depth"); result.add("" + getMaxDepth()); } options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); return (String[]) result.toArray(new String[result.size()]); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -K <number of attributes> * Number of attributes to randomly investigate * (<1 = int(log(#attributes)+1)).</pre> * * <pre> -M <minimum number of instances> * Set minimum number of instances per leaf.</pre> * * <pre> -S <num> * Seed for random number generator. * (default 1)</pre> * * <pre> -depth <num> * The maximum depth of the tree, 0 for unlimited. 
* (default 0)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception{ String tmpStr; tmpStr = Utils.getOption('K', options); if (tmpStr.length() != 0) { m_KValue = Integer.parseInt(tmpStr); } else { m_KValue = 1; } tmpStr = Utils.getOption('M', options); if (tmpStr.length() != 0) { m_MinNum = Double.parseDouble(tmpStr); } else { m_MinNum = 1; } tmpStr = Utils.getOption('S', options); if (tmpStr.length() != 0) { setSeed(Integer.parseInt(tmpStr)); } else { setSeed(1); } tmpStr = Utils.getOption("depth", options); if (tmpStr.length() != 0) { setMaxDepth(Integer.parseInt(tmpStr)); } else { setMaxDepth(0); } super.setOptions(options); Utils.checkForRemainingOptions(options); } /** * Returns default capabilities of the classifier. * * @return the capabilities of this classifier */ public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Builds classifier. * * @param data the data to train with * @throws Exception if something goes wrong or the data doesn't fit */ public void buildClassifier(Instances data) throws Exception { // Make sure K value is in range if (m_KValue > data.numAttributes()-1) m_KValue = data.numAttributes()-1; if (m_KValue < 1) m_KValue = (int) Utils.log2(data.numAttributes())+1; // can classifier handle the data? 
getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Instances train = data; // Create array of sorted indices and weights int[][] sortedIndices = new int[train.numAttributes()][0]; double[][] weights = new double[train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. sortedIndices[j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[j][count] = i; weights[j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[j][count] = i; weights[j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[j][i] = train.instance(sortedIndices[j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); classProbs[(int)inst.classValue()] += inst.weight(); } // Create the attribute indices window int[] attIndicesWindow = new int[data.numAttributes()-1]; int j=0; for (int i=0; i<attIndicesWindow.length; i++) { if (j == data.classIndex()) j++; // do not include the class attIndicesWindow[i] = j++; } // Build tree buildTree(sortedIndices, weights, train, classProbs, new Instances(train, 0), m_MinNum, m_Debug, 
attIndicesWindow, data.getRandomNumberGenerator(m_randomSeed), 0); } /** * Computes class distribution of an instance using the decision tree. * * @param instance the instance to compute the distribution for * @return the computed class distribution * @throws Exception if computation fails */ public double[] distributionForInstance(Instance instance) throws Exception { double[] returnedDist = null; if (m_Attribute > -1) { // Node is not a leaf if (instance.isMissing(m_Attribute)) { // Value is missing returnedDist = new double[m_Info.numClasses()]; // Split instance up for (int i = 0; i < m_Successors.length; i++) { double[] help = m_Successors[i].distributionForInstance(instance); if (help != null) { for (int j = 0; j < help.length; j++) { returnedDist[j] += m_Prop[i] * help[j]; } } } } else if (m_Info.attribute(m_Attribute).isNominal()) { // For nominal attributes returnedDist = m_Successors[(int)instance.value(m_Attribute)]. distributionForInstance(instance); } else { // For numeric attributes if (Utils.sm(instance.value(m_Attribute), m_SplitPoint)) { returnedDist = m_Successors[0].distributionForInstance(instance); } else { returnedDist = m_Successors[1].distributionForInstance(instance); } } } if ((m_Attribute == -1) || (returnedDist == null)) { // Node is a leaf or successor is empty return m_ClassProbs;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -