⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lmt.java

📁 JAVA的一个程序
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    LMT.java *    Copyright (C) 2003 Niels Landwehr * */package weka.classifiers.trees;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.trees.j48.C45ModelSelection;import weka.classifiers.trees.j48.ModelSelection;import weka.classifiers.trees.lmt.LMTNode;import weka.classifiers.trees.lmt.ResidualModelSelection;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.Drawable;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.supervised.attribute.NominalToBinary;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Classifier for building 'logistic model trees', which are classification trees with logistic regression functions at the leaves. 
The algorithm can deal with binary and multi-class target variables, numeric and nominal attributes and missing values.<br/> * <br/> * For more information see: <br/> * <br/> * Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.<br/> * <br/> * Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree Induction. In: 9th European Conference on Principles and Practice of Knowledge Discovery in Databases, 675-683, 2005. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{Landwehr2005, *    author = {Niels Landwehr and Mark Hall and Eibe Frank}, *    booktitle = {Machine Learning}, *    number = {1-2}, *    pages = {161-205}, *    title = {Logistic Model Trees}, *    volume = {95}, *    year = {2005} * } *  * &#64;inproceedings{Sumner2005, *    author = {Marc Sumner and Eibe Frank and Mark Hall}, *    booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases}, *    pages = {675-683}, *    publisher = {Springer}, *    title = {Speeding up Logistic Model Tree Induction}, *    year = {2005} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -B *  Binary splits (convert nominal attributes to binary ones)</pre> *  * <pre> -R *  Split on residuals instead of class values</pre> *  * <pre> -C *  Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre> *  * <pre> -P *  Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre> *  * <pre> -I &lt;numIterations&gt; *  Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre> *  * <pre> -M &lt;numInstances&gt; *  Set minimum number of instances at which a node can be split (default 15)</pre> *  * <pre> -W &lt;beta&gt; *  Set beta for weight trimming for LogitBoost. 
Set to 0 (default) for no weight trimming.</pre> *  * <pre> -A *  The AIC is used to choose the best iteration.</pre> *  <!-- options-end --> * * @author Niels Landwehr  * @author Marc Sumner  * @version $Revision: 1.8 $ */public class LMT   extends Classifier   implements OptionHandler, AdditionalMeasureProducer, Drawable,             TechnicalInformationHandler {      /** for serialization */  static final long serialVersionUID = -1113212459618104943L;    /** Filter to replace missing values*/  protected ReplaceMissingValues m_replaceMissing;      /** Filter to replace nominal attributes*/  protected NominalToBinary m_nominalToBinary;      /** root of the logistic model tree*/  protected LMTNode m_tree;      /** use heuristic that determines the number of LogitBoost iterations only once in the beginning?*/  protected boolean m_fastRegression;  /** convert nominal attributes to binary ?*/  protected boolean m_convertNominal;  /** split on residuals?*/  protected boolean m_splitOnResiduals;      /**use error on probabilties instead of misclassification for stopping criterion of LogitBoost?*/  protected boolean m_errorOnProbabilities;  /**minimum number of instances at which a node is considered for splitting*/  protected int m_minNumInstances;  /**if non-zero, use fixed number of iterations for LogitBoost*/  protected int m_numBoostingIterations;      /**Threshold for trimming weights. Instances with a weight lower than this (as a percentage   * of total weights) are not included in the regression fit.   **/  protected double m_weightTrimBeta;    /** If true, the AIC is used to choose the best LogitBoost iteration*/  private boolean m_useAIC = false;    /**   * Creates an instance of LMT with standard options   */  public LMT() {    m_fastRegression = true;    m_numBoostingIterations = -1;    m_minNumInstances = 15;    m_weightTrimBeta = 0;    m_useAIC = false;  }      /**   * Returns default capabilities of the classifier.   
*   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.DATE_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);        return result;  }  /**   * Builds the classifier.   *   * @param data the data to train with   * @throws Exception if classifier can't be built successfully   */  public void buildClassifier(Instances data) throws Exception{	    // can classifier handle the data?    getCapabilities().testWithFail(data);    // remove instances with missing class    Instances filteredData = new Instances(data);    filteredData.deleteWithMissingClass();        //replace missing values    m_replaceMissing = new ReplaceMissingValues();    m_replaceMissing.setInputFormat(filteredData);	    filteredData = Filter.useFilter(filteredData, m_replaceMissing);		    //possibly convert nominal attributes globally    if (m_convertNominal) {	          m_nominalToBinary = new NominalToBinary();      m_nominalToBinary.setInputFormat(filteredData);	      filteredData = Filter.useFilter(filteredData, m_nominalToBinary);    }    int minNumInstances = 2;	    //create ModelSelection object, either for splits on the residuals or for splits on the class value     ModelSelection modSelection;	    if (m_splitOnResiduals) {      modSelection = new ResidualModelSelection(minNumInstances);    } else {      modSelection = new C45ModelSelection(minNumInstances, filteredData);    }	    //create tree root    m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression, 			 m_errorOnProbabilities, m_minNumInstances, m_weightTrimBeta, m_useAIC);    //build tree    m_tree.buildClassifier(filteredData);    if (modSelection instanceof 
C45ModelSelection) ((C45ModelSelection)modSelection).cleanup();  }  /**    * Returns class probabilities for an instance.   *   * @param instance the instance to compute the distribution for   * @return the class probabilities   * @throws Exception if distribution can't be computed successfully   */  public double [] distributionForInstance(Instance instance) throws Exception {	    //replace missing values    m_replaceMissing.input(instance);    instance = m_replaceMissing.output();		    //possibly convert nominal attributes    if (m_convertNominal) {      m_nominalToBinary.input(instance);      instance = m_nominalToBinary.output();    }	    return m_tree.distributionForInstance(instance);  }  /**   * Classifies an instance.   *   * @param instance the instance to classify   * @return the classification   * @throws Exception if instance can't be classified successfully   */  public double classifyInstance(Instance instance) throws Exception {    double maxProb = -1;    int maxIndex = 0;          //classify by maximum probability    double[] probs = distributionForInstance(instance);           for (int j = 0; j < instance.numClasses(); j++) {      if (Utils.gr(probs[j], maxProb)) {	maxIndex = j;	maxProb = probs[j];      }    }         return (double)maxIndex;        }           /**   * Returns a description of the classifier.   *    * @return a string representation of the classifier   */  public String toString() {    if (m_tree!=null) {      return "Logistic model tree \n------------------\n" + m_tree.toString();    } else {      return "No tree build";    }  }          /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   
*/
  public Enumeration listOptions() {
    // one slot per command-line option declared below
    Vector options = new Vector(8);

    options.addElement(new Option(
        "\tBinary splits (convert nominal attributes to binary ones)",
        "B", 0, "-B"));
    options.addElement(new Option(
        "\tSplit on residuals instead of class values",
        "R", 0, "-R"));
    options.addElement(new Option(
        "\tUse cross-validation for boosting at all nodes (i.e., disable heuristic)",
        "C", 0, "-C"));
    options.addElement(new Option(
        "\tUse error on probabilities instead of misclassification error "
        + "for stopping criterion of LogitBoost.",
        "P", 0, "-P"));
    options.addElement(new Option(
        "\tSet fixed number of iterations for LogitBoost (instead of using "
        + "cross-validation)",
        "I", 1, "-I <numIterations>"));
    options.addElement(new Option(
        "\tSet minimum number of instances at which a node can be split (default 15)",
        "M", 1, "-M <numInstances>"));
    options.addElement(new Option(
        "\tSet beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.",
        "W", 1, "-W <beta>"));
    options.addElement(new Option(
        "\tThe AIC is used to choose the best iteration.",
        "A", 0, "-A"));

    return options.elements();
  }

  /**
   * Parses a given list of options.
<p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -B   *  Binary splits (convert nominal attributes to binary ones)</pre>   *    * <pre> -R   *  Split on residuals instead of class values</pre>   *    * <pre> -C   *  Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre>   *    * <pre> -P   *  Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre>   *    * <pre> -I &lt;numIterations&gt;   *  Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre>   *    * <pre> -M &lt;numInstances&gt;   *  Set minimum number of instances at which a node can be split (default 15)</pre>   *    * <pre> -W &lt;beta&gt;   *  Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre>   *    * <pre> -A   *  The AIC is used to choose the best iteration.</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    setConvertNominal(Utils.getFlag('B', options));    setSplitOnResiduals(Utils.getFlag('R', options));    setFastRegression(!Utils.getFlag('C', options));    setErrorOnProbabilities(Utils.getFlag('P', options));    String optionString = Utils.getOption('I', options);    if (optionString.length() != 0) {      setNumBoostingIterations((new Integer(optionString)).intValue());    }	    optionString = Utils.getOption('M', options);    if (optionString.length() != 0) {      setMinNumInstances((new Integer(optionString)).intValue());    }    optionString = Utils.getOption('W', options);    if (optionString.length() != 0) {      setWeightTrimBeta((new Double(optionString)).doubleValue());    }        setUseAIC(Utils.getFlag('A', options));                Utils.checkForRemainingOptions(options);	  }       /**

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -