📄 m5prime.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    M5Prime.java *    Copyright (C) 1999 Yong Wang * */package weka.classifiers.m5;import java.io.*;import java.util.*;import weka.core.*;import weka.classifiers.*;import weka.filters.*;/** * Class for contructing and evaluating model trees; M5' algorithm. <p> * * Reference: Wang, Y. and Witten, I.H. (1997). <i> Induction of model * trees for predicting continuous classes.</i> Proceedings of the poster * papers of the European Conference on Machine Learning. University of * Economics, Faculty of Informatics and Statistics, Prague. <p> * * Valid options are: <p> * * -O <l|r|m> <br> * Type of model to be used. (l: linear regression,  * r: regression tree, m: model tree) (default: m) <p> * * -U <br> * Use unsmoothed tree. <p> * * -F factor <br> * Set pruning factor (default: 2). <p> * * -V <0|1|2> <br> * Verbosity (default: 0). <p> * * @author Yong Wang (yongwang@cs.waikato.ac.nz) * @version $Revision: 1.15.2.1 $ */public final class  M5Prime extends Classifier implements OptionHandler, AdditionalMeasureProducer {    /** The root node */  private Node m_root[];  /** The options */  private Options options;  /** No smoothing? */  private boolean m_UseUnsmoothed = false;  /** Pruning factor */  private double m_PruningFactor = 2;  /** Type of model */  private int m_Model = Node.MODEL_TREE;  /** Verbosity */  private int m_Verbosity = 0;  /** Filter for replacing missing values. */  private ReplaceMissingValuesFilter m_ReplaceMissingValuesFilter;  /** Filter for replacing nominal attributes with numeric binary ones. */  private NominalToBinaryFilter m_NominalToBinaryFilter;  public static final int MODEL_LINEAR_REGRESSION = Node.LINEAR_REGRESSION;  public static final int MODEL_REGRESSION_TREE = Node.REGRESSION_TREE;  public static final int MODEL_MODEL_TREE = Node.MODEL_TREE;  public static final Tag [] TAGS_MODEL_TYPES = {    new Tag(MODEL_LINEAR_REGRESSION, "Simple linear regression"),    new Tag(MODEL_REGRESSION_TREE, "Regression tree"),    new Tag(MODEL_MODEL_TREE, "Model tree")  };    /**   * Construct a model tree by training instances   *   * @param inst training instances   * @param options information for constructing the model tree,    * mostly from command line options   * @return the root of the model tree   * @exception Exception if the classifier can't be built   */  public final void buildClassifier(Instances inst) throws Exception{    if (inst.checkForStringAttributes()) {      throw new Exception("Can't handle string attributes!");    }    options = new Options(inst);    options.model = m_Model;    options.smooth = !m_UseUnsmoothed;    options.pruningFactor = m_PruningFactor;    options.verbosity = m_Verbosity;    if(!inst.classAttribute().isNumeric())       throw new Exception("Class has to be numeric.");     inst = new Instances(inst);    inst.deleteWithMissingClass();    m_ReplaceMissingValuesFilter = new ReplaceMissingValuesFilter();    m_ReplaceMissingValuesFilter.setInputFormat(inst);    inst = Filter.useFilter(inst, m_ReplaceMissingValuesFilter);    m_NominalToBinaryFilter = new NominalToBinaryFilter();    m_NominalToBinaryFilter.setInputFormat(inst);    inst = Filter.useFilter(inst, m_NominalToBinaryFilter);        m_root = new Node[2];    options.deviation = M5Utils.stdDev(inst.classIndex(),inst);    m_root[0] = new Node(inst,null,options);       // build an empty tree    m_root[0].split(inst);         // build the unpruned initial tree    m_root[0].numLeaves(0);       // set tree leaves' number of the unpruned treee    m_root[1] = m_root[0].copy(null);  // make a copy of the unpruned tree    m_root[1].prune();            // prune the tree    if(options.model != Node.LINEAR_REGRESSION){      m_root[1].smoothen();    // compute the smoothed linear models at the leaves      m_root[1].numLeaves(0);  // set tree leaves' number of the pruned tree    }  }  /**   * Classifies the given test instance.   *   * @param instance the instance to be classified   * @return the predicted class for the instance    * @exception Exception if the instance can't be classified   */  public double classifyInstance(Instance ins) throws Exception {    m_ReplaceMissingValuesFilter.input(ins);    m_ReplaceMissingValuesFilter.batchFinished();    ins = m_ReplaceMissingValuesFilter.output();    m_NominalToBinaryFilter.input(ins);    m_NominalToBinaryFilter.batchFinished();    ins = m_NominalToBinaryFilter.output();    return m_root[1].predict(ins,!m_UseUnsmoothed);  }  /**   * Returns an enumeration describing the available options.   *   * Valid options are: <p>   *   * -O <l|r|m> <br>   * Type of model to be used. (l: linear regression,    * r: regression tree, m: model tree) (default: m) <p>   *   * -U <br>   * Use unsmoothed tree. <p>   *   * -F factor <br>   * Set pruning factor (default: 2). <p>   *   * -V <0|1|2> <br>   * Verbosity (default: 0). <p>   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector newVector = new Vector(4);    newVector.addElement(new Option("\tType of model to be used.\n"+				    "\tl: linear regression\n"+				    "\tr: regression tree\n"+				    "\tm: model tree\n"+				    "\t(default: m)",				    "-O", 1, "-O <l|r|m>"));    newVector.addElement(new Option("\tUse unsmoothed tree.", "C", 0, 				    "-U"));    newVector.addElement(new Option("\tPruning factor (default: 2).",				    "-F", 1, "-F <double>"));    newVector.addElement(new Option("\tVerbosity (default: 0).",				    "-V", 1, "-V <0|1|2>"));    return newVector.elements();  }  /**   * Parses a given list of options.   *   * @param options the list of options as an array of strings   * @exception Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception{    String modelString = Utils.getOption('O', options);    if (modelString.length() != 0) {      if (modelString.equals("l"))	setModelType(new SelectedTag(MODEL_LINEAR_REGRESSION,				     TAGS_MODEL_TYPES));      else if (modelString.equals("r"))	setModelType(new SelectedTag(MODEL_REGRESSION_TREE,				     TAGS_MODEL_TYPES));      else if (modelString.equals("m"))	setModelType(new SelectedTag(MODEL_MODEL_TREE,				     TAGS_MODEL_TYPES));      else	throw new Exception("Don't know model type " + modelString);    } else {      setModelType(new SelectedTag(MODEL_MODEL_TREE,				   TAGS_MODEL_TYPES));    }        setUseUnsmoothed(Utils.getFlag('U', options));    if (m_Model != Node.MODEL_TREE) {      setUseUnsmoothed(true);    }    String pruningString = Utils.getOption('F', options);    if (pruningString.length() != 0) {      setPruningFactor((new Double(pruningString)).doubleValue());    } else {      setPruningFactor(2);    }        String verbosityString = Utils.getOption('V', options);    if (verbosityString.length() != 0) {      setVerbosity(Integer.parseInt(verbosityString));    } else {      setVerbosity(0);    }  }  /**   * Gets the current settings of the Classifier.   *   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {    String [] options = new String [7];    int current = 0;    switch (m_Model) {    case MODEL_MODEL_TREE:      options[current++] = "-O"; options[current++] = "m";      if (m_UseUnsmoothed) {	options[current++] = "-U";      }      break;    case MODEL_REGRESSION_TREE:      options[current++] = "-O"; options[current++] = "r";      break;    case MODEL_LINEAR_REGRESSION:      options[current++] = "-O"; options[current++] = "l";      break;    }    options[current++] = "-F"; options[current++] = "" + m_PruningFactor;    options[current++] = "-V"; options[current++] = "" + m_Verbosity;    while (current < options.length) {      options[current++] = "";    }    return options;  }  /**   * Converts the output of the training process into a string   *   * @return the converted string   */  public final String toString() {    try{      StringBuffer text = new StringBuffer();      double absDev = M5Utils.absDev(m_root[0].instances.classIndex(),				     m_root[0].instances);            if(options.verbosity >= 1 && options.model != Node.LINEAR_REGRESSION){	switch(m_root[0].model){	case Node.LINEAR_REGRESSION: 	  break;	case Node.REGRESSION_TREE  : 	  text.append("Unpruned training regression tree:\n"); break;	case Node.MODEL_TREE       : 	  text.append("Unpruned training model tree:\n"); break;	}     	if(m_root[0].type == false)text.append("\n");		text.append(m_root[0].treeToString(0,absDev)+ "\n");	text.append("Models at the leaves:\n\n");	//    the linear models at the leaves of the unpruned tree	text.append(m_root[0].formulaeToString(false) + "\n");;        }            if(m_root[0].model != Node.LINEAR_REGRESSION){	switch(m_root[0].model){	case Node.LINEAR_REGRESSION: 	  break;	case Node.REGRESSION_TREE  : 	  text.append("Pruned training regression tree:\n"); break;	case Node.MODEL_TREE       : 	  text.append("Pruned training model tree:\n"); break;	}	if(m_root[1].type == false)text.append("\n");	text.append(m_root[1].treeToString(0,absDev) + "\n"); //the pruned tree	text.append("Models at the leaves:\n\n");	if ((m_root[0].model != Node.LINEAR_REGRESSION) &&	    (m_UseUnsmoothed)) {	  text.append("  Unsmoothed (simple):\n\n");	  //     the unsmoothed linear models at the leaves of the pruned tree	  text.append(m_root[1].formulaeToString(false) + "\n");	}	if ((m_root[0].model == Node.MODEL_TREE) &&	    (!m_UseUnsmoothed)) {	  text.append("  Smoothed (complex):\n\n");	  text.append(m_root[1].formulaeToString(true) + "\n");	  //   the smoothed linear models at the leaves of the pruned tree	}      }      else {	text.append("Training linear regression model:\n\n");	text.append(m_root[1].unsmoothed.toString(m_root[1].instances,0) + "\n\n");	//       print the linear regression model      }            text.append("Number of Rules : "+m_root[1].numberOfLinearModels());      return text.toString();    } catch (Exception e) {      return "can't print m5' tree";    }  }  /**   * return the number of linear models   * @return the number of linear models   */  public double measureNumLinearModels() {    return m_root[1].numberOfLinearModels();  }  /**   * return the number of leaves in the tree   * @return the number leaves in the tree (same as # linear models &   * # rules)   */  public double measureNumLeaves() {    return measureNumLinearModels();  }  /**   * return the number of rules   * @return the number of rules (same as # linear models &   * # leaves in the tree)   */  public double measureNumRules() {    return measureNumLinearModels();  }  /**   * Returns an enumeration of the additional measure names   * @return an enumeration of the measure names   */  public Enumeration enumerateMeasures() {    Vector newVector = new Vector(3);    newVector.addElement("measureNumLinearModels");    newVector.addElement("measureNumLeaves");    newVector.addElement("measureNumRules");    return newVector.elements();  }  /**   * Returns the value of the named measure   * @param measureName the name of the measure to query for its value   * @return the value of the named measure   * @exception IllegalArgumentException if the named measure is not supported   */  public double getMeasure(String additionalMeasureName) {    if (additionalMeasureName.compareTo("measureNumRules") == 0) {      return measureNumRules();    } else if (additionalMeasureName.compareTo("measureNumLinearModels") == 0){      return measureNumLinearModels();    } else if (additionalMeasureName.compareTo("measureNumLeaves") == 0) {      return measureNumLeaves();    } else {      throw new IllegalArgumentException(additionalMeasureName 			  + " not supported (M5)");    }  }    /**   * Get the value of UseUnsmoothed.   *   * @return Value of UseUnsmoothed.   */  public boolean getUseUnsmoothed() {        return m_UseUnsmoothed;  }    /**   * Set the value of UseUnsmoothed.   *   * @param v  Value to assign to UseUnsmoothed.   */  public void setUseUnsmoothed(boolean v) {        if (m_Model != Node.MODEL_TREE) m_UseUnsmoothed = true;    else m_UseUnsmoothed = v;  }    /**   * Get the value of PruningFactor.   *   * @return Value of PruningFactor.   */  public double getPruningFactor() {        return m_PruningFactor;  }    /**   * Set the value of PruningFactor.   *   * @param v  Value to assign to PruningFactor.   */  public void setPruningFactor(double v) {        m_PruningFactor = v;  }    /**   * Get the value of Model.   *   * @return Value of Model.   */  public SelectedTag getModelType() {        return new SelectedTag(m_Model, TAGS_MODEL_TYPES);  }    /**   * Set the value of Model.   *   * @param v  Value to assign to Model.   */  public void setModelType(SelectedTag newMethod) {        if (newMethod.getTags() == TAGS_MODEL_TYPES) {      m_Model = newMethod.getSelectedTag().getID();      if (m_Model != Node.MODEL_TREE) setUseUnsmoothed(true);    }  }    /**   * Get the value of Verbosity.   *   * @return Value of Verbosity.   */  public int getVerbosity() {        return m_Verbosity;  }    /**   * Set the value of Verbosity.   *   * @param v  Value to assign to Verbosity.   */  public void setVerbosity(int v) {        m_Verbosity = v;  }  /**   * Main method for M5' algorithm   *   * @param argv command line arguments   */  public static void  main(String [] argv){    try {      System.out.println(Evaluation.evaluateModel(new M5Prime(), argv));    } catch (Exception e) {      System.err.println(e.getMessage());    }  }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -