📄 logitboost.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    LogitBoost.java *    Copyright (C) 1999, 2002 Len Trigg, Eibe Frank * */package weka.classifiers.meta;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;import weka.classifiers.Sourcable;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for performing additive logistic regression. <br/> * This class performs classification using a regression scheme as the base learner, and can handle multi-class problems.  For more information, see<br/> * <br/> * J. Friedman, T. Hastie, R. Tibshirani (1998). Additive Logistic Regression: a Statistical View of Boosting. Stanford University.<br/> * <br/> * Can do efficient internal cross-validation to determine appropriate number of iterations. 
* <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;techreport{Friedman1998, *    address = {Stanford University}, *    author = {J. Friedman and T. Hastie and R. Tibshirani}, *    title = {Additive Logistic Regression: a Statistical View of Boosting}, *    year = {1998}, *    PS = {http://www-stat.stanford.edu/~jhf/ftp/boost.ps} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -Q *  Use resampling instead of reweighting for boosting.</pre> *  * <pre> -P &lt;percent&gt; *  Percentage of weight mass to base training on. *  (default 100, reduce to around 90 speed up)</pre> *  * <pre> -F &lt;num&gt; *  Number of folds for internal cross-validation. *  (default 0 -- no cross-validation)</pre> *  * <pre> -R &lt;num&gt; *  Number of runs for internal cross-validation. *  (default 1)</pre> *  * <pre> -L &lt;num&gt; *  Threshold on the improvement of the likelihood. *  (default -Double.MAX_VALUE)</pre> *  * <pre> -H &lt;num&gt; *  Shrinkage parameter. *  (default 1)</pre> *  * <pre> -S &lt;num&gt; *  Random number seed. *  (default 1)</pre> *  * <pre> -I &lt;num&gt; *  Number of iterations. *  (default 10)</pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  * <pre> -W *  Full name of base classifier. 
*  (default: weka.classifiers.trees.DecisionStump)</pre> *  * <pre>  * Options specific to classifier weka.classifiers.trees.DecisionStump: * </pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  <!-- options-end --> * * Options after -- are passed to the designated learner.<p> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.37 $  */public class LogitBoost   extends RandomizableIteratedSingleClassifierEnhancer  implements Sourcable, WeightedInstancesHandler, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = -3905660358715833753L;    /** Array for storing the generated base classifiers.    Note: we are hiding the variable from IteratedSingleClassifierEnhancer*/  protected Classifier [][] m_Classifiers;  /** The number of classes */  protected int m_NumClasses;  /** The number of successfully generated base classifiers. */  protected int m_NumGenerated;  /** The number of folds for the internal cross-validation. */  protected int m_NumFolds = 0;  /** The number of runs for the internal cross-validation. */  protected int m_NumRuns = 1;  /** Weight thresholding. The percentage of weight mass used in training */  protected int m_WeightThreshold = 100;  /** A threshold for responses (Friedman suggests between 2 and 4) */  protected static final double Z_MAX = 3;  /** Dummy dataset with a numeric class */  protected Instances m_NumericClassData;  /** The actual class attribute (for getting class names) */  protected Attribute m_ClassAttribute;  /** Use boosting with reweighting? 
*/  protected boolean m_UseResampling;  /** The threshold on the improvement of the likelihood */     protected double m_Precision = -Double.MAX_VALUE;  /** The value of the shrinkage parameter */  protected double m_Shrinkage = 1;  /** The random number generator used */  protected Random m_RandomInstance = null;  /** The value by which the actual target value for the      true class is offset. */  protected double m_Offset = 0.0;      /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Class for performing additive logistic regression. \n"      + "This class performs classification using a regression scheme as the "      + "base learner, and can handle multi-class problems.  For more "      + "information, see\n\n"      + getTechnicalInformation().toString() + "\n\n"      + "Can do efficient internal cross-validation to determine "      + "appropriate number of iterations.";  }      /**   * Constructor.   */  public LogitBoost() {        m_Classifier = new weka.classifiers.trees.DecisionStump();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.TECHREPORT);    result.setValue(Field.AUTHOR, "J. Friedman and T. Hastie and R. Tibshirani");    result.setValue(Field.YEAR, "1998");    result.setValue(Field.TITLE, "Additive Logistic Regression: a Statistical View of Boosting");    result.setValue(Field.ADDRESS, "Stanford University");    result.setValue(Field.PS, "http://www-stat.stanford.edu/~jhf/ftp/boost.ps");        return result;  }  /**   * String describing default classifier.   
*    * @return the default classifier classname   */  protected String defaultClassifierString() {        return "weka.classifiers.trees.DecisionStump";  }  /**   * Select only instances with weights that contribute to    * the specified quantile of the weight distribution   *   * @param data the input instances   * @param quantile the specified quantile eg 0.9 to select    * 90% of the weight mass   * @return the selected instances   */  protected Instances selectWeightQuantile(Instances data, double quantile) {     int numInstances = data.numInstances();    Instances trainData = new Instances(data, numInstances);    double [] weights = new double [numInstances];    double sumOfWeights = 0;    for (int i = 0; i < numInstances; i++) {      weights[i] = data.instance(i).weight();      sumOfWeights += weights[i];    }    double weightMassToSelect = sumOfWeights * quantile;    int [] sortedIndices = Utils.sort(weights);    // Select the instances    sumOfWeights = 0;    for (int i = numInstances-1; i >= 0; i--) {      Instance instance = (Instance)data.instance(sortedIndices[i]).copy();      trainData.add(instance);      sumOfWeights += weights[sortedIndices[i]];      if ((sumOfWeights > weightMassToSelect) && 	  (i > 0) && 	  (weights[sortedIndices[i]] != weights[sortedIndices[i-1]])) {	break;      }    }    if (m_Debug) {      System.err.println("Selected " + trainData.numInstances()			 + " out of " + numInstances);    }    return trainData;  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   
*/
  public Enumeration listOptions() {
    // The six options declared by this class come first ...
    Vector options = new Vector(6);

    options.add(new Option(
        "\tUse resampling instead of reweighting for boosting.",
        "Q", 0, "-Q"));

    options.add(new Option(
        "\tPercentage of weight mass to base training on.\n"
            + "\t(default 100, reduce to around 90 speed up)",
        "P", 1, "-P <percent>"));

    options.add(new Option(
        "\tNumber of folds for internal cross-validation.\n"
            + "\t(default 0 -- no cross-validation)",
        "F", 1, "-F <num>"));

    options.add(new Option(
        "\tNumber of runs for internal cross-validation.\n"
            + "\t(default 1)",
        "R", 1, "-R <num>"));

    options.add(new Option(
        "\tThreshold on the improvement of the likelihood.\n"
            + "\t(default -Double.MAX_VALUE)",
        "L", 1, "-L <num>"));

    options.add(new Option(
        "\tShrinkage parameter.\n"
            + "\t(default 1)",
        "H", 1, "-H <num>"));

    // ... followed by everything the superclass reports, in its own order.
    for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements();) {
      options.add(inherited.nextElement());
    }

    return options.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -Q
   *  Use resampling instead of reweighting for boosting.</pre>
   *
   * <pre> -P &lt;percent&gt;
   *  Percentage of weight mass to base training on.
   *  (default 100, reduce to around 90 speed up)</pre>
   *
   * <pre> -F &lt;num&gt;
   *  Number of folds for internal cross-validation.
   *  (default 0 -- no cross-validation)</pre>
   *
   * <pre> -R &lt;num&gt;
   *  Number of runs for internal cross-validation.
   *  (default 1)</pre>
   *
   * <pre> -L &lt;num&gt;
   *  Threshold on the improvement of the likelihood.
   *  (default -Double.MAX_VALUE)</pre>
   *
   * <pre> -H &lt;num&gt;
   *  Shrinkage parameter.
   *  (default 1)</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Random number seed.
*  (default 1)</pre>   *    * <pre> -I &lt;num&gt;   *  Number of iterations.   *  (default 10)</pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    * <pre> -W   *  Full name of base classifier.   *  (default: weka.classifiers.trees.DecisionStump)</pre>   *    * <pre>    * Options specific to classifier weka.classifiers.trees.DecisionStump:   * </pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    <!-- options-end -->   *   * Options after -- are passed to the designated learner.<p>   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -