📄 logitboost.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * LogitBoost.java * Copyright (C) 1999, 2002 Len Trigg, Eibe Frank * */package weka.classifiers.meta;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;import weka.classifiers.Sourcable;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for performing additive logistic regression. <br/> * This class performs classification using a regression scheme as the base learner, and can handle multi-class problems. For more information, see<br/> * <br/> * J. Friedman, T. Hastie, R. Tibshirani (1998). Additive Logistic Regression: a Statistical View of Boosting. Stanford University.<br/> * <br/> * Can do efficient internal cross-validation to determine appropriate number of iterations. 
* <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @techreport{Friedman1998, * address = {Stanford University}, * author = {J. Friedman and T. Hastie and R. Tibshirani}, * title = {Additive Logistic Regression: a Statistical View of Boosting}, * year = {1998}, * PS = {http://www-stat.stanford.edu/~jhf/ftp/boost.ps} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -Q * Use resampling instead of reweighting for boosting.</pre> * * <pre> -P <percent> * Percentage of weight mass to base training on. * (default 100, reduce to around 90 speed up)</pre> * * <pre> -F <num> * Number of folds for internal cross-validation. * (default 0 -- no cross-validation)</pre> * * <pre> -R <num> * Number of runs for internal cross-validation. * (default 1)</pre> * * <pre> -L <num> * Threshold on the improvement of the likelihood. * (default -Double.MAX_VALUE)</pre> * * <pre> -H <num> * Shrinkage parameter. * (default 1)</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -I <num> * Number of iterations. * (default 10)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. 
* (default: weka.classifiers.trees.DecisionStump)</pre> * * <pre> * Options specific to classifier weka.classifiers.trees.DecisionStump: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * Options after -- are passed to the designated learner.<p> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.37 $ */public class LogitBoost extends RandomizableIteratedSingleClassifierEnhancer implements Sourcable, WeightedInstancesHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = -3905660358715833753L; /** Array for storing the generated base classifiers. Note: we are hiding the variable from IteratedSingleClassifierEnhancer*/ protected Classifier [][] m_Classifiers; /** The number of classes */ protected int m_NumClasses; /** The number of successfully generated base classifiers. */ protected int m_NumGenerated; /** The number of folds for the internal cross-validation. */ protected int m_NumFolds = 0; /** The number of runs for the internal cross-validation. */ protected int m_NumRuns = 1; /** Weight thresholding. The percentage of weight mass used in training */ protected int m_WeightThreshold = 100; /** A threshold for responses (Friedman suggests between 2 and 4) */ protected static final double Z_MAX = 3; /** Dummy dataset with a numeric class */ protected Instances m_NumericClassData; /** The actual class attribute (for getting class names) */ protected Attribute m_ClassAttribute; /** Use boosting with reweighting? 
*/ protected boolean m_UseResampling; /** The threshold on the improvement of the likelihood */ protected double m_Precision = -Double.MAX_VALUE; /** The value of the shrinkage parameter */ protected double m_Shrinkage = 1; /** The random number generator used */ protected Random m_RandomInstance = null; /** The value by which the actual target value for the true class is offset. */ protected double m_Offset = 0.0; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for performing additive logistic regression. \n" + "This class performs classification using a regression scheme as the " + "base learner, and can handle multi-class problems. For more " + "information, see\n\n" + getTechnicalInformation().toString() + "\n\n" + "Can do efficient internal cross-validation to determine " + "appropriate number of iterations."; } /** * Constructor. */ public LogitBoost() { m_Classifier = new weka.classifiers.trees.DecisionStump(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.TECHREPORT); result.setValue(Field.AUTHOR, "J. Friedman and T. Hastie and R. Tibshirani"); result.setValue(Field.YEAR, "1998"); result.setValue(Field.TITLE, "Additive Logistic Regression: a Statistical View of Boosting"); result.setValue(Field.ADDRESS, "Stanford University"); result.setValue(Field.PS, "http://www-stat.stanford.edu/~jhf/ftp/boost.ps"); return result; } /** * String describing default classifier. 
* * @return the default classifier classname */ protected String defaultClassifierString() { return "weka.classifiers.trees.DecisionStump"; } /** * Select only instances with weights that contribute to * the specified quantile of the weight distribution * * @param data the input instances * @param quantile the specified quantile eg 0.9 to select * 90% of the weight mass * @return the selected instances */ protected Instances selectWeightQuantile(Instances data, double quantile) { int numInstances = data.numInstances(); Instances trainData = new Instances(data, numInstances); double [] weights = new double [numInstances]; double sumOfWeights = 0; for (int i = 0; i < numInstances; i++) { weights[i] = data.instance(i).weight(); sumOfWeights += weights[i]; } double weightMassToSelect = sumOfWeights * quantile; int [] sortedIndices = Utils.sort(weights); // Select the instances sumOfWeights = 0; for (int i = numInstances-1; i >= 0; i--) { Instance instance = (Instance)data.instance(sortedIndices[i]).copy(); trainData.add(instance); sumOfWeights += weights[sortedIndices[i]]; if ((sumOfWeights > weightMassToSelect) && (i > 0) && (weights[sortedIndices[i]] != weights[sortedIndices[i-1]])) { break; } } if (m_Debug) { System.err.println("Selected " + trainData.numInstances() + " out of " + numInstances); } return trainData; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. 
*/
  public Enumeration listOptions() {
    // Options introduced by this class come first, inherited ones after.
    Vector options = new Vector(6);

    options.addElement(new Option(
        "\tUse resampling instead of reweighting for boosting.",
        "Q", 0, "-Q"));
    options.addElement(new Option(
        "\tPercentage of weight mass to base training on.\n"
        +"\t(default 100, reduce to around 90 speed up)",
        "P", 1, "-P <percent>"));
    options.addElement(new Option(
        "\tNumber of folds for internal cross-validation.\n"
        +"\t(default 0 -- no cross-validation)",
        "F", 1, "-F <num>"));
    options.addElement(new Option(
        "\tNumber of runs for internal cross-validation.\n"
        +"\t(default 1)",
        "R", 1, "-R <num>"));
    options.addElement(new Option(
        "\tThreshold on the improvement of the likelihood.\n"
        +"\t(default -Double.MAX_VALUE)",
        "L", 1, "-L <num>"));
    options.addElement(new Option(
        "\tShrinkage parameter.\n"
        +"\t(default 1)",
        "H", 1, "-H <num>"));

    // Append everything the superclass reports, preserving its order.
    for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements();) {
      options.addElement(inherited.nextElement());
    }

    return options.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -Q
   *  Use resampling instead of reweighting for boosting.</pre>
   *
   * <pre> -P <percent>
   *  Percentage of weight mass to base training on.
   *  (default 100, reduce to around 90 speed up)</pre>
   *
   * <pre> -F <num>
   *  Number of folds for internal cross-validation.
   *  (default 0 -- no cross-validation)</pre>
   *
   * <pre> -R <num>
   *  Number of runs for internal cross-validation.
   *  (default 1)</pre>
   *
   * <pre> -L <num>
   *  Threshold on the improvement of the likelihood.
   *  (default -Double.MAX_VALUE)</pre>
   *
   * <pre> -H <num>
   *  Shrinkage parameter.
   *  (default 1)</pre>
   *
   * <pre> -S <num>
   *  Random number seed.
   *  (default 1)</pre>
   *
   * <pre> -I <num>
   *  Number of iterations.
   *  (default 10)</pre>
   *
   * <pre> -D
   *  If set, classifier is run in debug mode and
   *  may output additional info to the console</pre>
   *
   * <pre> -W
   *  Full name of base classifier.
* (default: weka.classifiers.trees.DecisionStump)</pre> * * <pre> * Options specific to classifier weka.classifiers.trees.DecisionStump: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * Options after -- are passed to the designated learner.<p> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -