logistic.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 799 行 · 第 1/2 页
JAVA
799 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Logistic.java *    Copyright (C) 2003 Xin Xu * */package weka.classifiers.functions;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Optimization;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.unsupervised.attribute.NominalToBinary;import weka.filters.unsupervised.attribute.RemoveUseless;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for building and using a multinomial logistic regression model with a ridge estimator.<br/> * <br/> * There are some modifications, however, compared to the paper of leCessie and van Houwelingen(1992): <br/> * <br/> * If there are k classes for n instances with m attributes, the parameter matrix B to be calculated will be an m*(k-1) matrix.<br/> * <br/> * The probability 
for class j with the exception of the last class is<br/> * <br/> * Pj(Xi) = exp(XiBj)/((sum[j=1..(k-1)]exp(Xi*Bj))+1) <br/> * <br/> * The last class has probability<br/> * <br/> * 1-(sum[j=1..(k-1)]Pj(Xi)) <br/> * 	= 1/((sum[j=1..(k-1)]exp(Xi*Bj))+1)<br/> * <br/> * The (negative) multinomial log-likelihood is thus: <br/> * <br/> * L = -sum[i=1..n]{<br/> * 	sum[j=1..(k-1)](Yij * ln(Pj(Xi)))<br/> * 	+(1 - (sum[j=1..(k-1)]Yij)) <br/> * 	* ln(1 - sum[j=1..(k-1)]Pj(Xi))<br/> * 	} + ridge * (B^2)<br/> * <br/> * In order to find the matrix B for which L is minimised, a Quasi-Newton Method is used to search for the optimized values of the m*(k-1) variables.  Note that before we use the optimization procedure, we 'squeeze' the matrix B into a m*(k-1) vector.  For details of the optimization procedure, please check weka.core.Optimization class.<br/> * <br/> * Although original Logistic Regression does not deal with instance weights, we modify the algorithm a little bit to handle the instance weights.<br/> * <br/> * For more information see:<br/> * <br/> * le Cessie, S., van Houwelingen, J.C. (1992). Ridge Estimators in Logistic Regression. Applied Statistics. 41(1):191-201.<br/> * <br/> * Note: Missing values are replaced using a ReplaceMissingValuesFilter, and nominal attributes are transformed into numeric attributes using a NominalToBinaryFilter. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{leCessie1992, *    author = {le Cessie, S. 
and van Houwelingen, J.C.}, *    journal = {Applied Statistics}, *    number = {1}, *    pages = {191-201}, *    title = {Ridge Estimators in Logistic Regression}, *    volume = {41}, *    year = {1992} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Turn on debugging output.</pre> *  * <pre> -R &lt;ridge&gt; *  Set the ridge in the log-likelihood.</pre> *  * <pre> -M &lt;number&gt; *  Set the maximum number of iterations (default -1, until convergence).</pre> *  <!-- options-end --> * * @author Xin Xu (xx5@cs.waikato.ac.nz) * @version $Revision: 1.35 $ */public class Logistic extends Classifier   implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {    /** for serialization */  static final long serialVersionUID = 3932117032546553727L;    /** The coefficients (optimized parameters) of the model */  protected double [][] m_Par;      /** The data saved as a matrix */  protected double [][] m_Data;      /** The number of attributes in the model */  protected int m_NumPredictors;      /** The index of the class attribute */  protected int m_ClassIndex;      /** The number of the class labels */  protected int m_NumClasses;      /** The ridge parameter. */  protected double m_Ridge = 1e-8;      /** An attribute filter */  private RemoveUseless m_AttFilter;      /** The filter used to make attributes numeric. */  private NominalToBinary m_NominalToBinary;      /** The filter used to get rid of missing values. */  private ReplaceMissingValues m_ReplaceMissingValues;      /** Debugging output */  protected boolean m_Debug;  /** Log-likelihood of the searched model */  protected double m_LL;      /** The maximum number of iterations. 
*/  private int m_MaxIts = -1;      /**   * Returns a string describing this classifier   * @return a description of the classifier suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Class for building and using a multinomial logistic "      +"regression model with a ridge estimator.\n\n"      +"There are some modifications, however, compared to the paper of "      +"leCessie and van Houwelingen(1992): \n\n"       +"If there are k classes for n instances with m attributes, the "      +"parameter matrix B to be calculated will be an m*(k-1) matrix.\n\n"      +"The probability for class j with the exception of the last class is\n\n"      +"Pj(Xi) = exp(XiBj)/((sum[j=1..(k-1)]exp(Xi*Bj))+1) \n\n"      +"The last class has probability\n\n"      +"1-(sum[j=1..(k-1)]Pj(Xi)) \n\t= 1/((sum[j=1..(k-1)]exp(Xi*Bj))+1)\n\n"      +"The (negative) multinomial log-likelihood is thus: \n\n"      +"L = -sum[i=1..n]{\n\tsum[j=1..(k-1)](Yij * ln(Pj(Xi)))"      +"\n\t+(1 - (sum[j=1..(k-1)]Yij)) \n\t* ln(1 - sum[j=1..(k-1)]Pj(Xi))"      +"\n\t} + ridge * (B^2)\n\n"      +"In order to find the matrix B for which L is minimised, a "      +"Quasi-Newton Method is used to search for the optimized values of "      +"the m*(k-1) variables.  Note that before we use the optimization "      +"procedure, we 'squeeze' the matrix B into a m*(k-1) vector.  
For "      +"details of the optimization procedure, please check "      +"weka.core.Optimization class.\n\n"      +"Although original Logistic Regression does not deal with instance "      +"weights, we modify the algorithm a little bit to handle the "      +"instance weights.\n\n"      +"For more information see:\n\n"      + getTechnicalInformation().toString() + "\n\n"      +"Note: Missing values are replaced using a ReplaceMissingValuesFilter, and "      +"nominal attributes are transformed into numeric attributes using a "      +"NominalToBinaryFilter.";  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "le Cessie, S. 
and van Houwelingen, J.C.");    result.setValue(Field.YEAR, "1992");    result.setValue(Field.TITLE, "Ridge Estimators in Logistic Regression");    result.setValue(Field.JOURNAL, "Applied Statistics");    result.setValue(Field.VOLUME, "41");    result.setValue(Field.NUMBER, "1");    result.setValue(Field.PAGES, "191-201");        return result;  }  /**   * Returns an enumeration describing the available options   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector newVector = new Vector(3);    newVector.addElement(new Option("\tTurn on debugging output.",				    "D", 0, "-D"));    newVector.addElement(new Option("\tSet the ridge in the log-likelihood.",				    "R", 1, "-R <ridge>"));    newVector.addElement(new Option("\tSet the maximum number of iterations"+				    " (default -1, until convergence).",				    "M", 1, "-M <number>"));    return newVector.elements();  }      /**   * Parses a given list of options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -D   *  Turn on debugging output.</pre>   *    * <pre> -R &lt;ridge&gt;   *  Set the ridge in the log-likelihood.</pre>   *    * <pre> -M &lt;number&gt;   *  Set the maximum number of iterations (default -1, until convergence).</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    setDebug(Utils.getFlag('D', options));    String ridgeString = Utils.getOption('R', options);    if (ridgeString.length() != 0)       m_Ridge = Double.parseDouble(ridgeString);    else       m_Ridge = 1.0e-8;	    String maxItsString = Utils.getOption('M', options);    if (maxItsString.length() != 0)       m_MaxIts = Integer.parseInt(maxItsString);    else       m_MaxIts = -1;  }      /**   * Gets the current settings of the classifier.   
*   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {	    String [] options = new String [5];    int current = 0;	    if (getDebug())       options[current++] = "-D";    options[current++] = "-R";    options[current++] = ""+m_Ridge;	    options[current++] = "-M";    options[current++] = ""+m_MaxIts;    while (current < options.length)       options[current++] = "";    return options;  }     /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String debugTipText() {    return "Output debug information to the console.";  }  /**   * Sets whether debugging output will be printed.   *   * @param debug true if debugging output should be printed   */  public void setDebug(boolean debug) {    m_Debug = debug;  }      /**   * Gets whether debugging output will be printed.   *   * @return true if debugging output will be printed   */  public boolean getDebug() {    return m_Debug;  }        /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String ridgeTipText() {    return "Set the Ridge value in the log-likelihood.";  }  /**   * Sets the ridge in the log-likelihood.   *   * @param ridge the ridge   */  public void setRidge(double ridge) {    m_Ridge = ridge;  }      /**   * Gets the ridge in the log-likelihood.   *   * @return the ridge   */  public double getRidge() {    return m_Ridge;  }     /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String maxItsTipText() {    return "Maximum number of iterations to perform.";  }  /**   * Get the value of MaxIts.   *   * @return Value of MaxIts.   */  public int getMaxIts() {	    return m_MaxIts;  }      /**   * Set the value of MaxIts.   
*   * @param newMaxIts Value to assign to MaxIts.   */
  public void setMaxIts(int newMaxIts) {
    // Stored as given, with no range check; the -M option text describes
    // -1 as "until convergence".
    m_MaxIts = newMaxIts;
  }

  /**
   * Private Optimization subclass that holds the per-instance weights and
   * class labels handed to it through its setters.
   *
   * NOTE(review): only the beginning of this class is visible here; how the
   * stored arrays are consumed (presumably by the objective function whose
   * Javadoc starts below) must be confirmed against the rest of the file.
   */
  private class OptEng extends Optimization{

    /** Weights of instances in the data */
    private double[] weights;

    /** Class labels of instances */
    private int[] cls;

    /**
     * Set the weights of instances.
     * The array reference is stored directly; no copy is made.
     *
     * @param w the weights to be set
     */
    public void setWeights(double[] w) {
      weights = w;
    }

    /**
     * Set the class labels of instances.
     * The array reference is stored directly; no copy is made.
     *
     * @param c the class labels to be set
     */
    public void setClassLabels(int[] c) {
      cls = c;
    }

    /**      * Evaluate objective function     * @param x the current values of variables
logistic.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 logistic.java 源码文件，采用 Java 编程语言编写，共 799 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?