⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 linearregression.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    LinearRegression.java
 *    Copyright (C) 1999 Eibe Frank,Len Trigg
 *
 */

package weka.classifiers.functions;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.Filter;
import java.io.*;
import java.util.*;
import weka.core.*;

/**
 * Class for using linear regression for prediction. Uses the Akaike 
 * criterion for model selection, and is able to deal with weighted
 * instances. <p>
 *
 * Valid options are:<p>
 *
 * -D <br>
 * Produce debugging output. <p>
 *
 * -S num <br>
 * Set the attribute selection method to use. 1 = None, 2 = Greedy
 * (default 0 = M5' method) <p>
 *
 * -C <br>
 * Do not try to eliminate colinear attributes <p>
 *
 * -R num <br>
 * The ridge parameter (default 1.0e-8) <p>
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $
 */
public class LinearRegression extends Classifier implements OptionHandler,
  WeightedInstancesHandler {

  /**
   * Array for storing coefficients of linear regression. As read by
   * toString() and coefficients(): one entry per selected non-class
   * attribute (in attribute order), followed by one trailing constant term.
   */
  private double[] m_Coefficients;

  /**
   * Which attributes are relevant? Indexed by attribute position;
   * buildClassifier leaves the class attribute's entry false and also
   * deselects attributes whose standard deviation is zero.
   */
  private boolean[] m_SelectedAttributes;

  /**
   * Variable for storing transformed training data. Null until a model has
   * been built; once buildClassifier finishes it is replaced by
   * new Instances(data, 0) to save memory (presumably a header-only copy --
   * confirm against Instances).
   */
  private Instances m_TransformedData;

  /**
   * The filter for replacing missing values. Set to null by buildClassifier
   * when checks are turned off.
   */
  private ReplaceMissingValues m_MissingFilter;

  /** The filter storing the transformation from nominal to 
      binary attributes. Set to null by buildClassifier when checks are
      turned off. */
  private NominalToBinary m_TransformFilter;

  /** The standard deviation of the class attribute */
  private double m_ClassStdDev;

  /** The mean of the class attribute */
  private double m_ClassMean;

  /** The index of the class attribute (taken from the transformed data) */
  private int m_ClassIndex;

  /** The attribute means; buildClassifier does not fill the class slot */
  private double[] m_Means;

  /** The attribute standard deviations; buildClassifier does not fill the
      class slot */
  private double[] m_StdDevs;

  /** True if debug output will be printed */
  private boolean b_Debug;

  /** The current attribute selection method (presumably one of the
      SELECTION_* constants below -- the setter is not visible here) */
  private int m_AttributeSelection;

  /* Attribute selection methods: numeric IDs as accepted by the -S option */
  public static final int SELECTION_M5 = 0;
  public static final int SELECTION_NONE = 1;
  public static final int SELECTION_GREEDY = 2;
  /* Tags pairing each selection ID with a readable label. Note that the
     array order (NONE, M5, GREEDY) differs from the numeric ID order. */
  public static final Tag [] TAGS_SELECTION = {
    new Tag(SELECTION_NONE, "No attribute selection"),
    new Tag(SELECTION_M5, "M5 method"),
    new Tag(SELECTION_GREEDY, "Greedy method")
  };

  /** Try to eliminate correlated attributes? (on by default) */
  private boolean m_EliminateColinearAttributes = true;

  /** Turn off all checks and conversions? (off by default) */
  private boolean m_checksTurnedOff = false;

  /** The ridge parameter (default 1.0e-8) */
  private double m_Ridge = 1.0e-8;

  /**
   * Turns off checks for missing values, etc. Use with caution.
   * Also turns off scaling.
   */
  public void turnChecksOff() {
    // From now on buildClassifier and classifyInstance skip validation
    // and the preprocessing filters.
    this.m_checksTurnedOff = true;
  }

  /**
   * Turns on checks for missing values, etc. Also turns
   * on scaling.
   */
  public void turnChecksOn() {
    // Re-enable validation and the preprocessing filters for later calls
    // to buildClassifier and classifyInstance.
    this.m_checksTurnedOff = false;
  }

  /**
   * Returns a string describing this classifier
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // One-sentence summary of the scheme, returned verbatim.
    final String summary =
      "Class for using linear regression for prediction. Uses the Akaike "
      + "criterion for model selection, and is able to deal with weighted "
      + "instances.";
    return summary;
  }

  /**
   * Builds a regression model for the given data.
   *
   * @param data the training data to be used for generating the
   * linear regression function
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    if (m_checksTurnedOff) {
      // No validation, no conversion: the data is used exactly as given.
      m_TransformFilter = null;
      m_MissingFilter = null;
    } else {
      // Reject data this scheme cannot handle before touching any state.
      if (!data.classAttribute().isNumeric()) {
        throw new UnsupportedClassTypeException("Class attribute has to be numeric for regression!");
      }
      if (data.numInstances() == 0) {
        throw new Exception("No instances in training file!");
      }
      if (data.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
      }

      // Preprocess: nominal -> binary first, then replace missing values,
      // and finally drop instances whose class value is missing.
      m_TransformFilter = new NominalToBinary();
      m_TransformFilter.setInputFormat(data);
      data = Filter.useFilter(data, m_TransformFilter);

      m_MissingFilter = new ReplaceMissingValues();
      m_MissingFilter.setInputFormat(data);
      data = Filter.useFilter(data, m_MissingFilter);

      data.deleteWithMissingClass();
    }

    // Everything below works on the (possibly filtered) data set.
    final int numAttributes = data.numAttributes();
    final int classIndex = data.classIndex();
    m_ClassIndex = classIndex;
    m_TransformedData = data;
    m_Coefficients = null;

    // Per-attribute statistics. Every non-class attribute starts out
    // selected unless it is constant (zero standard deviation).
    m_SelectedAttributes = new boolean[numAttributes];
    m_Means = new double[numAttributes];
    m_StdDevs = new double[numAttributes];
    for (int att = 0; att < numAttributes; att++) {
      if (att == classIndex) {
        continue;
      }
      m_Means[att] = data.meanOrMode(att);
      m_StdDevs[att] = Math.sqrt(data.variance(att));
      m_SelectedAttributes[att] = (m_StdDevs[att] != 0);
    }

    // Statistics of the class attribute itself.
    m_ClassStdDev = Math.sqrt(data.variance(classIndex));
    m_ClassMean = data.meanOrMode(classIndex);

    // Perform the regression
    findBestModel();

    // Save memory: replace the stored training data with an empty copy.
    m_TransformedData = new Instances(data, 0);
  }

  /**
   * Classifies the given instance using the linear regression function.
   *
   * @param instance the test instance
   * @return the classification
   * @exception Exception if classification can't be done successfully
   */
  public double classifyInstance(Instance instance) throws Exception {

    // With checks off the instance goes to the model untouched.
    if (m_checksTurnedOff) {
      return regressionPrediction(instance, m_SelectedAttributes, m_Coefficients);
    }

    // Otherwise push it through the same two filters that were set up in
    // buildClassifier, in the same order.
    m_TransformFilter.input(instance);
    m_TransformFilter.batchFinished();
    Instance binarized = m_TransformFilter.output();

    m_MissingFilter.input(binarized);
    m_MissingFilter.batchFinished();
    Instance completed = m_MissingFilter.output();

    // Calculate the dependent variable from the regression model
    return regressionPrediction(completed, m_SelectedAttributes, m_Coefficients);
  }

  /**
   * Outputs the linear regression model as a string.
   */
  public String toString() {

    if (m_TransformedData == null) {
      return "Linear Regression: No model built yet.";
    }
    try {
      StringBuffer out = new StringBuffer("\nLinear Regression Model\n\n");
      out.append(m_TransformedData.classAttribute().name()).append(" =\n\n");

      // coeffPos walks m_Coefficients in step with the selected attributes;
      // it also tells us whether a term has already been printed.
      int coeffPos = 0;
      final int numAttributes = m_TransformedData.numAttributes();
      for (int att = 0; att < numAttributes; att++) {
        if (att == m_ClassIndex || !m_SelectedAttributes[att]) {
          continue;
        }
        if (coeffPos > 0) {
          out.append(" +\n");
        }
        out.append(Utils.doubleToString(m_Coefficients[coeffPos], 12, 4));
        out.append(" * ");
        out.append(m_TransformedData.attribute(att).name());
        coeffPos++;
      }

      // The entry after the last attribute coefficient is the constant term.
      out.append(" +\n");
      out.append(Utils.doubleToString(m_Coefficients[coeffPos], 12, 4));
      return out.toString();
    } catch (Exception e) {
      // Any failure while formatting yields a fixed fallback text.
      return "Can't print Linear Regression!";
    }
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    // Describe each command-line switch once, in the order it is listed.
    Option[] available = {
      new Option("\tProduce debugging output.\n"
                 + "\t(default no debugging output)",
                 "D", 0, "-D"),
      new Option("\tSet the attribute selection method"
                 + " to use. 1 = None, 2 = Greedy.\n"
                 + "\t(default 0 = M5' method)",
                 "S", 1, "-S <number of selection method>"),
      new Option("\tDo not try to eliminate colinear"
                 + " attributes.\n",
                 "C", 0, "-C"),
      new Option("\tSet ridge parameter (default 1.0e-8).\n",
                 "R", 1, "-R <double>")
    };

    Vector result = new Vector(available.length);
    for (int i = 0; i < available.length; i++) {
      result.addElement(available[i]);
    }
    return result.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -D <br>
   * Produce debugging output. <p>
   *
   * -S num <br>
   * Set the attribute selection method to use. 1 = None, 2 = Greedy
   * (default 0 = M5' method) <p>
   *
   * -C <br>
   * Do not try to eliminate colinear attributes <p>
   *
   * -R num <br>
   * The ridge parameter (default 1.0e-8) <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // -S <id>: attribute selection method; falls back to M5 when absent.
    String selectionArg = Utils.getOption('S', options);
    int selectionId = (selectionArg.length() == 0)
      ? SELECTION_M5
      : Integer.parseInt(selectionArg);
    setAttributeSelectionMethod(new SelectedTag(selectionId, TAGS_SELECTION));

    // -R <double>: ridge parameter; falls back to 1.0e-8 when absent.
    String ridgeArg = Utils.getOption('R', options);
    double ridge = (ridgeArg.length() == 0)
      ? 1.0e-8
      : Double.parseDouble(ridgeArg);
    setRidge(ridge);

    // -D switches debugging on; -C switches colinear elimination off.
    setDebug(Utils.getFlag('D', options));
    boolean keepColinear = Utils.getFlag('C', options);
    setEliminateColinearAttributes(!keepColinear);
  }

  /**
   * Returns the coefficients for this linear model.
   */
  public double[] coefficients() {

    // Result has one slot per attribute plus a final slot for the constant
    // term; slots of unselected attributes (and the class) stay at 0.
    final int numAttributes = m_SelectedAttributes.length;
    double[] expanded = new double[numAttributes + 1];

    int next = 0;  // position of the next unread entry in m_Coefficients
    for (int att = 0; att < numAttributes; att++) {
      boolean used = m_SelectedAttributes[att] && att != m_ClassIndex;
      if (used) {
        expanded[att] = m_Coefficients[next];
        next++;
      }
    }
    expanded[numAttributes] = m_Coefficients[next];
    return expanded;
  }

  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    // Fixed-size result: slots not needed for a switch remain "".
    String [] result = new String [6];
    Arrays.fill(result, "");
    int pos = 0;

    result[pos++] = "-S";
    result[pos++] = String.valueOf(getAttributeSelectionMethod()
                                   .getSelectedTag().getID());

    // The two flags are only emitted when they differ from the defaults.
    if (getDebug()) {
      result[pos++] = "-D";
    }
    if (!getEliminateColinearAttributes()) {
      result[pos++] = "-C";
    }

    result[pos++] = "-R";
    result[pos++] = String.valueOf(getRidge());

    return result;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String ridgeTipText() {
    // Short help text for the ridge property.
    final String tip = "The value of the Ridge parameter.";
    return tip;
  }

  /**
   * Get the value of Ridge.
   *
   * @return Value of Ridge.
   */
  public double getRidge() {
    // Plain accessor for the stored ridge parameter.
    return this.m_Ridge;
  }
  
  /**
   * Set the value of Ridge.
   *
   * @param newRidge Value to assign to Ridge.
   */
  public void setRidge(double newRidge) {
    // Stored as given; no range validation is applied here.
    this.m_Ridge = newRidge;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String eliminateColinearAttributesTipText() {
    // Short help text for the eliminateColinearAttributes property.
    final String tip = "Eliminate colinear attributes.";
    return tip;
  }

  /**
   * Get the value of EliminateColinearAttributes.
   *
   * @return Value of EliminateColinearAttributes.
   */
  public boolean getEliminateColinearAttributes() {
    // Plain accessor for the colinear-elimination switch.
    return this.m_EliminateColinearAttributes;
  }
  
  /**
   * Set the value of EliminateColinearAttributes.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -