/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* LinearRegression.java
* Copyright (C) 1999 Eibe Frank, Len Trigg
*
*/
package weka.classifiers.functions;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.Filter;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
* Class for using linear regression for prediction. Uses the Akaike
* criterion for model selection, and is able to deal with weighted
* instances. <p>
*
* Valid options are:<p>
*
* -D <br>
* Produce debugging output. <p>
*
* -S num <br>
* Set the attribute selection method to use. 1 = None, 2 = Greedy
* (default 0 = M5' method) <p>
*
* -C <br>
* Do not try to eliminate colinear attributes <p>
*
* -R num <br>
* The ridge parameter (default 1.0e-8) <p>
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 1.1 $
*/
public class LinearRegression extends Classifier implements OptionHandler,
WeightedInstancesHandler {
/** Array for storing coefficients of linear regression. */
private double[] m_Coefficients;
/** Which attributes are relevant? */
private boolean[] m_SelectedAttributes;
/** Variable for storing transformed training data. */
private Instances m_TransformedData;
/** The filter for removing missing values. */
private ReplaceMissingValues m_MissingFilter;
/** The filter storing the transformation from nominal to
binary attributes. */
private NominalToBinary m_TransformFilter;
/** The standard deviation of the class attribute */
private double m_ClassStdDev;
/** The mean of the class attribute */
private double m_ClassMean;
/** The index of the class attribute */
private int m_ClassIndex;
/** The attribute means */
private double[] m_Means;
/** The attribute standard deviations */
private double[] m_StdDevs;
/** True if debug output will be printed */
private boolean b_Debug;
/** The current attribute selection method */
private int m_AttributeSelection;
/* Attribute selection methods */
public static final int SELECTION_M5 = 0;
public static final int SELECTION_NONE = 1;
public static final int SELECTION_GREEDY = 2;
public static final Tag [] TAGS_SELECTION = {
new Tag(SELECTION_NONE, "No attribute selection"),
new Tag(SELECTION_M5, "M5 method"),
new Tag(SELECTION_GREEDY, "Greedy method")
};
/** Try to eliminate correlated attributes? */
private boolean m_EliminateColinearAttributes = true;
/** Turn off all checks and conversions? */
private boolean m_checksTurnedOff = false;
/** The ridge parameter */
private double m_Ridge = 1.0e-8;
/**
* Turns off checks for missing values, etc. Use with caution.
* Also turns off scaling.
*/
public void turnChecksOff() {
m_checksTurnedOff = true;
}
/**
* Turns on checks for missing values, etc. Also turns
* on scaling.
*/
public void turnChecksOn() {
m_checksTurnedOff = false;
}
/**
* Returns a string describing this classifier
* @return a description of the classifier suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Class for using linear regression for prediction. Uses the Akaike "
+"criterion for model selection, and is able to deal with weighted "
+"instances.";
}
/**
* Builds a regression model for the given data.
*
* @param data the training data to be used for generating the
* linear regression function
* @exception Exception if the classifier could not be built successfully
*/
public void buildClassifier(Instances data) throws Exception {
if (!m_checksTurnedOff) {
if (!data.classAttribute().isNumeric()) {
throw new UnsupportedClassTypeException("Class attribute has to be numeric for regression!");
}
if (data.numInstances() == 0) {
throw new Exception("No instances in training file!");
}
if (data.checkForStringAttributes()) {
throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
}
}
// Preprocess instances
if (!m_checksTurnedOff) {
m_TransformFilter = new NominalToBinary();
m_TransformFilter.setInputFormat(data);
data = Filter.useFilter(data, m_TransformFilter);
m_MissingFilter = new ReplaceMissingValues();
m_MissingFilter.setInputFormat(data);
data = Filter.useFilter(data, m_MissingFilter);
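// Instances without a class value cannot be used to fit the regression, so drop them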
data.deleteWithMissingClass();
} else {
m_TransformFilter = null;
m_MissingFilter = null;
}
m_ClassIndex = data.classIndex();
m_TransformedData = data;
// Turn all attributes on for a start
m_SelectedAttributes = new boolean[data.numAttributes()];
for (int i = 0; i < data.numAttributes(); i++) {
if (i != m_ClassIndex) {
m_SelectedAttributes[i] = true;
}
}
m_Coefficients = null;
// Compute means and standard deviations
m_Means = new double[data.numAttributes()];
m_StdDevs = new double[data.numAttributes()];
for (int j = 0; j < data.numAttributes(); j++) {
if (j != data.classIndex()) {
m_Means[j] = data.meanOrMode(j);
m_StdDevs[j] = Math.sqrt(data.variance(j));
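// An attribute with zero variance is constant and carries no information, so deselect it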
if (m_StdDevs[j] == 0) {
m_SelectedAttributes[j] = false;
}
}
}
m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex()));
m_ClassMean = data.meanOrMode(m_TransformedData.classIndex());
// Perform the regression
findBestModel();
// Save memory
m_TransformedData = new Instances(data, 0);
}
/**
* Classifies the given instance using the linear regression function.
*
* @param instance the test instance
* @return the classification
* @exception Exception if classification can't be done successfully
*/
public double classifyInstance(Instance instance) throws Exception {
// Transform the input instance
Instance transformedInstance = instance;
if (!m_checksTurnedOff) {
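// Push the instance through the same nominal-to-binary and missing-value filters that were fitted on the training data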
m_TransformFilter.input(transformedInstance);
m_TransformFilter.batchFinished();
transformedInstance = m_TransformFilter.output();
m_MissingFilter.input(transformedInstance);
m_MissingFilter.batchFinished();
transformedInstance = m_MissingFilter.output();
}
// Calculate the dependent variable from the regression model
return regressionPrediction(transformedInstance,
m_SelectedAttributes,
m_Coefficients);
}
/**
* Outputs the linear regression model as a string.
*/
public String toString() {
if (m_TransformedData == null) {
return "Linear Regression: No model built yet.";
}
try {
StringBuffer text = new StringBuffer();
int column = 0;
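// m_Coefficients only stores entries for selected attributes (plus the intercept at the end); 'column' walks that compact array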
boolean first = true;
text.append("\nLinear Regression Model\n\n");
text.append(m_TransformedData.classAttribute().name()+" =\n\n");
for (int i = 0; i < m_TransformedData.numAttributes(); i++) {
if ((i != m_ClassIndex)
&& (m_SelectedAttributes[i])) {
if (!first)
text.append(" +\n");
else
first = false;
text.append(Utils.doubleToString(m_Coefficients[column], 12, 4)
+ " * ");
text.append(m_TransformedData.attribute(i).name());
column++;
}
}
text.append(" +\n" +
Utils.doubleToString(m_Coefficients[column], 12, 4));
return text.toString();
} catch (Exception e) {
return "Can't print Linear Regression!";
}
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector(4);
newVector.addElement(new Option("\tProduce debugging output.\n"
+ "\t(default no debugging output)",
"D", 0, "-D"));
newVector.addElement(new Option("\tSet the attribute selection method"
+ " to use. 1 = None, 2 = Greedy.\n"
+ "\t(default 0 = M5' method)",
"S", 1, "-S <number of selection method>"));
newVector.addElement(new Option("\tDo not try to eliminate colinear"
+ " attributes.\n",
"C", 0, "-C"));
newVector.addElement(new Option("\tSet ridge parameter (default 1.0e-8).\n",
"R", 1, "-R <double>"));
return newVector.elements();
}
/**
* Parses a given list of options. Valid options are:<p>
*
* -D <br>
* Produce debugging output. <p>
*
* -S num <br>
* Set the attribute selection method to use. 1 = None, 2 = Greedy
* (default 0 = M5' method) <p>
*
* -C <br>
* Do not try to eliminate colinear attributes <p>
*
* -R num <br>
* The ridge parameter (default 1.0e-8) <p>
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String selectionString = Utils.getOption('S', options);
if (selectionString.length() != 0) {
setAttributeSelectionMethod(new SelectedTag(Integer
.parseInt(selectionString),
TAGS_SELECTION));
} else {
setAttributeSelectionMethod(new SelectedTag(SELECTION_M5,
TAGS_SELECTION));
}
String ridgeString = Utils.getOption('R', options);
if (ridgeString.length() != 0) {
setRidge(new Double(ridgeString).doubleValue());
} else {
setRidge(1.0e-8);
}
setDebug(Utils.getFlag('D', options));
setEliminateColinearAttributes(!Utils.getFlag('C', options));
}
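// Example (option values chosen purely for illustration):
//   setOptions(new String[] {"-S", "2", "-R", "0.01", "-D"});
// selects the greedy attribute selection method, a ridge of 0.01, and debug output.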
/**
* Returns the coefficients for this linear model.
*/
public double[] coefficients() {
double[] coefficients = new double[m_SelectedAttributes.length + 1];
int counter = 0;
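// Expand the compact coefficient array so that position i lines up with attribute i;
// unselected attributes keep a coefficient of 0 and the intercept goes in the last slot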
for (int i = 0; i < m_SelectedAttributes.length; i++) {
if ((m_SelectedAttributes[i]) && ((i != m_ClassIndex))) {
coefficients[i] = m_Coefficients[counter++];
}
}
coefficients[m_SelectedAttributes.length] = m_Coefficients[counter];
return coefficients;
}
/**
* Gets the current settings of the classifier.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions() {
String [] options = new String [6];
int current = 0;
options[current++] = "-S";
options[current++] = "" + getAttributeSelectionMethod()
.getSelectedTag().getID();
if (getDebug()) {
options[current++] = "-D";
}
if (!getEliminateColinearAttributes()) {
options[current++] = "-C";
}
options[current++] = "-R";
options[current++] = "" + getRidge();
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String ridgeTipText() {
return "The value of the Ridge parameter.";
}
/**
* Get the value of Ridge.
*
* @return Value of Ridge.
*/
public double getRidge() {
return m_Ridge;
}
/**
* Set the value of Ridge.
*
* @param newRidge Value to assign to Ridge.
*/
public void setRidge(double newRidge) {
m_Ridge = newRidge;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String eliminateColinearAttributesTipText() {
return "Eliminate colinear attributes.";
}
/**
* Get the value of EliminateColinearAttributes.
*
* @return Value of EliminateColinearAttributes.
*/
public boolean getEliminateColinearAttributes() {
return m_EliminateColinearAttributes;
}
/**
* Set the value of EliminateColinearAttributes.
*
* @param newEliminateColinearAttributes Value to assign to EliminateColinearAttributes.
*/
public void setEliminateColinearAttributes(boolean newEliminateColinearAttributes) {
m_EliminateColinearAttributes = newEliminateColinearAttributes;
}
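// ---------------------------------------------------------------------
// Illustrative usage sketch (not part of the original file; the data set
// name "cpu.arff" is an assumption made purely for illustration). It uses
// only methods defined above plus standard weka.core.Instances calls:
//
//   Instances data = new Instances(
//       new java.io.BufferedReader(new java.io.FileReader("cpu.arff")));
//   data.setClassIndex(data.numAttributes() - 1);   // class must be numeric
//   LinearRegression lr = new LinearRegression();
//   lr.setOptions(new String[] {"-S", "0", "-R", "1.0e-8"}); // M5' selection, default ridge
//   lr.buildClassifier(data);
//   double prediction = lr.classifyInstance(data.instance(0));
//   System.out.println(lr);                         // model described via toString()
// ---------------------------------------------------------------------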