📄 linearregression.java
字号:
 *
 * @param newEliminateColinearAttributes Value to assign to EliminateColinearAttributes.
 */
public void setEliminateColinearAttributes(boolean newEliminateColinearAttributes) {
  // Simple flag mutator; the flag is read by findBestModel() to decide
  // whether colinear attributes are pruned before attribute selection.
  m_EliminateColinearAttributes = newEliminateColinearAttributes;
}
/**
 * Get the number of coefficients used in the model, not counting the
 * intercept term stored in the final array slot.
 *
 * @return the number of coefficients
 */
public int numParameters() {
  int totalCoefficients = m_Coefficients.length;
  return totalCoefficients - 1;
}
/**
 * Returns the tip text for this property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String attributeSelectionMethodTipText() {
  return "Set the method used to select attributes for use in the "
      + "linear regression. Available methods are: no attribute "
      + "selection, attribute selection using M5's method (step "
      + "through the attributes removing the one with the smallest "
      + "standardised coefficient until no improvement is observed in "
      + "the estimate of the error given by the Akaike information "
      + "criterion), and a greedy selection using the Akaike "
      + "information metric.";
}
/**
 * Sets the method used to select attributes for use in the
 * linear regression.
 *
 * @param method the attribute selection method to use.
 */
public void setAttributeSelectionMethod(SelectedTag method) {
  if (method.getTags() != TAGS_SELECTION) {
    // Tag belongs to a different tag group; ignore it, as before.
    return;
  }
  m_AttributeSelection = method.getSelectedTag().getID();
}
/**
 * Gets the method used to select attributes for use in the
 * linear regression.
 *
 * @return the method to use.
 */
public SelectedTag getAttributeSelectionMethod() {
  SelectedTag current = new SelectedTag(m_AttributeSelection, TAGS_SELECTION);
  return current;
}
/**
 * Returns the tip text for this property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String debugTipText() {
  final String tip = "Outputs debug information to the console.";
  return tip;
}
/**
 * Sets whether debugging output will be printed.
 *
 * @param debug true if debugging output should be printed
 */
public void setDebug(boolean debug) {
  // Read throughout the class to gate System.out/err diagnostics.
  b_Debug = debug;
}
/**
 * Gets whether debugging output will be printed.
 *
 * @return true if debugging output is printed
 */
public boolean getDebug() {
  return b_Debug;
}
/**
 * Removes the attribute with the highest standardised coefficient
 * greater than 1.5 from the selected attributes.
 *
 * @param selectedAttributes an array of flags indicating which
 * attributes are included in the regression model; mutated in place
 * when an attribute is deselected
 * @param coefficients an array of coefficients for the regression
 * model
 * @return true if an attribute was removed
 */
private boolean deselectColinearAttributes(boolean [] selectedAttributes,
                                           double [] coefficients) {
  // Starts at the removal threshold: only standardised coefficients
  // strictly above 1.5 are candidates, and we track the largest seen.
  double largestSC = 1.5;
  int candidate = -1;
  int coeffIndex = 0;
  for (int attr = 0; attr < selectedAttributes.length; attr++) {
    if (!selectedAttributes[attr]) {
      continue; // coefficients array only holds entries for selected attributes
    }
    double standardisedCoeff =
        Math.abs(coefficients[coeffIndex] * m_StdDevs[attr] / m_ClassStdDev);
    if (standardisedCoeff > largestSC) {
      largestSC = standardisedCoeff;
      candidate = attr;
    }
    coeffIndex++;
  }
  if (candidate < 0) {
    return false; // nothing exceeded the threshold
  }
  selectedAttributes[candidate] = false;
  if (b_Debug) {
    System.out.println("Deselected colinear attribute:" + (candidate + 1)
        + " with standardised coefficient: " + largestSC);
  }
  return true;
}
/**
 * Performs a search for the best regression model using
 * Akaike's information criterion.
 *
 * <p>First fits the full model (pruning colinear attributes if enabled),
 * then — depending on {@code m_AttributeSelection} — either greedily
 * tries removing each attribute, or repeatedly removes the attribute
 * with the smallest standardised coefficient (M5's method), keeping a
 * removal whenever it lowers the Akaike score.
 *
 * @exception Exception if regression can't be done
 */
private void findBestModel() throws Exception {
  // For the weighted case we still use numInstances in
  // the calculation of the Akaike criterion.
  int numInstances = m_TransformedData.numInstances();
  if (b_Debug) {
    System.out.println((new Instances(m_TransformedData, 0)).toString());
  }
  // Perform a regression for the full model, and remove colinear attributes
  do {
    m_Coefficients = doRegression(m_SelectedAttributes);
  } while (m_EliminateColinearAttributes &&
           deselectColinearAttributes(m_SelectedAttributes, m_Coefficients));
  // Figure out current number of attributes + 1. (We treat this model
  // as the full model for the Akaike-based methods.)
  int numAttributes = 1;
  for (int i = 0; i < m_SelectedAttributes.length; i++) {
    if (m_SelectedAttributes[i]) {
      numAttributes++;
    }
  }
  double fullMSE = calculateSE(m_SelectedAttributes, m_Coefficients);
  // Akaike score of the full model: its error ratio to itself is 1,
  // giving (n - p) plus the 2p complexity penalty.
  double akaike = (numInstances - numAttributes) + 2 * numAttributes;
  if (b_Debug) {
    System.out.println("Initial Akaike value: " + akaike);
  }
  boolean improved;
  int currentNumAttributes = numAttributes;
  switch (m_AttributeSelection) {
  case SELECTION_GREEDY:
    // Greedy attribute removal: each sweep tries dropping every selected
    // attribute in turn and keeps any drop that improves the Akaike score.
    do {
      boolean [] currentSelected = (boolean []) m_SelectedAttributes.clone();
      improved = false;
      // Each candidate model in this sweep has one attribute fewer.
      currentNumAttributes--;
      for (int i = 0; i < m_SelectedAttributes.length; i++) {
        if (currentSelected[i]) {
          // Calculate the akaike rating without this attribute
          currentSelected[i] = false;
          double [] currentCoeffs = doRegression(currentSelected);
          double currentMSE = calculateSE(currentSelected, currentCoeffs);
          double currentAkaike = currentMSE / fullMSE
            * (numInstances - numAttributes)
            + 2 * currentNumAttributes;
          if (b_Debug) {
            System.out.println("(akaike: " + currentAkaike);
          }
          // If it is better than the current best
          if (currentAkaike < akaike) {
            if (b_Debug) {
              System.err.println("Removing attribute " + (i + 1)
                                 + " improved Akaike: " + currentAkaike);
            }
            improved = true;
            akaike = currentAkaike;
            // Commit the removal to the model state.
            System.arraycopy(currentSelected, 0,
                             m_SelectedAttributes, 0,
                             m_SelectedAttributes.length);
            m_Coefficients = currentCoeffs;
          }
          // Restore the flag so the next candidate differs by one attribute.
          currentSelected[i] = true;
        }
      }
    } while (improved);
    break;
  case SELECTION_M5:
    // Step through the attributes removing the one with the smallest
    // standardised coefficient until no improvement in Akaike
    do {
      improved = false;
      currentNumAttributes--;
      // Find attribute with smallest SC
      double minSC = 0;
      int minAttr = -1, coeff = 0;
      for (int i = 0; i < m_SelectedAttributes.length; i++) {
        if (m_SelectedAttributes[i]) {
          double SC = Math.abs(m_Coefficients[coeff] * m_StdDevs[i]
                               / m_ClassStdDev);
          // (coeff == 0) seeds minSC with the first selected attribute's SC.
          if ((coeff == 0) || (SC < minSC)) {
            minSC = SC;
            minAttr = i;
          }
          coeff++;
        }
      }
      // See whether removing it improves the Akaike score
      if (minAttr >= 0) {
        m_SelectedAttributes[minAttr] = false;
        double [] currentCoeffs = doRegression(m_SelectedAttributes);
        double currentMSE = calculateSE(m_SelectedAttributes, currentCoeffs);
        double currentAkaike = currentMSE / fullMSE
          * (numInstances - numAttributes)
          + 2 * currentNumAttributes;
        if (b_Debug) {
          System.out.println("(akaike: " + currentAkaike);
        }
        // If it is better than the current best
        if (currentAkaike < akaike) {
          if (b_Debug) {
            System.err.println("Removing attribute " + (minAttr + 1)
                               + " improved Akaike: " + currentAkaike);
          }
          improved = true;
          akaike = currentAkaike;
          m_Coefficients = currentCoeffs;
        } else {
          // Removal made things worse; put the attribute back.
          m_SelectedAttributes[minAttr] = true;
        }
      }
    } while (improved);
    break;
  case SELECTION_NONE:
    // Keep the full (colinearity-pruned) model as-is.
    break;
  }
}
/**
 * Calculate the squared error of a regression model on the
 * training data.
 *
 * <p>Note: despite the local variable name {@code mse}, this returns the
 * <em>sum</em> of squared errors — no division by the number of
 * instances is performed.
 *
 * @param selectedAttributes an array of flags indicating which
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model
 * @return the sum of squared errors on the training data
 * @exception Exception if there is a missing class value in the training
 * data
 */
private double calculateSE(boolean [] selectedAttributes,
                           double [] coefficients) throws Exception {
  double mse = 0;
  for (int i = 0; i < m_TransformedData.numInstances(); i++) {
    double prediction = regressionPrediction(m_TransformedData.instance(i),
                                             selectedAttributes,
                                             coefficients);
    double error = prediction - m_TransformedData.instance(i).classValue();
    mse += error * error;
  }
  return mse;
}
/**
 * Calculate the dependent value for a given instance for a
 * given regression model.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model; the final element is the intercept
 * @return the regression value for the instance.
 * @exception Exception if the class attribute of the input instance
 * is not assigned
 */
private double regressionPrediction(Instance transformedInstance,
                                    boolean [] selectedAttributes,
                                    double [] coefficients)
  throws Exception {
  double prediction = 0;
  int coeffIndex = 0;
  for (int attr = 0; attr < transformedInstance.numAttributes(); attr++) {
    // Skip the class attribute and any attribute not in the model.
    boolean isPredictor = (attr != m_ClassIndex) && selectedAttributes[attr];
    if (isPredictor) {
      prediction += coefficients[coeffIndex] * transformedInstance.value(attr);
      coeffIndex++;
    }
  }
  // Add the intercept, stored after the attribute coefficients.
  prediction += coefficients[coeffIndex];
  return prediction;
}
/**
 * Calculate a linear regression using the selected attributes.
 *
 * <p>The inputs are mean-centred (and, unless checks are turned off,
 * scaled by their standard deviations) before the ridge regression is
 * run; the resulting coefficients are then converted back to the
 * original attribute scale, with the centring folded into the
 * intercept (the last element of the returned array).
 *
 * @param selectedAttributes an array of booleans where each element
 * is true if the corresponding attribute should be included in the
 * regression.
 * @return an array of coefficients for the linear regression model;
 * the final element is the intercept.
 * @exception Exception if an error occurred during the regression.
 */
private double [] doRegression(boolean [] selectedAttributes)
  throws Exception {
  if (b_Debug) {
    System.out.print("doRegression(");
    for (int i = 0; i < selectedAttributes.length; i++) {
      System.out.print(" " + selectedAttributes[i]);
    }
    System.out.println(" )");
  }
  // Count how many attributes take part in this regression.
  int numAttributes = 0;
  for (int i = 0; i < selectedAttributes.length; i++) {
    if (selectedAttributes[i]) {
      numAttributes++;
    }
  }
  // Check whether there are still attributes left
  Matrix independent = null, dependent = null;
  double[] weights = null;
  if (numAttributes > 0) {
    independent = new Matrix(m_TransformedData.numInstances(),
                             numAttributes);
    dependent = new Matrix(m_TransformedData.numInstances(), 1);
    for (int i = 0; i < m_TransformedData.numInstances(); i ++) {
      Instance inst = m_TransformedData.instance(i);
      int column = 0;
      for (int j = 0; j < m_TransformedData.numAttributes(); j++) {
        if (j == m_ClassIndex) {
          dependent.setElement(i, 0, inst.classValue());
        } else {
          if (selectedAttributes[j]) {
            // Centre each input on its mean.
            double value = inst.value(j) - m_Means[j];
            // We only need to do this if we want to
            // scale the input
            if (!m_checksTurnedOff) {
              value /= m_StdDevs[j];
            }
            independent.setElement(i, column, value);
            column++;
          }
        }
      }
    }
    // Grab instance weights
    weights = new double [m_TransformedData.numInstances()];
    for (int i = 0; i < weights.length; i++) {
      weights[i] = m_TransformedData.instance(i).weight();
    }
  }
  // Compute coefficients (note that we have to treat the
  // intercept separately so that it doesn't get affected
  // by the ridge constant.)
  double[] coefficients = new double[numAttributes + 1];
  if (numAttributes > 0) {
    double[] coeffsWithoutIntercept =
      independent.regression(dependent, weights, m_Ridge);
    System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0,
                     numAttributes);
  }
  // With centred inputs the intercept starts at the class mean.
  coefficients[numAttributes] = m_ClassMean;
  // Convert coefficients into original scale
  int column = 0;
  for(int i = 0; i < m_TransformedData.numAttributes(); i++) {
    if ((i != m_TransformedData.classIndex()) &&
        (selectedAttributes[i])) {
      // We only need to do this if we have scaled the
      // input.
      if (!m_checksTurnedOff) {
        coefficients[column] /= m_StdDevs[i];
      }
      // We have centred the input: fold the mean offset into the
      // intercept (the final coefficient).
      coefficients[coefficients.length - 1] -=
        coefficients[column] * m_Means[i];
      column++;
    }
  }
  return coefficients;
}
/**
 * Generates a linear regression function predictor.
 *
 * @param argv the command-line options passed to the evaluation
 */
public static void main(String argv[]) {
  try {
    System.out.println(Evaluation.evaluateModel(new LinearRegression(),
                                                argv));
  } catch (Exception e) {
    e.printStackTrace();
    System.out.println(e.getMessage());
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -