⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 linearregression.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
   *
   * @param newEliminateColinearAttributes Value to assign to EliminateColinearAttributes.
   */
  public void setEliminateColinearAttributes(boolean newEliminateColinearAttributes) {
    // The stored flag is read by findBestModel() to decide whether
    // deselectColinearAttributes() is applied after each regression.
    this.m_EliminateColinearAttributes = newEliminateColinearAttributes;
  }
  
  /**
   * Reports the number of parameters of the current model as one less
   * than the length of the stored coefficient array. The arrays built
   * by doRegression() hold one weight per selected attribute followed
   * by the intercept, so the intercept slot is not counted.
   *
   * @return the length of the coefficient array minus one
   */
  public int numParameters() {
    final int storedCoefficients = m_Coefficients.length;
    return storedCoefficients - 1;
  }

  /**
   * Supplies the tool tip describing the attribute selection property.
   *
   * @return a description of the available attribute selection methods,
   * suitable for displaying in the explorer/experimenter gui
   */
  public String attributeSelectionMethodTipText() {
    final String purpose =
      "Set the method used to select attributes for use in the linear regression. ";
    final String noSelection = "Available methods are: no attribute selection, ";
    final String m5 =
      "attribute selection using M5's method (step through the attributes "
      + "removing the one with the smallest standardised coefficient until no "
      + "improvement is observed in the estimate of the error given by the Akaike "
      + "information criterion), ";
    final String greedy =
      "and a greedy selection using the Akaike information metric.";
    return purpose + noSelection + m5 + greedy;
  }

  /**
   * Chooses the attribute selection method used by the linear
   * regression. The request is silently ignored when the supplied
   * tag does not belong to TAGS_SELECTION.
   *
   * @param method the attribute selection method to use.
   */
  public void setAttributeSelectionMethod(SelectedTag method) {

    // Reference comparison: only a SelectedTag created over the very
    // same TAGS_SELECTION array is accepted.
    if (method.getTags() != TAGS_SELECTION) {
      return;
    }
    m_AttributeSelection = method.getSelectedTag().getID();
  }

  /**
   * Reports the attribute selection method currently configured for
   * the linear regression.
   *
   * @return the stored method id wrapped in a SelectedTag over
   * TAGS_SELECTION.
   */
  public SelectedTag getAttributeSelectionMethod() {
    SelectedTag current = new SelectedTag(m_AttributeSelection, TAGS_SELECTION);
    return current;
  }

  /**
   * Supplies the tool tip describing the debug property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String debugTipText() {
    final String tip = "Outputs debug information to the console.";
    return tip;
  }

  /**
   * Switches debugging output on or off.
   *
   * @param debug true if debugging output should be printed
   */
  public void setDebug(boolean debug) {
    this.b_Debug = debug;
  }

  /**
   * Tells whether debugging output is currently enabled.
   *
   * @return true if debugging output will be printed
   */
  public boolean getDebug() {
    return this.b_Debug;
  }

  /**
   * Looks for the selected attribute whose absolute standardised
   * coefficient (coefficient * attribute std dev / class std dev) is
   * the largest one exceeding 1.5 and, if there is one, clears its
   * selection flag.
   *
   * @param selectedAttributes an array of flags indicating which 
   * attributes are included in the regression model; modified in
   * place when an attribute is removed
   * @param coefficients an array of coefficients for the regression
   * model, holding one entry per selected attribute in attribute order
   * @return true if an attribute was removed
   */
  private boolean deselectColinearAttributes(boolean [] selectedAttributes,
                                             double [] coefficients) {

    // Starting the running maximum at the threshold means only values
    // strictly above 1.5 can ever be picked.
    double largestSC = 1.5;
    int worstAttr = -1;
    int coeffIndex = 0;

    for (int attr = 0; attr < selectedAttributes.length; attr++) {
      if (!selectedAttributes[attr]) {
        continue;
      }
      double standardised = Math.abs(coefficients[coeffIndex] * m_StdDevs[attr]
                                     / m_ClassStdDev);
      // Coefficients are packed: advance once per selected attribute
      coeffIndex++;
      if (standardised > largestSC) {
        largestSC = standardised;
        worstAttr = attr;
      }
    }

    if (worstAttr < 0) {
      return false;
    }

    selectedAttributes[worstAttr] = false;
    if (b_Debug) {
      System.out.println("Deselected colinear attribute:" + (worstAttr + 1)
                         + " with standardised coefficient: " + largestSC);
    }
    return true;
  }

  /**
   * Fits the regression on the currently selected attributes and then
   * prunes attributes according to m_AttributeSelection, using an
   * Akaike-style score to judge each candidate model.
   *
   * The method first calls doRegression() repeatedly, dropping one
   * attribute per round through deselectColinearAttributes() while
   * m_EliminateColinearAttributes is set and an attribute was dropped.
   * The model left after that step serves as the "full model" whose
   * error (as returned by calculateSE()) normalises all later scores.
   *
   * SELECTION_GREEDY: in every pass each selected attribute is removed
   * in turn and the removal giving the lowest score below the current
   * best is kept; passes repeat until none improves the score.
   * SELECTION_M5: in every pass the attribute with the smallest
   * absolute standardised coefficient is removed; the removal is kept
   * if it improves the score, otherwise it is undone and the search
   * stops.
   * SELECTION_NONE: the model is left as fitted.
   *
   * The outcome is stored in m_SelectedAttributes and m_Coefficients.
   *
   * @exception Exception if regression can't be done
   */
  private void findBestModel() throws Exception {

    // For the weighted case we still use numInstances in
    // the calculation of the Akaike criterion. 
    int numInstances = m_TransformedData.numInstances();

    if (b_Debug) {
      // Prints an empty copy of the data set (capacity 0)
      System.out.println((new Instances(m_TransformedData, 0)).toString());
    }

    // Perform a regression for the full model, and remove colinear attributes
    do {
      m_Coefficients = doRegression(m_SelectedAttributes);
    } while (m_EliminateColinearAttributes && 
	     deselectColinearAttributes(m_SelectedAttributes, m_Coefficients));

    // Figure out current number of attributes + 1. (We treat this model
    // as the full model for the Akaike-based methods.)
    int numAttributes = 1;
    for (int i = 0; i < m_SelectedAttributes.length; i++) {
      if (m_SelectedAttributes[i]) {
	numAttributes++;
      }
    }

    // Error of the full model; used as the denominator of every
    // candidate's score below.
    double fullMSE = calculateSE(m_SelectedAttributes, m_Coefficients);
    // Score of the full model: its error ratio is 1, so only the
    // (numInstances - numAttributes) factor and the penalty remain.
    double akaike = (numInstances - numAttributes) + 2 * numAttributes;
    if (b_Debug) {
      System.out.println("Initial Akaike value: " + akaike);
    }

    boolean improved;
    // Parameter count of the candidate models; decremented once per pass
    int currentNumAttributes = numAttributes;
    switch (m_AttributeSelection) {

    case SELECTION_GREEDY:

      // Greedy attribute removal
      do {
	// Work on a copy so that candidates of this pass all start from
	// the selection as it was when the pass began.
	boolean [] currentSelected = (boolean []) m_SelectedAttributes.clone();
	improved = false;
	currentNumAttributes--;

	for (int i = 0; i < m_SelectedAttributes.length; i++) {
	  if (currentSelected[i]) {

	    // Calculate the akaike rating without this attribute
	    currentSelected[i] = false;
	    double [] currentCoeffs = doRegression(currentSelected);
	    double currentMSE = calculateSE(currentSelected, currentCoeffs);
	    double currentAkaike = currentMSE / fullMSE 
	      * (numInstances - numAttributes)
	      + 2 * currentNumAttributes;
	    if (b_Debug) {
	      System.out.println("(akaike: " + currentAkaike);
	    }

	    // If it is better than the current best
	    if (currentAkaike < akaike) {
	      if (b_Debug) {
		// Note: accepted removals are reported on System.err,
		// candidate scores above on System.out.
		System.err.println("Removing attribute " + (i + 1)
				   + " improved Akaike: " + currentAkaike);
	      }
	      improved = true;
	      akaike = currentAkaike;
	      // Overwrites any earlier winner of this pass, so at most
	      // one attribute ends up removed per pass.
	      System.arraycopy(currentSelected, 0,
			       m_SelectedAttributes, 0,
			       m_SelectedAttributes.length);
	      m_Coefficients = currentCoeffs;
	    }
	    // Put the attribute back before trying the next candidate
	    currentSelected[i] = true;
	  }
	}
      } while (improved);
      break;

    case SELECTION_M5:

      // Step through the attributes removing the one with the smallest 
      // standardised coefficient until no improvement in Akaike
      do {
	improved = false;
	currentNumAttributes--;

	// Find attribute with smallest SC
	double minSC = 0;
	// coeff indexes m_Coefficients, which holds one entry per
	// selected attribute, so it advances only on selected ones.
	int minAttr = -1, coeff = 0;
	for (int i = 0; i < m_SelectedAttributes.length; i++) {
	  if (m_SelectedAttributes[i]) {
	    double SC = Math.abs(m_Coefficients[coeff] * m_StdDevs[i] 
				 / m_ClassStdDev);
	    // coeff == 0 marks the first selected attribute, which
	    // initialises the running minimum.
	    if ((coeff == 0) || (SC < minSC)) {
	      minSC = SC;
	      minAttr = i;
	    }
	    coeff++;
	  }
	}

	// See whether removing it improves the Akaike score
	if (minAttr >= 0) {
	  // Tentatively remove it directly in m_SelectedAttributes
	  m_SelectedAttributes[minAttr] = false;
	  double [] currentCoeffs = doRegression(m_SelectedAttributes);
	  double currentMSE = calculateSE(m_SelectedAttributes, currentCoeffs);
	  double currentAkaike = currentMSE / fullMSE 
	    * (numInstances - numAttributes)
	    + 2 * currentNumAttributes;
	  if (b_Debug) {
	    System.out.println("(akaike: " + currentAkaike);
	  }

	  // If it is better than the current best
	  if (currentAkaike < akaike) {
	    if (b_Debug) {
	      System.err.println("Removing attribute " + (minAttr + 1)
				 + " improved Akaike: " + currentAkaike);
	    }
	    improved = true;
	    akaike = currentAkaike;
	    m_Coefficients = currentCoeffs;
	  } else {
	    // No gain: undo the tentative removal; the loop then ends
	    m_SelectedAttributes[minAttr] = true;
	  }
	}
      } while (improved);
      break;

    case SELECTION_NONE:
      // Keep the model exactly as fitted above
      break;
    }
  }

  /**
   * Computes the total squared error of a regression model over the
   * training data. The squared residuals are summed and returned as
   * they are; the sum is not divided by the number of instances.
   *
   * @param selectedAttributes an array of flags indicating which 
   * attributes are included in the regression model
   * @param coefficients an array of coefficients for the regression
   * model
   * @return the sum of squared errors on the training data
   * @exception Exception if computing a prediction or reading a class
   * value fails
   */
  private double calculateSE(boolean [] selectedAttributes, 
                             double [] coefficients) throws Exception {

    double squaredErrorSum = 0;
    int row = 0;
    while (row < m_TransformedData.numInstances()) {
      double predicted = regressionPrediction(m_TransformedData.instance(row),
                                              selectedAttributes,
                                              coefficients);
      double actual = m_TransformedData.instance(row).classValue();
      double residual = predicted - actual;
      squaredErrorSum += residual * residual;
      row++;
    }
    return squaredErrorSum;
  }

  /**
   * Evaluates a regression model for one instance: the weighted sum
   * of the selected, non-class attribute values plus the intercept
   * stored right after the last used coefficient.
   *
   * @param transformedInstance the input instance
   * @param selectedAttributes an array of flags indicating which 
   * attributes are included in the regression model
   * @param coefficients an array of coefficients for the regression
   * model; one weight per selected attribute followed by the intercept
   * @return the regression value for the instance.
   * @exception Exception declared for callers; NOTE(review): nothing in
   * this body throws explicitly, so any exception would have to come
   * from the instance accessors - confirm
   */
  private double regressionPrediction(Instance transformedInstance,
                                      boolean [] selectedAttributes,
                                      double [] coefficients) 
  throws Exception {

    double sum = 0;
    int next = 0;
    for (int attr = 0; attr < transformedInstance.numAttributes(); attr++) {
      // The class attribute never contributes, whatever its flag says
      if ((attr == m_ClassIndex) || !selectedAttributes[attr]) {
        continue;
      }
      sum += coefficients[next] * transformedInstance.value(attr);
      next++;
    }

    // Whatever follows the used weights is the intercept
    sum += coefficients[next];
    return sum;
  }

  /**
   * Fits a linear regression on the selected attributes of the
   * transformed training data. Attribute values are centred on
   * m_Means (and divided by m_StdDevs unless m_checksTurnedOff is
   * set) before the fit; afterwards the coefficients are converted
   * back so that they apply to the uncentred, unscaled values.
   *
   * @param selectedAttributes an array of booleans where each element
   * is true if the corresponding attribute should be included in the
   * regression.
   * @return an array of coefficients for the linear regression model:
   * one weight per selected attribute, in attribute order, followed by
   * the intercept.
   * @exception Exception if an error occurred during the regression.
   */
  private double [] doRegression(boolean [] selectedAttributes) 
  throws Exception {

    if (b_Debug) {
      System.out.print("doRegression(");
      int flag = 0;
      while (flag < selectedAttributes.length) {
        System.out.print(" " + selectedAttributes[flag]);
        flag++;
      }
      System.out.println(" )");
    }

    // How many attributes take part in this fit
    int selectedCount = 0;
    for (int a = 0; a < selectedAttributes.length; a++) {
      selectedCount += selectedAttributes[a] ? 1 : 0;
    }

    // One weight per selected attribute; the extra last slot is the
    // intercept, kept out of the fit so that the ridge constant
    // doesn't affect it.
    double[] result = new double[selectedCount + 1];
    final int interceptSlot = selectedCount;

    // Only fit when there are still attributes left
    if (selectedCount > 0) {
      int rows = m_TransformedData.numInstances();
      Matrix predictors = new Matrix(rows, selectedCount);
      Matrix target = new Matrix(rows, 1);

      for (int row = 0; row < rows; row++) {
        Instance current = m_TransformedData.instance(row);
        int col = 0;
        for (int attr = 0; attr < m_TransformedData.numAttributes(); attr++) {
          if (attr == m_ClassIndex) {
            target.setElement(row, 0, current.classValue());
            continue;
          }
          if (!selectedAttributes[attr]) {
            continue;
          }
          double centred = current.value(attr) - m_Means[attr];

          // Scaling is skipped when m_checksTurnedOff is set
          if (!m_checksTurnedOff) {
            centred /= m_StdDevs[attr];
          }
          predictors.setElement(row, col, centred);
          col++;
        }
      }

      // Grab instance weights
      double[] rowWeights = new double [rows];
      for (int row = 0; row < rowWeights.length; row++) {
        rowWeights[row] = m_TransformedData.instance(row).weight();
      }

      double[] fitted = predictors.regression(target, rowWeights, m_Ridge);
      System.arraycopy(fitted, 0, result, 0, selectedCount);
    }
    result[interceptSlot] = m_ClassMean;

    // Convert coefficients into original scale
    int slot = 0;
    for (int attr = 0; attr < m_TransformedData.numAttributes(); attr++) {
      if ((attr == m_TransformedData.classIndex())
          || !selectedAttributes[attr]) {
        continue;
      }

      // Undo the scaling, if the input was scaled
      if (!m_checksTurnedOff) {
        result[slot] /= m_StdDevs[attr];
      }

      // Undo the centring by folding the mean into the intercept
      result[interceptSlot] -= result[slot] * m_Means[attr];
      slot++;
    }

    return result;
  }
 
  /**
   * Command-line entry point: evaluates a new LinearRegression with
   * the given options and prints the result to standard output.
   *
   * @param argv the command-line options, handed unchanged to
   * Evaluation.evaluateModel
   */
  public static void main(String argv[]) {
    
    try {
      System.out.println(Evaluation.evaluateModel(new LinearRegression(),
						  argv));
    } catch (Exception e) {
      // Failures are reported rather than propagated: the stack trace
      // is printed first, then the bare message on standard output.
      e.printStackTrace();
      System.out.println(e.getMessage());
    }
  }
}


  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -