📄 modelbag.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    updateBestTimesChosen();  }    /**   * Forward select one model. Will add the model which has the best effect on   * performance. If replacement is false, and all models are chosen, no   * action is taken. If a model can be added, one always is (even if it hurts   * performance).   *    * @param withReplacement   *            whether a model can be added more than once.   * @param instances   *            The dataset, for calculating performance.   * @param metric   *            The metric to which we will optimize. See EnsembleMetricHelper   * @throws Exception if something goes wrong   */  public void forwardSelect(boolean withReplacement, Instances instances,      int metric) throws Exception {        double bestPerformance = -1.0;    int bestIndex = -1;    double tempPredictions[][];    for (int i = 0; i < m_bagSize; ++i) {      // For each model in the bag      if ((m_timesChosen[i] == 0) || withReplacement) {	// If the model has not been chosen, or we're allowing	// replacement	// Get the predictions we would have if we add this model to the	// ensemble	tempPredictions = computePredictions(i, true);	// And find out how the hypothetical ensemble would perform.	double metric_value = evaluatePredictions(instances,	    tempPredictions, metric);	if (metric_value > bestPerformance) {	  // If it's better than our current best, make it our NEW	  // best.	  bestIndex = i;	  bestPerformance = metric_value;	}      }    }    if (bestIndex == -1) {      // Replacement must be false, with more hillclimb iterations than      // models. Do nothing and return.      if (m_debug) {	System.out.println("Couldn't add model.  No action performed.");      }      return;    }    // We picked bestIndex as our best model. Update appropriate info.    m_timesChosen[bestIndex]++;    m_numChosen++;    if (bestPerformance > m_bestPerformance) {      // We find the peak of our performance over all hillclimb      // iterations.      // If this forwardSelect step improved our overall performance,      // update      // our best ensemble info.      updateBestTimesChosen();      m_bestPerformance = bestPerformance;    }  }    /**   * Find the model whose removal will help the ensemble's performance the   * most, and remove it. If there is only one model left, we leave it in. If   * we can remove a model, we always do, even if it hurts performance.   *    * @param instances   *            The data set, for calculating performance   * @param metric   *            Metric to optimize for. See EnsembleMetricHelper.   * @throws Exception if something goes wrong   */  public void backwardEliminate(Instances instances, int metric)  throws Exception {        // Find the best model to remove. I.e., model for which removal improves    // performance the most (or hurts it least), and remove it.    if (m_numChosen <= 1) {      // If we only have one model left, keep it, as a bag      // which chooses no models doesn't make much sense.      return;    }    double bestPerformance = -1.0;    int bestIndex = -1;    double tempPredictions[][];    for (int i = 0; i < m_bagSize; ++i) {      // For each model in the bag      if (m_timesChosen[i] > 0) {	// If the model has been chosen at least once,	// Get the predictions we would have if we remove this model	tempPredictions = computePredictions(i, false);	// And find out how the hypothetical ensemble would perform.	double metric_value = evaluatePredictions(instances,	    tempPredictions, metric);	if (metric_value > bestPerformance) {	  // If it's better than our current best, make it our NEW	  // best.	  bestIndex = i;	  bestPerformance = metric_value;	}      }    }    if (bestIndex == -1) {      // The most likely cause of this is that we didn't have any models      // we could      // remove. Do nothing & return.      if (m_debug) {	System.out	.println("Couldn't remove model.  No action performed.");      }      return;    }    // We picked bestIndex as our best model. Update appropriate info.    m_timesChosen[bestIndex]--;    m_numChosen--;    if (m_debug) {      System.out.println("Removing model " + m_modelIndex[bestIndex]                                                          + " (" + bestIndex + ") " + bestPerformance);    }    if (bestPerformance > m_bestPerformance) {      // We find the peak of our performance over all hillclimb      // iterations.      // If this forwardSelect step improved our overall performance,      // update      // our best ensemble info.      updateBestTimesChosen();      m_bestPerformance = bestPerformance;    }    // return m_model_index[best_index]; //translate to original indexing    // and return  }    /**   * Find the best action to perform, be it adding a model or removing a   * model, and perform it. Some action is always performed, even if it hurts   * performance.   *    * @param with_replacement   *            whether we can add a model more than once   * @param instances   *            The dataset, for determining performance.   * @param metric   *            The metric for which to optimize. See EnsembleMetricHelper.   * @throws Exception if something goes wrong   */  public void forwardSelectOrBackwardEliminate(boolean with_replacement,      Instances instances, int metric) throws Exception {        // Find the best action to perform, be it adding a model or removing a    // model,    // and do it.    double bestPerformance = -1.0;    int bestIndex = -1;    boolean added = true;    double tempPredictions[][];    for (int i = 0; i < m_bagSize; ++i) {      // For each model in the bag:      // Try removing the model      if (m_timesChosen[i] > 0) {	// If the model has been chosen at least once,	// Get the predictions we would have if we remove this model	tempPredictions = computePredictions(i, false);	// And find out how the hypothetical ensemble would perform.	double metric_value = evaluatePredictions(instances,	    tempPredictions, metric);	if (metric_value > bestPerformance) {	  // If it's better than our current best, make it our NEW	  // best.	  bestIndex = i;	  bestPerformance = metric_value;	  added = false;	}      }      if ((m_timesChosen[i] == 0) || with_replacement) {	// If the model hasn't been chosen, or if we can choose it more	// than once, try adding it:	// Get the predictions we would have if we added the model	tempPredictions = computePredictions(i, true);	// And find out how the hypothetical ensemble would perform.	double metric_value = evaluatePredictions(instances,	    tempPredictions, metric);	if (metric_value > bestPerformance) {	  // If it's better than our current best, make it our NEW	  // best.	  bestIndex = i;	  bestPerformance = metric_value;	  added = true;	}      }    }    if (bestIndex == -1) {      // Shouldn't really happen. Possible (I think) if the model bag is      // empty. Just return.      if (m_debug) {	System.out.println("Couldn't add or remove model.  No action performed.");      }      return;    }    // Now we've found the best change to make:    // * bestIndex is the (virtual) index of the model we should change    // * added is true if the model should be added (false if should be    // removed)    int changeInWeight = added ? 1 : -1;    m_timesChosen[bestIndex] += changeInWeight;    m_numChosen += changeInWeight;    if (bestPerformance > m_bestPerformance) {      // We find the peak of our performance over all hillclimb      // iterations.      // If this forwardSelect step improved our overall performance,      // update      // our best ensemble info.      updateBestTimesChosen();      m_bestPerformance = bestPerformance;    }  }    /**   * returns the model weights   *    * @return		the model weights   */  public int[] getModelWeights() {    return m_bestTimesChosen;  }    /**   * Returns the "model" at the given virtual index. Here, by "model" we mean   * its predictions with respect to the validation set. This is just a   * convenience method, since we use the "virtual" index more than the real   * one inside this class.   *    * @param index   *            the "virtual" index - the one for internal use   * @return the predictions for the model for all validation instances.   */  private double[][] model(int index) {    return m_models[m_modelIndex[index]];  }    /**   * Compute predictions based on the current model, adding (or removing) the   * model at the given (internal) index.   *    * @param index_to_change   *            index of model we're adding or removing   * @param add   *            whether we add it. If false, we remove it.   * @return the predictions for all validation instances   */  private double[][] computePredictions(int index_to_change, boolean add) {    double[][] predictions = new double[m_models[0].length][m_models[0][0].length];    for (int i = 0; i < m_bagSize; ++i) {      if (m_timesChosen[i] > 0) {	for (int j = 0; j < m_models[0].length; ++j) {	  for (int k = 0; k < m_models[0][j].length; ++k) {	    predictions[j][k] += model(i)[j][k] * m_timesChosen[i];	  }	}      }    }    for (int j = 0; j < m_models[0].length; ++j) {      int change = add ? 1 : -1;      for (int k = 0; k < m_models[0][j].length; ++k) {	predictions[j][k] += change * model(index_to_change)[j][k];	predictions[j][k] /= (m_numChosen + change);      }    }    return predictions;  }    /**   * Return the performance of the given predictions on the given instances   * with respect to the given metric (see EnsembleMetricHelper).   *    * @param instances   *            the validation data   * @param temp_predictions   *            the predictions to evaluate   * @param metric   *            the metric for which to optimize (see EnsembleMetricHelper)   * @return the performance   * @throws Exception if something goes wrong   */  private double evaluatePredictions(Instances instances,      double[][] temp_predictions, int metric) throws Exception {        Evaluation eval = new Evaluation(instances);    for (int i = 0; i < instances.numInstances(); ++i) {      eval.evaluateModelOnceAndRecordPrediction(temp_predictions[i],	  instances.instance(i));    }    return EnsembleMetricHelper.getMetric(eval, metric);  }    /**   * Gets the individual performances of all the models in the bag.   *    * @param instances   *            The validation data, for which we want performance.   * @param metric   *            The desired metric (see EnsembleMetricHelper).   * @return the performance   * @throws Exception if something goes wrong   */  public double[] getIndividualPerformance(Instances instances, int metric)    throws Exception {        double[] performance = new double[m_bagSize];    for (int i = 0; i < m_bagSize; ++i) {      performance[i] = evaluatePredictions(instances, model(i), metric);    }    return performance;  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -