📄 modelbag.java
字号:
updateBestTimesChosen(); }

  /**
   * Forward select one model: evaluates, for every eligible model, the
   * ensemble that would result from adding it, and adds the model giving the
   * highest metric value. A model is eligible if it has not been chosen yet,
   * or if replacement is allowed. The best candidate is added even if it
   * lowers the ensemble's performance; only candidates whose metric value is
   * strictly greater than -1.0 can be selected. If no candidate qualifies
   * (e.g. replacement is false and all models are already chosen), no action
   * is taken.
   *
   * @param withReplacement
   *            whether a model can be added more than once.
   * @param instances
   *            The dataset, for calculating performance.
   * @param metric
   *            The metric to which we will optimize. See EnsembleMetricHelper
   * @throws Exception
   *             if evaluating a candidate ensemble fails
   */
  public void forwardSelect(boolean withReplacement, Instances instances,
      int metric) throws Exception {
    double bestPerformance = -1.0;
    int bestIndex = -1;
    for (int i = 0; i < m_bagSize; ++i) {
      // Only consider models that are unchosen, unless replacement is
      // allowed.
      if ((m_timesChosen[i] == 0) || withReplacement) {
        // Predictions of the hypothetical ensemble with model i added.
        double[][] tempPredictions = computePredictions(i, true);
        double metricValue = evaluatePredictions(instances, tempPredictions,
            metric);
        // Strict comparison: on ties the lowest index wins.
        if (metricValue > bestPerformance) {
          bestIndex = i;
          bestPerformance = metricValue;
        }
      }
    }
    if (bestIndex == -1) {
      // Replacement must be false, with more hillclimb iterations than
      // models. Do nothing and return.
      if (m_debug) {
        System.out.println("Couldn't add model.  No action performed.");
      }
      return;
    }
    // We picked bestIndex as our best model. Update appropriate info.
    m_timesChosen[bestIndex]++;
    m_numChosen++;
    if (bestPerformance > m_bestPerformance) {
      // Track the peak performance over all hillclimb iterations: if this
      // step improved on it, record the current ensemble as the best one.
      updateBestTimesChosen();
      m_bestPerformance = bestPerformance;
    }
  }

  /**
   * Find the model whose removal will help the ensemble's performance the
   * most (or hurt it the least), and remove one occurrence of it. If at most
   * one model is chosen, nothing is removed. Otherwise the best removal is
   * performed even if it lowers performance; only candidates whose metric
   * value is strictly greater than -1.0 can be selected.
   *
   * @param instances
   *            The data set, for calculating performance
   * @param metric
   *            Metric to optimize for. See EnsembleMetricHelper.
   * @throws Exception
   *             if evaluating a candidate ensemble fails
   */
  public void backwardEliminate(Instances instances, int metric)
      throws Exception {
    if (m_numChosen <= 1) {
      // If we only have one model left, keep it, as a bag
      // which chooses no models doesn't make much sense.
      return;
    }
    double bestPerformance = -1.0;
    int bestIndex = -1;
    for (int i = 0; i < m_bagSize; ++i) {
      // Only models chosen at least once can be removed.
      if (m_timesChosen[i] > 0) {
        // Predictions of the hypothetical ensemble with model i removed.
        double[][] tempPredictions = computePredictions(i, false);
        double metricValue = evaluatePredictions(instances, tempPredictions,
            metric);
        // Strict comparison: on ties the lowest index wins.
        if (metricValue > bestPerformance) {
          bestIndex = i;
          bestPerformance = metricValue;
        }
      }
    }
    if (bestIndex == -1) {
      // The most likely cause of this is that we didn't have any models
      // we could remove. Do nothing & return.
      if (m_debug) {
        System.out.println("Couldn't remove model.  No action performed.");
      }
      return;
    }
    // We picked bestIndex as the best model to remove. Update appropriate
    // info.
    m_timesChosen[bestIndex]--;
    m_numChosen--;
    if (m_debug) {
      System.out.println("Removing model " + m_modelIndex[bestIndex] + " ("
          + bestIndex + ") " + bestPerformance);
    }
    if (bestPerformance > m_bestPerformance) {
      // Track the peak performance over all hillclimb iterations: if this
      // backwardEliminate step improved on it, record the current ensemble
      // as the best one.
      updateBestTimesChosen();
      m_bestPerformance = bestPerformance;
    }
  }

  /**
   * Find the best single action to perform, be it adding a model or removing
   * a model, and perform it. The best action is performed even if it lowers
   * performance; only candidates whose metric value is strictly greater than
   * -1.0 can be selected. Removal is only considered while more than one
   * model is chosen, matching {@link #backwardEliminate(Instances, int)}:
   * removing the last model would leave an empty ensemble whose averaged
   * predictions are a division by zero.
   *
   * @param with_replacement
   *            whether we can add a model more than once
   * @param instances
   *            The dataset, for determining performance.
   * @param metric
   *            The metric for which to optimize. See EnsembleMetricHelper.
   * @throws Exception
   *             if evaluating a candidate ensemble fails
   */
  public void forwardSelectOrBackwardEliminate(boolean with_replacement,
      Instances instances, int metric) throws Exception {
    double bestPerformance = -1.0;
    int bestIndex = -1;
    boolean added = true;
    // Never evaluate removing the only remaining model: computePredictions
    // would divide by (m_numChosen - 1) == 0 and yield NaN predictions.
    final boolean removalAllowed = m_numChosen > 1;
    for (int i = 0; i < m_bagSize; ++i) {
      // Try removing the model, if it has been chosen at least once.
      if (removalAllowed && (m_timesChosen[i] > 0)) {
        // Predictions of the hypothetical ensemble with model i removed.
        double[][] tempPredictions = computePredictions(i, false);
        double metricValue = evaluatePredictions(instances, tempPredictions,
            metric);
        if (metricValue > bestPerformance) {
          bestIndex = i;
          bestPerformance = metricValue;
          added = false;
        }
      }
      // Try adding the model, if it hasn't been chosen or if we can choose
      // it more than once.
      if ((m_timesChosen[i] == 0) || with_replacement) {
        // Predictions of the hypothetical ensemble with model i added.
        double[][] tempPredictions = computePredictions(i, true);
        double metricValue = evaluatePredictions(instances, tempPredictions,
            metric);
        if (metricValue > bestPerformance) {
          bestIndex = i;
          bestPerformance = metricValue;
          added = true;
        }
      }
    }
    if (bestIndex == -1) {
      // No candidate action qualified (e.g. the model bag is empty, or the
      // only chosen model cannot be re-added or removed). Just return.
      if (m_debug) {
        System.out.println("Couldn't add or remove model.  No action performed.");
      }
      return;
    }
    // Now we've found the best change to make:
    // * bestIndex is the (virtual) index of the model we should change
    // * added is true if the model should be added (false if it should be
    //   removed)
    int changeInWeight = added ? 1 : -1;
    m_timesChosen[bestIndex] += changeInWeight;
    m_numChosen += changeInWeight;
    if (bestPerformance > m_bestPerformance) {
      // Track the peak performance over all hillclimb iterations: if this
      // step improved on it, record the current ensemble as the best one.
      updateBestTimesChosen();
      m_bestPerformance = bestPerformance;
    }
  }

  /**
   * Returns the model weights, i.e. the per-model selection counts recorded
   * for the best ensemble. The internal array is returned directly, not a
   * copy.
   *
   * @return the model weights
   */
  public int[] getModelWeights() {
    return m_bestTimesChosen;
  }

  /**
   * Returns the "model" at the given virtual index. Here, by "model" we mean
   * its predictions with respect to the validation set. This is just a
   * convenience method, since we use the "virtual" index more than the real
   * one inside this class.
   *
   * @param index
   *            the "virtual" index - the one for internal use
   * @return the predictions for the model for all validation instances.
   */
  private double[][] model(int index) {
    return m_models[m_modelIndex[index]];
  }

  /**
   * Compute the averaged predictions of the current ensemble after adding (or
   * removing) one occurrence of the model at the given (internal) index. The
   * ensemble state itself is not modified. The weighted sum is divided by
   * {@code m_numChosen + 1} when adding and {@code m_numChosen - 1} when
   * removing; callers must make sure that divisor is not zero, otherwise the
   * result contains NaN or infinite values.
   *
   * @param index_to_change
   *            index of model we're adding or removing
   * @param add
   *            whether we add it. If false, we remove it.
   * @return the predictions for all validation instances
   */
  private double[][] computePredictions(int index_to_change, boolean add) {
    // Dimensions are taken from the first stored model.
    double[][] predictions = new double[m_models[0].length][m_models[0][0].length];
    // Sum the predictions of every chosen model, weighted by how many times
    // it has been chosen.
    for (int i = 0; i < m_bagSize; ++i) {
      if (m_timesChosen[i] > 0) {
        double[][] chosenModel = model(i);
        for (int j = 0; j < m_models[0].length; ++j) {
          for (int k = 0; k < m_models[0][j].length; ++k) {
            predictions[j][k] += chosenModel[j][k] * m_timesChosen[i];
          }
        }
      }
    }
    // Apply the hypothetical change and normalize by the new model count.
    int change = add ? 1 : -1;
    int newNumChosen = m_numChosen + change;
    double[][] changedModel = model(index_to_change);
    for (int j = 0; j < m_models[0].length; ++j) {
      for (int k = 0; k < m_models[0][j].length; ++k) {
        predictions[j][k] += change * changedModel[j][k];
        predictions[j][k] /= newNumChosen;
      }
    }
    return predictions;
  }

  /**
   * Return the performance of the given predictions on the given instances
   * with respect to the given metric (see EnsembleMetricHelper). Row
   * {@code i} of the predictions is evaluated against instance {@code i}.
   *
   * @param instances
   *            the validation data
   * @param temp_predictions
   *            the predictions to evaluate
   * @param metric
   *            the metric for which to optimize (see EnsembleMetricHelper)
   * @return the performance
   * @throws Exception
   *             if the evaluation fails
   */
  private double evaluatePredictions(Instances instances,
      double[][] temp_predictions, int metric) throws Exception {
    Evaluation eval = new Evaluation(instances);
    for (int i = 0; i < instances.numInstances(); ++i) {
      eval.evaluateModelOnceAndRecordPrediction(temp_predictions[i],
          instances.instance(i));
    }
    return EnsembleMetricHelper.getMetric(eval, metric);
  }

  /**
   * Gets the individual performances of all the models in the bag, indexed by
   * virtual (internal) model index.
   *
   * @param instances
   *            The validation data, for which we want performance.
   * @param metric
   *            The desired metric (see EnsembleMetricHelper).
   * @return the performance of each model
   * @throws Exception
   *             if the evaluation fails
   */
  public double[] getIndividualPerformance(Instances instances, int metric)
      throws Exception {
    double[] performance = new double[m_bagSize];
    for (int i = 0; i < m_bagSize; ++i) {
      performance[i] = evaluatePredictions(instances, model(i), metric);
    }
    return performance;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -