modelbag.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 636 行 · 第 1/2 页
JAVA
636 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    EnsembleSelection.java *    Copyright (C) 2006 David Michael * */package weka.classifiers.meta.ensembleSelection;import weka.classifiers.Evaluation;import weka.core.Instances;import java.util.Random;/** * This class is responsible for the duties of a bag of models. It is designed * for use with the EnsembleSelection meta classifier. It handles shuffling the * models, doing sort initialization, performing forward selection/ backwards * elimination, etc. * <p/> * We utilize a simple "virtual indexing" scheme inside. If we shuffle and/or * sort the models, we change the "virtual" order around. The elements of the * bag are always those elements with virtual index 0..(m_bagSize-1). Each * "virtual" index maps to some real index in m_models. Not every model in * m_models gets a virtual index... the virtual indexing is what defines the * subset of models of which our Bag is composed. This makes it easy to refer to * models in the bag, by their virtual index, while maintaining the original * indexing for our clients. *  * @author  David Michael * @version $Revision: 1.1 $ */public class ModelBag {    /**   * The "models", as a multidimensional array of predictions for the   * validation set. 
The first index is the model index, the second index is   * the index of the instance, and the third is the typical "class" index for   * a prediction's distribution. This is given to us in the constructor, and   * we never change it.   */  private double m_models[][][];    /**   * Maps each model in our virtual indexing scheme to its original index as   * it is in m_models. The first m_bag_size elements here are considered our   * bag. Throughout the code, we use the index in to this array to refer to a   * model. When we shuffle the models, we really simply shuffle this array.   * When we want to refer back to the original model, it is easily looked up   * in this array. That is, if j = m_model_index[i], then m_models[j] is the   * model referred to by "virtual index" i. Models can easily be accessed by   * their virtual index using the "model()" method.   */  private int m_modelIndex[];    /**   * The number of models in our bag. 1 <= m_bag_size <= m_models.length   */  private int m_bagSize;    /**   * The total number of models chosen thus far for this bag. This value is   * important when calculating the predictions for the bag. (See   * computePredictions).   */  private int m_numChosen;    /**   * The number of times each model has been chosen. Also can be thought of as   * the weight for each model. Indexed by the "virtual index".   */  private int m_timesChosen[];    /**   * If true, print out debug information.   */  private boolean m_debug;    /**   * Double representing the best performance achieved thus far in this bag.   * This Must be updated each time we make a change to the bag that improves   * performance. This is so that after all hillclimbing is completed, we can   * go back to the best ensemble that we encountered during hillclimbing.   
*/  private double m_bestPerformance;    /**   * Array representing the weights for all the models which achieved the best   * performance thus far for the bag (i.e., the weights that achieved   * m_bestPerformance. This Must be updated each time we make a change to the   * bag (that improves performance, by calling updateBestTimesChosen. This is   * so that after all hillclimbing is completed, we can go back to the best   * ensemble that we encountered during hillclimbing. This array, unlike   * m_timesChosen, uses the original indexing as taken from m_models. That   * way, any time getModelWeights is called (which returns this array), the   * array is in the correct format for our client.   */  private int m_bestTimesChosen[];    /**   * Constructor for ModelBag.   *    * @param models   *            The complete set of models from which to draw our bag. First   *            index is for the model, second is for the instance. The last   *            is a prediction distribution for that instance. Models are   *            represented by this array of predictions for validation data,   *            since that's all ensemble selection needs to know.   * @param bag_percent   *            The percentage of the set of given models that should be used   *            in the Model Bag.   * @param debug   *            Whether the ModelBag should print debug information.   *    */  public ModelBag(double models[][][], double bag_percent, boolean debug) {    m_debug = debug;    if (models.length == 0) {      throw new IllegalArgumentException(      "ModelBag needs at least 1 model.");    }    m_bagSize = (int) ((double) models.length * bag_percent);    m_models = models;    m_modelIndex = new int[m_models.length];    m_timesChosen = new int[m_models.length];    m_bestTimesChosen = m_timesChosen;    m_bestPerformance = 0.0;        // Initially, no models are chosen.    m_numChosen = 0;    // Prepare our virtual indexing scheme. 
Initially, the indexes are    // the same as the original.    for (int i = 0; i < m_models.length; ++i) {      m_modelIndex[i] = i;      m_timesChosen[i] = 0;    }  }    /**   * Swap model at virtual index i with model at virtual index j. This is used   * to shuffle the models. We do not change m_models, only the arrays which   * use the virtual indexing; m_modelIndex and m_timesChosen.   *    * @param i	first index   * @param j	second index   */  private void swap(int i, int j) {    if (i != j) {      int temp_index = m_modelIndex[i];      m_modelIndex[i] = m_modelIndex[j];      m_modelIndex[j] = temp_index;            int tempWeight = m_timesChosen[i];      m_timesChosen[i] = m_timesChosen[j];      m_timesChosen[j] = tempWeight;    }  }    /**   * Shuffle the models. The order in m_models is preserved, but we change our   * virtual indexes around.   *    * @param rand	the random number generator to use   */  public void shuffle(Random rand) {    if (m_models.length < 2)      return;        for (int i = 0; i < m_models.length; ++i) {      int swap_index = rand.nextInt(m_models.length - 1);      if (swap_index >= i)	++swap_index; // don't swap with itself      swap(i, swap_index);    }  }    /**   * Convert an array of weights using virtual indices to an array of weights   * using real indices.   *    * @param virtual_weights	the virtual indices   * @return			the real indices   */  private int[] virtualToRealWeights(int virtual_weights[]) {    int real_weights[] = new int[virtual_weights.length];    for (int i = 0; i < real_weights.length; ++i) {      real_weights[m_modelIndex[i]] = virtual_weights[i];    }    return real_weights;  }    /**   *    */  private void updateBestTimesChosen() {    m_bestTimesChosen = virtualToRealWeights(m_timesChosen);  }    /**   * Sort initialize the bag.   *    * @param num   *            the Maximum number of models to initialize with   * @param greedy   *            True if we do greedy addition, up to num. 
Greedy sort   *            initialization adds models (up to num) in order of best to   *            worst performance until performance no longer improves.   * @param instances   *            the data set (needed for performance evaluation)   * @param metric   *            metric for which to optimize. See EnsembleMetricHelper   * @return returns an array of indexes which were selected, in order   *         starting from the model with best performance.   * @throws Exception if something goes wrong   */  public int[] sortInitialize(int num, boolean greedy, Instances instances,      int metric) throws Exception {        // First, get the performance of each model    double performance[] = new double[m_bagSize];    for (int i = 0; i < m_bagSize; ++i) {      performance[i] = evaluatePredictions(instances, model(i), metric);    }    int bestModels[] = new int[num]; // we'll use this to save model info    // Now sort the models by their performance... note we only need the    // first "num",    // so we don't actually bother to sort the whole thing... instead, we    // pick the num best    // by running num iterations of selection sort.    for (int i = 0; i < num; ++i) {      int max_index = i;      double max_value = performance[i];      for (int j = i + 1; j < m_bagSize; ++j) {	// Find the best model which we haven't already selected	if (performance[j] > max_value) {	  max_value = performance[j];	  max_index = j;	}      }      // Swap ith model in to the ith position (selection sort)      this.swap(i, max_index);      // swap performance numbers, too      double temp_perf = performance[i];      performance[i] = performance[max_index];      performance[max_index] = temp_perf;            bestModels[i] = m_modelIndex[i];      if (!greedy) {	// If we're not being greedy, we just throw the model in	// no matter what	++m_timesChosen[i];	++m_numChosen;      }    }    // Now the best "num" models are all sorted and in position.    
if (greedy) {      // If the "greedy" option was specified, do a smart sort      // initialization      // that adds models only so long as they help overall performance.      // This is what was done in the original Caruana paper.      double[][] tempPredictions = null;      double bestPerformance = 0.0;      if (num > 0) {	++m_timesChosen[0];	++m_numChosen;	updateBestTimesChosen();      }      for (int i = 1; i < num; ++i) {	tempPredictions = computePredictions(i, true);	double metric_value = evaluatePredictions(instances,	    tempPredictions, metric);	if (metric_value > bestPerformance) {	  // If performance improved, update the appropriate info.	  bestPerformance = metric_value;	  ++m_timesChosen[i];	  ++m_numChosen;	  updateBestTimesChosen();	} else {	  // We found a model that doesn't help performance, so we	  // stop adding models.	  break;	}      }    }    updateBestTimesChosen();    if (m_debug) {      System.out.println("Sort Initialization added best " + m_numChosen	  + " models to the bag.");    }    return bestModels;  }    /**   * Add "weight" to the number of times each model in the bag was chosen.   * Typically for use with backward elimination.   *    * @param weight	the weight to add   */  public void weightAll(int weight) {    for (int i = 0; i < m_bagSize; ++i) {      m_timesChosen[i] += weight;      m_numChosen += weight;    }
modelbag.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 modelbag.java 源码文件，采用 Java 编程语言编写，共 636 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?