📄 ensembleselection.java
字号:
*/
  public String hillclimbMetricTipText() {
    // Fixed typo: "optimizer" -> "optimize", and stray double period.
    return "The metric that will be used to optimize the chosen ensemble.";
  }

  /**
   * Gets the hill climbing metric. Will be one of METRIC_ACCURACY,
   * METRIC_RMSE, METRIC_ROC, METRIC_PRECISION, METRIC_RECALL, METRIC_FSCORE,
   * METRIC_ALL
   *
   * @return the hillclimbMetric
   */
  public SelectedTag getHillclimbMetric() {
    return new SelectedTag(m_hillclimbMetric, TAGS_METRIC);
  }

  /**
   * Sets the hill climbing metric. Will be one of METRIC_ACCURACY,
   * METRIC_RMSE, METRIC_ROC, METRIC_PRECISION, METRIC_RECALL, METRIC_FSCORE,
   * METRIC_ALL
   *
   * @param newType the new hillclimbMetric
   */
  public void setHillclimbMetric(SelectedTag newType) {
    // Silently ignores tags from a foreign tag set, matching setAlgorithm.
    if (newType.getTags() == TAGS_METRIC) {
      m_hillclimbMetric = newType.getSelectedTag().getID();
    }
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String algorithmTipText() {
    // Fixed typo: "optimizer" -> "optimize".
    return "The algorithm used to optimize the ensemble.";
  }

  /**
   * Gets the algorithm
   *
   * @return the algorithm
   */
  public SelectedTag getAlgorithm() {
    return new SelectedTag(m_algorithm, TAGS_ALGORITHM);
  }

  /**
   * Sets the Algorithm to use
   *
   * @param newType the new algorithm
   */
  public void setAlgorithm(SelectedTag newType) {
    if (newType.getTags() == TAGS_ALGORITHM) {
      m_algorithm = newType.getSelectedTag().getID();
    }
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String hillclimbIterationsTipText() {
    return "The number of hillclimbing iterations for the ensemble selection algorithm.";
  }

  /**
   * Gets the number of hillclimbIterations.
   *
   * @return the number of hillclimbIterations
   */
  public int getHillclimbIterations() {
    return m_hillclimbIterations;
  }

  /**
   * Sets the number of hillclimbIterations.
   *
   * @param n the number of hillclimbIterations (zero or more)
   * @throws Exception if parameter illegal
   */
  public void setHillclimbIterations(int n) throws Exception {
    // The guard admits n == 0, so the message says "cannot be negative"
    // (the old text claimed "must be positive", contradicting the check).
    if (n < 0) {
      throw new IllegalArgumentException(
          "EnsembleSelection: Number of hillclimb iterations "
              + "cannot be negative.");
    }
    m_hillclimbIterations = n;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String numModelBagsTipText() {
    return "The number of \"model bags\" used in the ensemble selection algorithm.";
  }

  /**
   * Gets numModelBags.
   *
   * @return numModelBags
   */
  public int getNumModelBags() {
    return m_numModelBags;
  }

  /**
   * Sets numModelBags.
   *
   * @param n the new value for numModelBags (strictly positive)
   * @throws Exception if parameter illegal
   */
  public void setNumModelBags(int n) throws Exception {
    if (n <= 0) {
      throw new IllegalArgumentException(
          "EnsembleSelection: Number of model bags "
              + "must be positive.");
    }
    m_numModelBags = n;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String sortInitializationRatioTipText() {
    return "The ratio of library models to be used for sort initialization.";
  }

  /**
   * Get the value of sortInitializationRatio.
   *
   * @return Value of sortInitializationRatio.
   */
  public double getSortInitializationRatio() {
    return m_sortInitializationRatio;
  }

  /**
   * Set the value of sortInitializationRatio.
   *
   * @param v Value to assign to sortInitializationRatio.
   */
  public void setSortInitializationRatio(double v) {
    m_sortInitializationRatio = v;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String replacementTipText() {
    return "Whether models in the library can be included more than once in an ensemble.";
  }

  /**
   * Get the value of replacement.
   *
   * @return Value of replacement.
   */
  public boolean getReplacement() {
    return m_replacement;
  }

  /**
   * Set the value of replacement.
   *
   * @param newReplacement Value to assign to replacement.
   */
  public void setReplacement(boolean newReplacement) {
    m_replacement = newReplacement;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String greedySortInitializationTipText() {
    return "Whether sort initialization greedily stops adding models when performance degrades.";
  }

  /**
   * Get the value of greedySortInitialization.
   *
   * @return Value of greedySortInitialization.
   */
  public boolean getGreedySortInitialization() {
    return m_greedySortInitialization;
  }

  /**
   * Set the value of greedySortInitialization.
   *
   * @param newGreedySortInitialization Value to assign to
   *          greedySortInitialization.
   */
  public void setGreedySortInitialization(boolean newGreedySortInitialization) {
    m_greedySortInitialization = newGreedySortInitialization;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String verboseOutputTipText() {
    return "Whether metrics are printed for each model.";
  }

  /**
   * Get the value of verboseOutput.
   *
   * @return Value of verboseOutput.
   */
  public boolean getVerboseOutput() {
    return m_verboseOutput;
  }

  /**
   * Set the value of verboseOutput.
   *
   * @param newVerboseOutput Value to assign to verboseOutput.
   */
  public void setVerboseOutput(boolean newVerboseOutput) {
    m_verboseOutput = newVerboseOutput;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String workingDirectoryTipText() {
    return "The working directory of the ensemble - where trained models will be stored.";
  }

  /**
   * Get the value of working directory.
   *
   * @return Value of working directory.
*/
  public File getWorkingDirectory() {
    return m_workingDirectory;
  }

  /**
   * Set the value of working directory. Propagates the change to the model
   * library so newly trained models are stored in the new location.
   *
   * @param newWorkingDirectory the new working directory
   */
  public void setWorkingDirectory(File newWorkingDirectory) {
    if (m_Debug) {
      System.out.println("working directory changed to: "
          + newWorkingDirectory);
    }
    // Keep the library's notion of the working directory in sync.
    m_library.setWorkingDirectory(newWorkingDirectory);
    m_workingDirectory = newWorkingDirectory;
  }

  /**
   * Buildclassifier selects a classifier from the set of classifiers by
   * minimising error on the training data.
   *
   * @param trainData the training data to be used for generating the boosted
   *          classifier.
   * @throws Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances trainData) throws Exception {
    getCapabilities().testWithFail(trainData);

    // First we need to make sure that some library models
    // were specified. If not, then use the default list
    // NOTE(review): m_library is dereferenced here BEFORE the
    // "m_library == null" check below — if m_library could ever be null at
    // this point, this line would throw a NullPointerException first. The
    // null check appears to be dead code or in the wrong order; confirm
    // against the constructor/initialization path.
    if (m_library.m_Models.size() == 0) {
      System.out
          .println("WARNING: No library file specified. Using some default models.");
      System.out
          .println("You should specify a model list with -L <file> from the command line.");
      System.out
          .println("Or edit the list directly with the LibraryEditor from the GUI");

      // Default library: ten REP trees distinguished only by their seed.
      for (int i = 0; i < 10; i++) {
        REPTree tree = new REPTree();
        tree.setSeed(i);
        m_library.addModel(new EnsembleSelectionLibraryModel(tree));
      }
    }

    // NOTE(review): see note above — this check comes too late to guard the
    // m_library.m_Models access.
    if (m_library == null) {
      m_library = new EnsembleSelectionLibrary();
      m_library.setDebug(m_Debug);
    }

    m_library.setNumFolds(getNumFolds());
    m_library.setValidationRatio(getValidationRatio());
    // train all untrained models, and set "data" to the hillclimbing set.
    Instances data = m_library.trainAll(trainData, m_workingDirectory
        .getAbsolutePath(), m_algorithm);

    // We cache the hillclimb predictions from all of the models in
    // the library so that we can evaluate their performances when we
    // combine them in various ways (without needing to keep the
    // classifiers in memory).
    // Indexed as [model][instance][class/prediction] — TODO confirm
    // against EnsembleSelectionLibrary.getHillclimbPredictions().
    double predictions[][][] = m_library.getHillclimbPredictions();
    int numModels = predictions.length;
    // Accumulated ensemble weight per library model (same order as the
    // library's model list).
    int modelWeights[] = new int[numModels];
    m_total_weight = 0;
    Random rand = new Random(m_Seed);

    if (m_algorithm == ALGORITHM_BUILD_LIBRARY) {
      // Library-only mode: training the models above was all we needed.
      return;

    } else if (m_algorithm == ALGORITHM_BEST) {
      // If we want to choose the best model, just make a model bag that
      // includes all the models, then sort initialize to find the 1 that
      // performs best.
      ModelBag model_bag = new ModelBag(predictions, 1.0, m_Debug);
      int[] modelPicked = model_bag.sortInitialize(1, false, data,
          m_hillclimbMetric);
      // Then give it a weight of 1, while all others remain 0.
      modelWeights[modelPicked[0]] = 1;

    } else {
      if (m_Debug)
        System.out.println("Starting hillclimbing algorithm: "
            + m_algorithm);
      for (int i = 0; i < getNumModelBags(); ++i) {
        // For the number of bags,
        if (m_Debug)
          System.out.println("Starting on ensemble bag: " + i);
        // Create a new bag of the appropriate size
        ModelBag modelBag = new ModelBag(predictions, getModelRatio(),
            m_Debug);
        // And shuffle it.
        modelBag.shuffle(rand);
        if (getSortInitializationRatio() > 0.0) {
          // Sort initialize, if the ratio greater than 0.
          modelBag.sortInitialize((int) (getSortInitializationRatio()
              * getModelRatio() * numModels),
              getGreedySortInitialization(), data, m_hillclimbMetric);
        }

        if (m_algorithm == ALGORITHM_BACKWARD) {
          // If we're doing backwards elimination, we just give all
          // models a weight of 1 initially. If the # of hillclimb
          // iterations is too high, we'll end up with just one model
          // in the end (we never delete all models from a bag).
          // TODO - it might be smarter to base this weight off of how
          // many models we have.
          modelBag.weightAll(1); // for now at least, I'm just
          // assuming 1.
        }

        // Now the bag is initialized, and we're ready to hillclimb.
        for (int j = 0; j < getHillclimbIterations(); ++j) {
          if (m_algorithm == ALGORITHM_FORWARD) {
            modelBag.forwardSelect(getReplacement(), data,
                m_hillclimbMetric);
          } else if (m_algorithm == ALGORITHM_BACKWARD) {
            modelBag.backwardEliminate(data, m_hillclimbMetric);
          } else if (m_algorithm == ALGORITHM_FORWARD_BACKWARD) {
            modelBag.forwardSelectOrBackwardEliminate(
                getReplacement(), data, m_hillclimbMetric);
          }
        }

        // Now that we've done all the hillclimbing steps, we can just
        // get the model weights that the bag determined, and add them
        // to our running total.
        int[] bagWeights = modelBag.getModelWeights();
        for (int j = 0; j < bagWeights.length; ++j) {
          modelWeights[j] += bagWeights[j];
        }
      }
    }

    // Now we've done the hard work of actually learning the ensemble. Now
    // we set up the appropriate data structures so that Ensemble Selection
    // can make predictions for future test examples.
    Set modelNames = m_library.getModelNames();
    String[] modelNamesArray = new String[m_library.size()];
    Iterator iter = modelNames.iterator();
    // libraryIndex indexes over all the models in the library (not just
    // those which we chose for the ensemble).
    int libraryIndex = 0;
    // chosenModels will count the total number of models which were
    // selected by EnsembleSelection (those that have non-zero weight).
    int chosenModels = 0;
    while (iter.hasNext()) {
      // Note that we have to be careful of order. Our model_weights array
      // is in the same order as our list of models in m_library.

      // Get the name of the model,
      modelNamesArray[libraryIndex] = (String) iter.next();
      // and its weight.
      int weightOfModel = modelWeights[libraryIndex++];
      m_total_weight += weightOfModel;
      if (weightOfModel > 0) {
        // If the model was chosen at least once, increment the
        // number of chosen models.
        ++chosenModels;
      }
    }

    if (m_verboseOutput) {
      // Output every model and its performance with respect to the
      // validation data.
      ModelBag bag = new ModelBag(predictions, 1.0, m_Debug);
      int modelIndexes[] = bag.sortInitialize(modelNamesArray.length,
          false, data, m_hillclimbMetric);
      double modelPerformance[] = bag.getIndividualPerformance(data,
          m_hillclimbMetric);
      for (int i = 0; i < modelIndexes.length; ++i) {
        // TODO - Could do this in a more readable way.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -