📄 racesearch.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * RaceSearch.java * Copyright (C) 2000 Mark Hall * */package weka.attributeSelection;import java.io.*;import java.util.*;import weka.core.*;import weka.experiment.PairedStats;import weka.experiment.Stats;/** * Class for performing a racing search. <p> * * For more information see: <br> * Moore, A. W. and Lee, M. S. (1994). Efficient algorithms for minimising * cross validation error. Proceedings of the Eleventh International * Conference on Machine Learning. pp 190--198. <p> * * Valid options are:<p> * * -R <race type><br> * 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p> * * -L <significance level> <br> * significance level to use for t-tests. <p> * * -T <threshold> <br> * threshold for considering mean errors of two subsets the same <p> * * -F <xval type> <br> * 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race * <p> * * -A <attribute evaluator> <br> * the attribute evaluator to use when doing a rank search <p> * * -Q <br> * produce a ranked list of attributes. Selecting this option forces * the race type to be forward. Racing continues until *all* attributes * have been selected, thus producing a ranked list of attributes. <p> * * -N <number to retain> <br> * Specify the number of attributes to retain. Overides any threshold. * Use in conjunction with -Q. <p> * * -J <threshold> <br> * Specify a threshold by which the AttributeSelection module can discard * attributes. Use in conjunction with -Q. <p> * * -Z <br> * Turn on verbose output for monitoring the search <p> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public class RaceSearch extends ASSearch implements RankedOutputSearch, OptionHandler { /* the training instances */ private Instances m_Instances = null; /** search types */ private static final int FORWARD_RACE = 0; private static final int BACKWARD_RACE = 1; private static final int SCHEMATA_RACE = 2; private static final int RANK_RACE = 3; public static final Tag [] TAGS_SELECTION = { new Tag(FORWARD_RACE, "Forward selection race"), new Tag(BACKWARD_RACE, "Backward elimination race"), new Tag(SCHEMATA_RACE, "Schemata race"), new Tag(RANK_RACE, "Rank race") }; /** the selected search type */ private int m_raceType = FORWARD_RACE; /** xval types */ private static final int TEN_FOLD = 0; private static final int LEAVE_ONE_OUT = 1; public static final Tag [] XVALTAGS_SELECTION = { new Tag(TEN_FOLD, "10 Fold"), new Tag(LEAVE_ONE_OUT, "Leave-one-out"), }; /** the selected xval type */ private int m_xvalType = TEN_FOLD; /** the class index */ private int m_classIndex; /** the number of attributes in the data */ private int m_numAttribs; /** the total number of partially/fully evaluated subsets */ private int m_totalEvals; /** holds the merit of the best subset found */ private double m_bestMerit = -Double.MAX_VALUE; /** the subset evaluator to use */ private HoldOutSubsetEvaluator m_theEvaluator = null; /** the significance level for comparisons */ private double m_sigLevel = 0.001; /** threshold for comparisons */ private double m_delta = 0.001; /** the number of samples above which to begin testing for similarity between competing subsets */ private int m_samples = 20; /** number of cross validation folds---equal to the number of instances for leave-one-out cv */ private int m_numFolds = 10; /** the attribute evaluator to generate the initial ranking when doing a rank race */ private ASEvaluation m_ASEval = new GainRatioAttributeEval(); /** will hold the attribute ranking produced by the above attribute evaluator if doing a rank search */ private int [] m_Ranking; /** verbose output for monitoring the search and debugging */ private boolean m_debug = false; /** If true then produce a ranked list of attributes by fully traversing a forward hillclimb race */ private boolean m_rankingRequested = false; /** The ranked list of attributes produced if m_rankingRequested is true */ private double [][] m_rankedAtts; /** The number of attributes ranked so far (if ranking is requested) */ private int m_rankedSoFar; /** The number of attributes to retain if a ranking is requested. -1 indicates that all attributes are to be retained. Has precedence over m_threshold */ private int m_numToSelect = -1; private int m_calculatedNumToSelect = -1; /** the threshold for removing attributes if ranking is requested */ private double m_threshold = -Double.MAX_VALUE; /** * Returns a string describing this search method * @return a description of the search method suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Races the cross validation error of competing " +"attribute subsets. Use in conjuction with a ClassifierSubsetEval. " +"RaceSearch has four modes:\n\nforward selection " +"races all single attribute additions to a base set (initially " +" no attributes), selects the winner to become the new base set " +"and then iterates until there is no improvement over the base set. " +"\n\nBackward elimination is similar but the initial base set has all " +"attributes included and races all single attribute deletions. " +"\n\nSchemata search is a bit different. Each iteration a series of " +"races are run in parallel. Each race in a set determines whether " +"a particular attribute should be included or not---ie the race is " +"between the attribute being \"in\" or \"out\". The other attributes " +"for this race are included or excluded randomly at each point in the " +"evaluation. As soon as one race " +"has a clear winner (ie it has been decided whether a particular " +"attribute should be inor not) then the next set of races begins, " +"using the result of the winning race from the previous iteration as " +"new base set.\n\nRank race first ranks the attributes using an " +"attribute evaluator and then races the ranking. The race includes " +"no attributes, the top ranked attribute, the top two attributes, the " +"top three attributes, etc.\n\nIt is also possible to generate a " +"raked list of attributes through the forward racing process. " +"If generateRanking is set to true then a complete forward race will " +"be run---that is, racing continues until all attributes have been " +"selected. The order that they are added in determines a complete " +"ranking of all the attributes.\n\nRacing uses paired and unpaired " +"t-tests on cross-validation errors of competing subsets. When there " +"is a significant difference between the means of the errors of two " +"competing subsets then the poorer of the two can be eliminated from " +"the race. Similarly, if there is no significant difference between " +"the mean errors of two competing subsets and they are within some " +"threshold of each other, then one can be eliminated from the race. "; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String raceTypeTipText() { return "Set the type of search."; } /** * Set the race type * * @param d the type of race */ public void setRaceType (SelectedTag d) { if (d.getTags() == TAGS_SELECTION) { m_raceType = d.getSelectedTag().getID(); } if (m_raceType == SCHEMATA_RACE && !m_rankingRequested) { try { setFoldsType(new SelectedTag(LEAVE_ONE_OUT, XVALTAGS_SELECTION)); setSignificanceLevel(0.01); } catch (Exception ex) { } } else { try { setFoldsType(new SelectedTag(TEN_FOLD, XVALTAGS_SELECTION)); setSignificanceLevel(0.001); } catch (Exception ex) { } } } /** * Get the race type * * @return the type of race */ public SelectedTag getRaceType() { return new SelectedTag(m_raceType, TAGS_SELECTION); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String significanceLevelTipText() { return "Set the significance level to use for t-test comparisons."; } /** * Sets the significance level to use * @param sig the significance level */ public void setSignificanceLevel(double sig) { m_sigLevel = sig; } /** * Get the significance level * @return the current significance level */ public double getSignificanceLevel() { return m_sigLevel; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String thresholdTipText() { return "Set the error threshold by which to consider two subsets " +"equivalent."; } /** * Sets the threshold for comparisons * @param t the threshold to use */ public void setThreshold(double t) { m_delta = t; } /** * Get the threshold * @return the current threshold */ public double getThreshold() { return m_delta; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String foldsTipText() { return "Set the number of folds to use for x-val error estimation. " +"Leave-one-out is selected automatically for schemata search."; } /** * Set the xfold type * * @param d the type of xval */ public void setFoldsType (SelectedTag d) { if (d.getTags() == XVALTAGS_SELECTION) { m_xvalType = d.getSelectedTag().getID(); } } /** * Get the xfold type * * @return the type of xval */ public SelectedTag getFoldsType () { return new SelectedTag(m_xvalType, XVALTAGS_SELECTION); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String debugTipText() { return "Turn on verbose output for monitoring the search's progress."; } /** * Set whether verbose output should be generated. * @param d true if output is to be verbose. */ public void setDebug(boolean d) { m_debug = d; } /** * Get whether output is to be verbose * @return true if output will be verbose */ public boolean getDebug() { return m_debug; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String attributeEvaluatorTipText() { return "Attribute evaluator to use for generating an initial ranking. " +"Use in conjunction with a rank race"; } /** * Set the attribute evaluator to use for generating the ranking. * @param newEvaluator the attribute evaluator to use. */ public void setAttributeEvaluator(ASEvaluation newEvaluator) { m_ASEval = newEvaluator; } /** * Get the attribute evaluator used to generate the ranking. * @return the evaluator used to generate the ranking. */ public ASEvaluation getAttributeEvaluator() { return m_ASEval; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String generateRankingTipText() { return "Use the racing process to generate a ranked list of attributes. " +"Using this mode forces the race to be a forward type and then races " +"until all attributes have been added, thus giving a ranked list"; } /** * Records whether the user has requested a ranked list of attributes. * @param doRank true if ranking is requested */ public void setGenerateRanking(boolean doRank) { m_rankingRequested = doRank; if (m_rankingRequested) { try { setRaceType(new SelectedTag(FORWARD_RACE, TAGS_SELECTION)); } catch (Exception ex) { } } } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -