📄 racesearch.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RaceSearch.java
* Copyright (C) 2000 Mark Hall
*
*/
package weka.attributeSelection;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Statistics;
import weka.core.Tag;
import weka.core.Utils;
import weka.experiment.PairedStats;
import weka.experiment.Stats;
/**
* Class for performing a racing search. <p>
*
* For more information see: <br>
* Moore, A. W. and Lee, M. S. (1994). Efficient algorithms for minimising
* cross validation error. Proceedings of the Eleventh International
* Conference on Machine Learning. pp 190--198. <p>
*
* Valid options are:<p>
*
* -R <race type><br>
* 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p>
*
* -L <significance level> <br>
* significance level to use for t-tests. <p>
*
* -T <threshold> <br>
* threshold for considering mean errors of two subsets the same <p>
*
* -F <xval type> <br>
* 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race
* <p>
*
* -A <attribute evaluator> <br>
* the attribute evaluator to use when doing a rank search <p>
*
* -Q <br>
* produce a ranked list of attributes. Selecting this option forces
* the race type to be forward. Racing continues until *all* attributes
* have been selected, thus producing a ranked list of attributes. <p>
*
* -N <number to retain> <br>
* Specify the number of attributes to retain. Overides any threshold.
* Use in conjunction with -Q. <p>
*
* -J <threshold> <br>
* Specify a threshold by which the AttributeSelection module can discard
* attributes. Use in conjunction with -Q. <p>
*
* -Z <br>
* Turn on verbose output for monitoring the search <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision$
*/
public class RaceSearch extends ASSearch implements RankedOutputSearch,
OptionHandler {
/* the training instances */
private Instances m_Instances = null;
/** search types */
private static final int FORWARD_RACE = 0;
private static final int BACKWARD_RACE = 1;
private static final int SCHEMATA_RACE = 2;
private static final int RANK_RACE = 3;
public static final Tag [] TAGS_SELECTION = {
new Tag(FORWARD_RACE, "Forward selection race"),
new Tag(BACKWARD_RACE, "Backward elimination race"),
new Tag(SCHEMATA_RACE, "Schemata race"),
new Tag(RANK_RACE, "Rank race")
};
/** the selected search type */
private int m_raceType = FORWARD_RACE;
/** xval types */
private static final int TEN_FOLD = 0;
private static final int LEAVE_ONE_OUT = 1;
public static final Tag [] XVALTAGS_SELECTION = {
new Tag(TEN_FOLD, "10 Fold"),
new Tag(LEAVE_ONE_OUT, "Leave-one-out"),
};
/** the selected xval type */
private int m_xvalType = TEN_FOLD;
/** the class index */
private int m_classIndex;
/** the number of attributes in the data */
private int m_numAttribs;
/** the total number of partially/fully evaluated subsets */
private int m_totalEvals;
/** holds the merit of the best subset found */
private double m_bestMerit = -Double.MAX_VALUE;
/** the subset evaluator to use */
private HoldOutSubsetEvaluator m_theEvaluator = null;
/** the significance level for comparisons */
private double m_sigLevel = 0.001;
/** threshold for comparisons */
private double m_delta = 0.001;
/** the number of samples above which to begin testing for similarity
between competing subsets */
private int m_samples = 20;
/** number of cross validation folds---equal to the number of instances
for leave-one-out cv */
private int m_numFolds = 10;
/** the attribute evaluator to generate the initial ranking when
doing a rank race */
private ASEvaluation m_ASEval = new GainRatioAttributeEval();
/** will hold the attribute ranking produced by the above attribute
evaluator if doing a rank search */
private int [] m_Ranking;
/** verbose output for monitoring the search and debugging */
private boolean m_debug = false;
/** If true then produce a ranked list of attributes by fully traversing
a forward hillclimb race */
private boolean m_rankingRequested = false;
/** The ranked list of attributes produced if m_rankingRequested is true */
private double [][] m_rankedAtts;
/** The number of attributes ranked so far (if ranking is requested) */
private int m_rankedSoFar;
/** The number of attributes to retain if a ranking is requested. -1
indicates that all attributes are to be retained. Has precedence over
m_threshold */
private int m_numToSelect = -1;
private int m_calculatedNumToSelect = -1;
/** the threshold for removing attributes if ranking is requested */
private double m_threshold = -Double.MAX_VALUE;
/**
* Returns a string describing this search method
* @return a description of the search method suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Races the cross validation error of competing "
+"attribute subsets. Use in conjuction with a ClassifierSubsetEval. "
+"RaceSearch has four modes:\n\nforward selection "
+"races all single attribute additions to a base set (initially "
+" no attributes), selects the winner to become the new base set "
+"and then iterates until there is no improvement over the base set. "
+"\n\nBackward elimination is similar but the initial base set has all "
+"attributes included and races all single attribute deletions. "
+"\n\nSchemata search is a bit different. Each iteration a series of "
+"races are run in parallel. Each race in a set determines whether "
+"a particular attribute should be included or not---ie the race is "
+"between the attribute being \"in\" or \"out\". The other attributes "
+"for this race are included or excluded randomly at each point in the "
+"evaluation. As soon as one race "
+"has a clear winner (ie it has been decided whether a particular "
+"attribute should be inor not) then the next set of races begins, "
+"using the result of the winning race from the previous iteration as "
+"new base set.\n\nRank race first ranks the attributes using an "
+"attribute evaluator and then races the ranking. The race includes "
+"no attributes, the top ranked attribute, the top two attributes, the "
+"top three attributes, etc.\n\nIt is also possible to generate a "
+"raked list of attributes through the forward racing process. "
+"If generateRanking is set to true then a complete forward race will "
+"be run---that is, racing continues until all attributes have been "
+"selected. The order that they are added in determines a complete "
+"ranking of all the attributes.\n\nRacing uses paired and unpaired "
+"t-tests on cross-validation errors of competing subsets. When there "
+"is a significant difference between the means of the errors of two "
+"competing subsets then the poorer of the two can be eliminated from "
+"the race. Similarly, if there is no significant difference between "
+"the mean errors of two competing subsets and they are within some "
+"threshold of each other, then one can be eliminated from the race. ";
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String raceTypeTipText() {
return "Set the type of search.";
}
/**
* Set the race type
*
* @param d the type of race
*/
public void setRaceType (SelectedTag d) {
if (d.getTags() == TAGS_SELECTION) {
m_raceType = d.getSelectedTag().getID();
}
if (m_raceType == SCHEMATA_RACE && !m_rankingRequested) {
try {
setFoldsType(new SelectedTag(LEAVE_ONE_OUT,
XVALTAGS_SELECTION));
setSignificanceLevel(0.01);
} catch (Exception ex) {
}
} else {
try {
setFoldsType(new SelectedTag(TEN_FOLD,
XVALTAGS_SELECTION));
setSignificanceLevel(0.001);
} catch (Exception ex) {
}
}
}
/**
* Get the race type
*
* @return the type of race
*/
public SelectedTag getRaceType() {
return new SelectedTag(m_raceType, TAGS_SELECTION);
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String significanceLevelTipText() {
return "Set the significance level to use for t-test comparisons.";
}
/**
* Sets the significance level to use
* @param sig the significance level
*/
public void setSignificanceLevel(double sig) {
m_sigLevel = sig;
}
/**
* Get the significance level
* @return the current significance level
*/
public double getSignificanceLevel() {
return m_sigLevel;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String thresholdTipText() {
return "Set the error threshold by which to consider two subsets "
+"equivalent.";
}
/**
* Sets the threshold for comparisons
* @param t the threshold to use
*/
public void setThreshold(double t) {
m_delta = t;
}
/**
* Get the threshold
* @return the current threshold
*/
public double getThreshold() {
return m_delta;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String foldsTipText() {
return "Set the number of folds to use for x-val error estimation. "
+"Leave-one-out is selected automatically for schemata search.";
}
/**
* Set the xfold type
*
* @param d the type of xval
*/
public void setFoldsType (SelectedTag d) {
if (d.getTags() == XVALTAGS_SELECTION) {
m_xvalType = d.getSelectedTag().getID();
}
}
/**
* Get the xfold type
*
* @return the type of xval
*/
public SelectedTag getFoldsType () {
return new SelectedTag(m_xvalType, XVALTAGS_SELECTION);
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String debugTipText() {
return "Turn on verbose output for monitoring the search's progress.";
}
/**
* Set whether verbose output should be generated.
* @param d true if output is to be verbose.
*/
public void setDebug(boolean d) {
m_debug = d;
}
/**
* Get whether output is to be verbose
* @return true if output will be verbose
*/
public boolean getDebug() {
return m_debug;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String attributeEvaluatorTipText() {
return "Attribute evaluator to use for generating an initial ranking. "
+"Use in conjunction with a rank race";
}
/**
* Set the attribute evaluator to use for generating the ranking.
* @param newEvaluator the attribute evaluator to use.
*/
public void setAttributeEvaluator(ASEvaluation newEvaluator) {
m_ASEval = newEvaluator;
}
/**
* Get the attribute evaluator used to generate the ranking.
* @return the evaluator used to generate the ranking.
*/
public ASEvaluation getAttributeEvaluator() {
return m_ASEval;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String generateRankingTipText() {
return "Use the racing process to generate a ranked list of attributes. "
+"Using this mode forces the race to be a forward type and then races "
+"until all attributes have been added, thus giving a ranked list";
}
/**
* Records whether the user has requested a ranked list of attributes.
* @param doRank true if ranking is requested
*/
public void setGenerateRanking(boolean doRank) {
m_rankingRequested = doRank;
if (m_rankingRequested) {
try {
setRaceType(new SelectedTag(FORWARD_RACE,
TAGS_SELECTION));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -