📄 racesearch.java
字号:
} catch (Exception ex) {
}
}
}
/**
* Gets whether ranking has been requested. This is used by the
* AttributeSelection module to determine if rankedAttributes()
* should be called.
* @return true if ranking has been requested.
*/
public boolean getGenerateRanking() {
  // Plain accessor: reports the stored ranking flag without touching any state.
  final boolean rankingWanted = m_rankingRequested;
  return rankingWanted;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String numToSelectTipText() {
  // The tip is assembled sentence by sentence; the resulting text is unchanged.
  final String purpose = "Specify the number of attributes to retain. ";
  final String usage = "Use in conjunction with generateRanking. ";
  final String defaults =
    "The default value (-1) indicates that all attributes are to be retained. ";
  final String alternative =
    "Use either this option or a threshold to reduce the attribute set.";
  return purpose + usage + defaults + alternative;
}
/**
* Specify the number of attributes to select from the ranked list
* (if generating a ranking). -1
* indicates that all attributes are to be retained.
* @param n the number of attributes to retain
*/
public void setNumToSelect(int n) {
  // Stored as-is; no range checking is done here.
  this.m_numToSelect = n;
}
/**
* Gets the number of attributes to be retained.
* @return the number of attributes to retain
*/
public int getNumToSelect() {
  // Returns the raw user setting (may be negative); see getCalculatedNumToSelect
  // for the value derived from it.
  final int requested = m_numToSelect;
  return requested;
}
/**
* Gets the calculated number of attributes to retain. This is the
* actual number of attributes to retain. This is the same as
* getNumToSelect if the user specifies a number which is not less
* than zero. Otherwise it should be the number of attributes in the
* (potentially transformed) data.
* @return the calculated number of attributes to retain
*/
public int getCalculatedNumToSelect() {
  // A negative user setting means "not specified": leave the previously
  // calculated value untouched and report it.
  if (m_numToSelect < 0) {
    return m_calculatedNumToSelect;
  }
  // An explicit, non-negative user setting overrides the calculated value.
  m_calculatedNumToSelect = m_numToSelect;
  return m_calculatedNumToSelect;
}
/**
* Returns the tip text for this property
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String selectionThresholdTipText() {
  // The tip is assembled sentence by sentence; the resulting text is unchanged.
  final String purpose = "Set threshold by which attributes can be discarded. ";
  final String defaults =
    "Default value results in no attributes being discarded. ";
  final String usage = "Use in conjunction with generateRanking";
  return purpose + defaults + usage;
}
/**
* Set the threshold by which the AttributeSelection module can discard
* attributes.
* @param threshold the threshold.
*/
public void setSelectionThreshold(double threshold) {
  // Stored as-is; no validation is done here.
  this.m_threshold = threshold;
}
/**
* Returns the threshold so that the AttributeSelection module can
* discard attributes from the ranking.
* @return the selection threshold
*/
public double getSelectionThreshold() {
  // Plain accessor for the attribute-discarding threshold.
  final double selectionThreshold = m_threshold;
  return selectionThreshold;
}
/**
* Returns an enumeration describing the available options.
* @return an enumeration of all the available options.
**/
public Enumeration listOptions () {
  // Nine options belong to this class; the Vector grows on demand when the
  // evaluator's own options are appended below.
  Vector newVector = new Vector(9);

  newVector.addElement(new Option("\tType of race to perform.\n\t"
                                  +"(default = 0).",
                                  "R", 1 ,"-R <0 = forward | 1 = backward "
                                  +"race | 2 = schemata | 3 = rank>"));
  newVector.addElement(new Option("\tSignificance level for comaparisons"
                                  +"\n\t(default = 0.001(forward/backward/"
                                  +"rank)/0.01(schemata)).",
                                  "L",1,"-L <significance>"));
  // -T is the threshold for comparing subset errors (see setThreshold).
  newVector.addElement(new Option("\tThreshold for error comparison.\n\t"
                                  +"(default = 0.001).",
                                  "T",1,"-T <threshold>"));
  newVector.addElement(new Option("\tAttribute ranker to use if doing a "
                                  +"\n\trank search. Place any\n\t"
                                  +"evaluator options LAST on the"
                                  + "\n\tcommand line following a \"--\"."
                                  + "\n\teg. -A weka.attributeSelection."
                                  +"GainRatioAttributeEval ... "
                                  + "-- -M.\n\t(default = GainRatioAttributeEval)",
                                  "A", 1, "-A <attribute evaluator>"));
  newVector.addElement(new Option("\tFolds for cross validation\n\t"
                                  +"(default = 0 (1 if schemata race)",
                                  "F",1,"-F <0 = 10 fold | 1 = leave-one-out>"));
  // Fix: the original text had a stray "." after the line break
  // ("forward\n."), which put a lone period at the start of the next line.
  newVector.addElement(new Option("\tGenerate a ranked list of attributes."
                                  +"\n\tForces the search to be forward\n"
                                  +"\tand races until all attributes have\n"
                                  +"\tselected, thus producing a ranking.",
                                  "Q",0,"-Q"));
  // Fix: the flag that -N overrides is the selection threshold (-J), not the
  // error-comparison threshold (-T).
  newVector
    .addElement(new Option("\tSpecify number of attributes to retain from "
                           +"\n\tthe ranking. Overides -J. Use "
                           +"in conjunction with -Q"
                           ,"N",1
                           , "-N <num to select>"));
  // Fix: this option was registered as "T"/"-T <threshold>", colliding with
  // the error-comparison threshold above. getOptions() emits it as -J and the
  // setOptions() documentation lists it as -J, so advertise it as -J here too.
  newVector
    .addElement(new Option("\tSpecify a theshold by which attributes"
                           + "\n\tmay be discarded from the ranking."
                           +"\n\tUse in conjuction with -Q","J",1
                           , "-J <threshold>"));
  newVector.addElement(new Option("\tVerbose output for monitoring the "
                                  +"search.",
                                  "Z",0,"-Z"));

  // Append the options of the attribute evaluator, if it exposes any,
  // introduced by a header entry naming the evaluator class.
  if ((m_ASEval != null) &&
      (m_ASEval instanceof OptionHandler)) {
    newVector.addElement(new Option("", "", 0, "\nOptions specific to "
                                    + "evaluator "
                                    + m_ASEval.getClass().getName()
                                    + ":"));
    Enumeration em = ((OptionHandler)m_ASEval).listOptions();
    while (em.hasMoreElements()) {
      newVector.addElement(em.nextElement());
    }
  }
  return newVector.elements();
}
/**
* Parses a given list of options.
*
* Valid options are:<p>
*
* -R <race type><br>
* 0 = forward, 1 = backward, 2 = schemata, 3 = rank. <p>
*
* -L <significance level> <br>
* significance level to use for t-tests. <p>
*
* -T <threshold> <br>
* threshold for considering mean errors of two subsets the same <p>
*
* -F <xval type> <br>
* 0 = 10 fold, 1 = leave-one-out (selected automatically for schemata race).
* <p>
*
* -A <attribute evaluator> <br>
* the attribute evaluator to use when doing a rank search <p>
*
* -Q <br>
* produce a ranked list of attributes. Selecting this option forces
* the race type to be forward. Racing continues until *all* attributes
* have been selected, thus producing a ranked list of attributes. <p>
*
* -N <number to retain> <br>
* Specify the number of attributes to retain. Overrides any threshold.
* Use in conjunction with -Q. <p>
*
* -J <threshold> <br>
* Specify a threshold by which the AttributeSelection module can discard
* attributes. Use in conjunction with -Q. <p>
*
* -Z <br>
* Turn on verbose output for monitoring the search <p>
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*
**/
public void setOptions (String[] options)
  throws Exception {
  String optionString;

  // Start from the defaults so that options absent from the array are reset.
  resetOptions();

  // -R: race type (index into TAGS_SELECTION).
  optionString = Utils.getOption('R', options);
  if (optionString.length() != 0) {
    setRaceType(new SelectedTag(Integer.parseInt(optionString),
                                TAGS_SELECTION));
  }

  // -F: cross-validation type (index into XVALTAGS_SELECTION).
  optionString = Utils.getOption('F', options);
  if (optionString.length() != 0) {
    setFoldsType(new SelectedTag(Integer.parseInt(optionString),
                                 XVALTAGS_SELECTION));
  }

  // -L: significance level.
  optionString = Utils.getOption('L', options);
  if (optionString.length() != 0) {
    setSignificanceLevel(Double.parseDouble(optionString));
  }

  // -T: threshold for error comparison.
  optionString = Utils.getOption('T', options);
  if (optionString.length() != 0) {
    setThreshold(Double.parseDouble(optionString));
  }

  // -A: attribute evaluator; its own options are taken from the partition
  // of the option array handed back by Utils.partitionOptions.
  optionString = Utils.getOption('A', options);
  if (optionString.length() != 0) {
    setAttributeEvaluator(ASEvaluation.forName(optionString,
                          Utils.partitionOptions(options)));
  }

  // -Q: generate a ranking.
  setGenerateRanking(Utils.getFlag('Q', options));

  // -J: selection threshold used to discard attributes from the ranking.
  // Fix: this step previously looked up 'T' a second time and called
  // setThreshold() again, so -J (the flag written by getOptions() and
  // documented for this method) was never read and the selection threshold
  // could not be set from the option array.
  optionString = Utils.getOption('J', options);
  if (optionString.length() != 0) {
    setSelectionThreshold(Double.parseDouble(optionString));
  }

  // -N: number of attributes to retain from the ranking.
  optionString = Utils.getOption('N', options);
  if (optionString.length() != 0) {
    setNumToSelect(Integer.parseInt(optionString));
  }

  // -Z: verbose output.
  setDebug(Utils.getFlag('Z', options));
}
/**
* Gets the current settings of RaceSearch.
* @return an array of strings suitable for passing to setOptions()
*/
public String[] getOptions () {
  // Options of the attribute evaluator are only available when it is an
  // OptionHandler (instanceof is false for null).
  String[] evalOpts;
  if (m_ASEval instanceof OptionHandler) {
    evalOpts = ((OptionHandler)m_ASEval).getOptions();
  } else {
    evalOpts = new String[0];
  }

  // 17 slots cover every flag/value this method can write for the search
  // itself (including "-A <class> --"); unused slots are blanked at the end.
  String[] result = new String[17 + evalOpts.length];
  int pos = 0;

  result[pos++] = "-R";
  result[pos++] = "" + m_raceType;
  result[pos++] = "-L";
  result[pos++] = "" + getSignificanceLevel();
  result[pos++] = "-T";
  result[pos++] = "" + getThreshold();
  result[pos++] = "-F";
  result[pos++] = "" + m_xvalType;

  if (getGenerateRanking()) {
    result[pos++] = "-Q";
  }

  result[pos++] = "-N";
  result[pos++] = "" + getNumToSelect();
  result[pos++] = "-J";
  result[pos++] = "" + getSelectionThreshold();

  if (getDebug()) {
    result[pos++] = "-Z";
  }

  // Evaluator class name, then "--", then the evaluator's own options.
  if (getAttributeEvaluator() != null) {
    result[pos++] = "-A";
    result[pos++] = getAttributeEvaluator().getClass().getName();
    result[pos++] = "--";
    System.arraycopy(evalOpts, 0, result, pos, evalOpts.length);
    pos += evalOpts.length;
  }

  // Pad the remaining slots with empty strings so no entry is null.
  for (; pos < result.length; pos++) {
    result[pos] = "";
  }
  return result;
}
/**
* Searches the attribute subset space by racing cross validation
* errors of competing subsets
*
* @param ASEval the attribute evaluator to guide the search
* @param data the training instances.
* @return an array (not necessarily ordered) of selected attribute indexes
* @exception Exception if the search can't be completed
*/
public int[] search (ASEvaluation ASEval, Instances data)
  throws Exception {
  // --- Evaluator checks, in the same order as the messages below ---
  final boolean isSubsetEvaluator = ASEval instanceof SubsetEvaluator;
  if (!isSubsetEvaluator) {
    throw new Exception(ASEval.getClass().getName()
                        + " is not a "
                        + "Subset evaluator! (RaceSearch)");
  }
  if (ASEval instanceof UnsupervisedSubsetEvaluator) {
    throw new Exception("Can't use an unsupervised subset evaluator "
                        +"(RaceSearch).");
  }
  final boolean isHoldOut = ASEval instanceof HoldOutSubsetEvaluator;
  if (!isHoldOut) {
    throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
                        +"weka.attributeSelection.ClassifierSubsetEval "
                        +"(RaceSearch)");
  }
  final boolean isErrorBased = ASEval instanceof ErrorBasedMeritEvaluator;
  if (!isErrorBased) {
    throw new Exception("Only error based subset evaluators can be used, "
                        +"eg. weka.attributeSelection.ClassifierSubsetEval "
                        +"(RaceSearch)");
  }

  // --- Data checks ---
  // m_Instances refers to the caller's data object, so instances with a
  // missing class are removed from that object directly.
  m_Instances = data;
  m_Instances.deleteWithMissingClass();
  if (m_Instances.numInstances() == 0) {
    throw new Exception("All instances have missing class! (RaceSearch)");
  }
  final int maxSelectable = m_Instances.numAttributes()-1;
  if (m_rankingRequested && m_numToSelect > maxSelectable) {
    throw new Exception("More attributes requested than exist in the data "
                        +"(RaceSearch).");
  }

  // --- Search state ---
  m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
  m_numAttribs = m_Instances.numAttributes();
  m_classIndex = m_Instances.classIndex();
  if (m_rankingRequested) {
    // One row of two values per attribute other than one (numAttribs - 1 rows).
    m_rankedAtts = new double[m_numAttribs-1][2];
    m_rankedSoFar = 0;
  }

  // Leave-one-out uses one fold per instance; otherwise 10 folds.
  m_numFolds = (m_xvalType == LEAVE_ONE_OUT) ? data.numInstances() : 10;

  // Fixed seed (1) for the shuffle; the same Random is handed to the race.
  Random random = new Random(1);
  data.randomize(random);

  // --- Dispatch on race type; an unrecognised type yields null ---
  switch (m_raceType) {
  case FORWARD_RACE:
  case BACKWARD_RACE:
    return hillclimbRace(data, random);
  case SCHEMATA_RACE:
    return schemataRace(data, random);
  case RANK_RACE:
    return rankRace(data, random);
  }
  return null;
}
/**
 * Returns a copy of the attribute ranking accumulated so far and, when no
 * positive number of attributes to select has been set, updates the
 * calculated number of attributes to select.
 *
 * @return a new array holding the first m_rankedSoFar rows of the ranking,
 * two values per row (presumably attribute index and merit -- confirm
 * against the code that fills m_rankedAtts)
 * @exception Exception if ranking was not requested or no ranking has
 * been allocated by a search yet
 */
public double [][] rankedAttributes() throws Exception {
  if (!m_rankingRequested) {
    throw new Exception("Need to request a ranked list of attributes "
                        +"before attributes can be ranked (RaceSearch).");
  }
  if (m_rankedAtts == null) {
    throw new Exception("Search must be performed before attributes "
                        +"can be ranked (RaceSearch).");
  }

  // Copy only the rows filled so far, so callers cannot modify internal state.
  final int rankedCount = m_rankedSoFar;
  double [][] ranking = new double [rankedCount][2];
  for (int row = 0; row < rankedCount; row++) {
    double [] source = m_rankedAtts[row];
    ranking[row][0] = source[0];
    ranking[row][1] = source[1];
  }

  // Without a positive user-specified count, derive the count from the
  // threshold; -Double.MAX_VALUE is treated as "no threshold" (keep all).
  if (m_numToSelect <= 0) {
    if (m_threshold != -Double.MAX_VALUE) {
      determineNumToSelectFromThreshold(ranking);
    } else {
      m_calculatedNumToSelect = ranking.length;
    }
  }
  return ranking;
}
/**
 * Sets the calculated number of attributes to select to the number of rows
 * in the ranking whose second value is strictly greater than the threshold.
 *
 * @param ranking the ranking; element [i][1] of each row is compared
 * against the threshold
 */
private void determineNumToSelectFromThreshold(double [][] ranking) {
  int aboveThreshold = 0;
  for (double [] entry : ranking) {
    if (entry[1] > m_threshold) {
      aboveThreshold++;
    }
  }
  m_calculatedNumToSelect = aboveThreshold;
}
/**
* Print an attribute set.
*/
private String printSets(char [][]raceSets) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -