📄 subsetsizeforwardselection.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SubsetSizeForwardSelection.java * Copyright (C) 2007 Martin Gütlein * */package weka.attributeSelection;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.Utils;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.BitSet;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * SubsetSizeForwardSelection :<br/> * Class for performing a subset size forward selection * <p> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: * <p> * * <pre> -I * Perform initial ranking to select top-ranked attributes.</pre> * * <pre> -K <num> * Number of top-ranked attributes that are taken into account.</pre> * * <pre> -T <0 = fixed-set | 1 = fixed-width> * Type of Linear Forward Selection (default = 0).</pre> * * <pre> -S <num> * Size of lookup cache for evaluated subsets. Expressed as a multiple of the * number of attributes in the data set. 
(default = 1).</pre> * * <pre> -E <string> * class name of subset evaluator to use for subset size determination (default = * null, same subset evaluator as for ranking and final forward selection is * used). Place any evaluator options LAST on the command line following a "--". * eg. -A weka.attributeSelection.ClassifierSubsetEval ... -- -M<pre> * * <pre> -F <num> * Number of cross validation folds for subset size determination (default = 5).</pre> * * <pre> -R <num> * Seed for cross validation subset size determination. (default = 1)</pre> * * <pre> -Z * verbose on/off.</pre> * <!-- options-end --> * * @author Martin Guetlein (martin.guetlein@gmail.com) * @version $Revision: 1.1 $ */public class SubsetSizeForwardSelection extends ASSearch implements OptionHandler { /** search directions */ protected static final int TYPE_FIXED_SET = 0; protected static final int TYPE_FIXED_WIDTH = 1; public static final Tag[] TAGS_TYPE = { new Tag(TYPE_FIXED_SET, "Fixed-set"), new Tag(TYPE_FIXED_WIDTH, "Fixed-width"), }; // member variables /** perform initial ranking to select top-ranked attributes */ protected boolean m_performRanking; /** * number of top-ranked attributes that are taken into account for the * search */ protected int m_numUsedAttributes; /** 0 == fixed-set, 1 == fixed-width */ protected int m_linearSelectionType; /** the subset evaluator to use for subset size determination */ private SubsetEvaluator m_setSizeEval; /** * Number of cross validation folds for subset size determination (default = * 5). */ protected int m_numFolds; /** Seed for cross validation subset size determination. 
(default = 1) */ protected int m_seed; /** number of attributes in the data */ protected int m_numAttribs; /** total number of subsets evaluated during a search */ protected int m_totalEvals; /** for debugging */ protected boolean m_verbose; /** holds the merit of the best subset found */ protected double m_bestMerit; /** holds the maximum size of the lookup cache for evaluated subsets */ protected int m_cacheSize; /** * Constructor */ public SubsetSizeForwardSelection() { resetOptions(); } /** * Returns a string describing this search method * * @return a description of the search method suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "SubsetSizeForwardSelection:\n\n" + "Extension of LinearForwardSelection. The search performs an interior " + "cross-validation (seed and number of folds can be specified). A " + "LinearForwardSelection is performed on each foldto determine the optimal " + "subset-size (using the given SubsetSizeEvaluator). Finally, a " + "LinearForwardSelection up to the optimal subset-size is performed on " + "the whole data.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.MASTERSTHESIS); result.setValue(Field.AUTHOR, "Martin Guetlein"); result.setValue(Field.YEAR, "2006"); result.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers"); result.setValue(Field.SCHOOL, "Albert-Ludwigs-Universitat"); result.setValue(Field.ADDRESS, "Freiburg, Germany"); return result; } /** * Returns an enumeration describing the available options. 
* * @return an enumeration of all the available options. * */ public Enumeration listOptions() { Vector newVector = new Vector(9); newVector.addElement(new Option("\tPerform initial ranking to select the" + "\n\ttop-ranked attributes.", "I", 0, "-I")); newVector.addElement(new Option( "\tNumber of top-ranked attributes that are " + "\n\ttaken into account by the search.", "K", 1, "-K <num>")); newVector.addElement(new Option( "\tType of Linear Forward Selection (default = 0).", "T", 1, "-T <0 = fixed-set | 1 = fixed-width>")); newVector.addElement(new Option( "\tSize of lookup cache for evaluated subsets." + "\n\tExpressed as a multiple of the number of" + "\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>")); newVector.addElement(new Option( "\tSubset-evaluator used for subset-size determination." + "-- -M", "E", 1, "-E <subset evaluator>")); newVector.addElement(new Option("\tNumber of cross validation folds" + "\n\tfor subset size determination (default = 5).", "F", 1, "-F <num>")); newVector.addElement(new Option("\tSeed for cross validation" + "\n\tsubset size determination. (default = 1)", "R", 1, "-R <num>")); newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z")); if ((m_setSizeEval != null) && (m_setSizeEval instanceof OptionHandler)) { newVector.addElement(new Option("", "", 0, "\nOptions specific to " + "evaluator " + m_setSizeEval.getClass().getName() + ":")); Enumeration enu = ((OptionHandler) m_setSizeEval).listOptions(); while (enu.hasMoreElements()) { newVector.addElement(enu.nextElement()); } } return newVector.elements(); } /** * Parses a given list of options. * * Valid options are: * <p> * * -I <br> * Perform initial ranking to select top-ranked attributes. * <p> * * -K <num> <br> * Number of top-ranked attributes that are taken into account. * <p> * * -T <0 = fixed-set | 1 = fixed-width> <br> * Typ of Linear Forward Selection (default = 0). * <p> * * -S <num> <br> * Size of lookup cache for evaluated subsets. 
Expressed as a multiple of * the number of attributes in the data set. (default = 1). * <p> * * -E <string> <br> * class name of subset evaluator to use for subset size determination * (default = null, same subset evaluator as for ranking and final forward * selection is used). Place any evaluator options LAST on the command line * following a "--". eg. -A weka.attributeSelection.ClassifierSubsetEval ... -- * -M * * </pre> * * -F <num> <br> * Number of cross validation folds for subset size determination (default = * 5). * <p> * * -R <num> <br> * Seed for cross validation subset size determination. (default = 1) * <p> * * -Z <br> * verbose on/off. * <p> * * @param options * the list of options as an array of strings * @exception Exception * if an option is not supported * */ public void setOptions(String[] options) throws Exception { String optionString; resetOptions(); setPerformRanking(Utils.getFlag('I', options)); optionString = Utils.getOption('K', options); if (optionString.length() != 0) { setNumUsedAttributes(Integer.parseInt(optionString)); } optionString = Utils.getOption('T', options); if (optionString.length() != 0) { setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE)); } else { setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE)); } optionString = Utils.getOption('S', options); if (optionString.length() != 0) { setLookupCacheSize(Integer.parseInt(optionString)); } optionString = Utils.getOption('E', options); if (optionString.length() == 0) { System.out.println( "No subset size evaluator given, using evaluator that is used for final search."); m_setSizeEval = null; } else { setSubsetSizeEvaluator(ASEvaluation.forName(optionString, Utils.partitionOptions(options))); } optionString = Utils.getOption('F', options); if (optionString.length() != 0) { setNumSubsetSizeCVFolds(Integer.parseInt(optionString)); } optionString = Utils.getOption('R', options); if (optionString.length() != 0) { setSeed(Integer.parseInt(optionString)); } m_verbose = 
Utils.getFlag('Z', options); } /** * Set the maximum size of the evaluated subset cache (hashtable). This is * expressed as a multiplier for the number of attributes in the data set. * (default = 1). * * @param size * the maximum size of the hashtable */ public void setLookupCacheSize(int size) { if (size >= 0) { m_cacheSize = size; } } /** * Return the maximum size of the evaluated subset cache (expressed as a * multiplier for the number of attributes in a data set. * * @return the maximum size of the hashtable. */ public int getLookupCacheSize() { return m_cacheSize; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String lookupCacheSizeTipText() { return "Set the maximum size of the lookup cache of evaluated subsets. This is " + "expressed as a multiplier of the number of attributes in the data set. " + "(default = 1)."; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String performRankingTipText() { return "Perform initial ranking to select top-ranked attributes."; } /** * Perform initial ranking to select top-ranked attributes. * * @param b * true if initial ranking should be performed */ public void setPerformRanking(boolean b) { m_performRanking = b; } /** * Get boolean if initial ranking should be performed to select the * top-ranked attributes * * @return true if initial ranking should be performed */ public boolean getPerformRanking() { return m_performRanking; } /** * Returns the tip text for this property * * @return tip text for this property suitable for displaying in the * explorer/experimenter gui */ public String numUsedAttributesTipText() { return "Set the amount of top-ranked attributes that are taken into account by the search process."; } /** * Set the number of top-ranked attributes that taken into account by the * search process. 
*
   * @param k
   *            the number of attributes
   * @exception Exception
   *                if k is less than 2
   */
  public void setNumUsedAttributes(int k) throws Exception {
    // Accept the value only when at least two attributes are requested.
    if (k >= 2) {
      m_numUsedAttributes = k;
      return;
    }

    throw new Exception("Value of -K must be >= 2.");
  }

  /**
   * Get the number of top-ranked attributes that are taken into account by the
   * search process.
   *
   * @return the number of top-ranked attributes that are taken into account
   */
  public int getNumUsedAttributes() {
    return this.m_numUsedAttributes;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String typeTipText() {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -