⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 subsetsizeforwardselection.java

📁 这是关于数据挖掘的一些算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    SubsetSizeForwardSelection.java *    Copyright (C) 2007 Martin Gütlein * */package weka.attributeSelection;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.Utils;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.BitSet;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * SubsetSizeForwardSelection :<br/> * Class for performing a subset size forward selection * <p> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: * <p> * * <pre> -I * Perform initial ranking to select top-ranked attributes.</pre> * * <pre> -K &lt;num&gt; * Number of top-ranked attributes that are taken into account.</pre> * * <pre> -T &lt;0 = fixed-set | 1 = fixed-width&gt; * Type of Linear Forward Selection (default = 0).</pre> * * <pre> -S &lt;num&gt; * Size of lookup cache for evaluated subsets. Expressed as a multiple of the * number of attributes in the data set. 
(default = 1).</pre>
 *
 * <pre> -E &lt;string&gt;
 * class name of subset evaluator to use for subset size determination (default =
 * null, same subset evaluator as for ranking and final forward selection is
 * used). Place any evaluator options LAST on the command line following a "--".
 * eg. -E weka.attributeSelection.ClassifierSubsetEval ... -- -M</pre>
 *
 * <pre> -F &lt;num&gt;
 * Number of cross validation folds for subset size determination (default = 5).</pre>
 *
 * <pre> -R &lt;num&gt;
 * Seed for cross validation subset size determination. (default = 1)</pre>
 *
 * <pre> -Z
 * verbose on/off.</pre>
 * <!-- options-end -->
 *
 * @author Martin Guetlein (martin.guetlein@gmail.com)
 * @version $Revision: 1.1 $
 */
public class SubsetSizeForwardSelection extends ASSearch
  implements OptionHandler {
  /** linear forward selection type "Fixed-set" (the default chosen by setOptions) */
  protected static final int TYPE_FIXED_SET = 0;
  /** linear forward selection type "Fixed-width" */
  protected static final int TYPE_FIXED_WIDTH = 1;
  /** pairs each selection type constant with its display label */
  public static final Tag[] TAGS_TYPE = {
    new Tag(TYPE_FIXED_SET, "Fixed-set"),
    new Tag(TYPE_FIXED_WIDTH, "Fixed-width"),
  };

  // member variables
  /** perform initial ranking to select top-ranked attributes */
  protected boolean m_performRanking;

  /**
   * number of top-ranked attributes that are taken into account for the
   * search
   */
  protected int m_numUsedAttributes;

  /** 0 == fixed-set, 1 == fixed-width */
  protected int m_linearSelectionType;

  /**
   * the subset evaluator to use for subset size determination; setOptions
   * leaves this null when no -E option is given
   */
  private SubsetEvaluator m_setSizeEval;

  /**
   * Number of cross validation folds for subset size determination (default =
   * 5).
   */
  protected int m_numFolds;

  /** Seed for cross validation subset size determination.
(default = 1) */  protected int m_seed;  /** number of attributes in the data */  protected int m_numAttribs;  /** total number of subsets evaluated during a search */  protected int m_totalEvals;  /** for debugging */  protected boolean m_verbose;  /** holds the merit of the best subset found */  protected double m_bestMerit;  /** holds the maximum size of the lookup cache for evaluated subsets */  protected int m_cacheSize;  /**   * Constructor   */  public SubsetSizeForwardSelection() {    resetOptions();  }  /**   * Returns a string describing this search method   *   * @return a description of the search method suitable for displaying in the   *         explorer/experimenter gui   */  public String globalInfo() {    return "SubsetSizeForwardSelection:\n\n" +      "Extension of LinearForwardSelection. The search performs an interior " +      "cross-validation (seed and number of folds can be specified). A " +      "LinearForwardSelection is performed on each foldto determine the optimal " +      "subset-size (using the given SubsetSizeEvaluator). Finally, a " +      "LinearForwardSelection up to the optimal subset-size is performed on " +      "the whole data.\n\n"      + "For more information see:\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation        result;        result = new TechnicalInformation(Type.MASTERSTHESIS);    result.setValue(Field.AUTHOR, "Martin Guetlein");    result.setValue(Field.YEAR, "2006");    result.setValue(Field.TITLE, "Large Scale Attribute Selection Using Wrappers");    result.setValue(Field.SCHOOL, "Albert-Ludwigs-Universitat");    result.setValue(Field.ADDRESS, "Freiburg, Germany");        return result;  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   *   */  public Enumeration listOptions() {    Vector newVector = new Vector(9);    newVector.addElement(new Option("\tPerform initial ranking to select the" +                                    "\n\ttop-ranked attributes.", "I", 0, "-I"));    newVector.addElement(new Option(                                    "\tNumber of top-ranked attributes that are " +                                    "\n\ttaken into account by the search.", "K", 1, "-K <num>"));    newVector.addElement(new Option(                                    "\tType of Linear Forward Selection (default = 0).", "T", 1,                                    "-T <0 = fixed-set | 1 = fixed-width>"));    newVector.addElement(new Option(                                    "\tSize of lookup cache for evaluated subsets." +                                    "\n\tExpressed as a multiple of the number of" +                                    "\n\tattributes in the data set. (default = 1)", "S", 1, "-S <num>"));    newVector.addElement(new Option(                                    "\tSubset-evaluator used for subset-size determination." 
+ "-- -M",                                    "E", 1, "-E <subset evaluator>"));    newVector.addElement(new Option("\tNumber of cross validation folds" +                                    "\n\tfor subset size determination (default = 5).", "F", 1, "-F <num>"));    newVector.addElement(new Option("\tSeed for cross validation" +                                    "\n\tsubset size determination. (default = 1)", "R", 1, "-R <num>"));    newVector.addElement(new Option("\tverbose on/off", "Z", 0, "-Z"));    if ((m_setSizeEval != null) && (m_setSizeEval instanceof OptionHandler)) {      newVector.addElement(new Option("", "", 0,                                      "\nOptions specific to " + "evaluator " +                                      m_setSizeEval.getClass().getName() + ":"));      Enumeration enu = ((OptionHandler) m_setSizeEval).listOptions();      while (enu.hasMoreElements()) {        newVector.addElement(enu.nextElement());      }    }    return newVector.elements();  }  /**   * Parses a given list of options.   *   * Valid options are:   * <p>   *   * -I <br>   * Perform initial ranking to select top-ranked attributes.   * <p>   *   * -K <num> <br>   * Number of top-ranked attributes that are taken into account.   * <p>   *   * -T <0 = fixed-set | 1 = fixed-width> <br>   * Typ of Linear Forward Selection (default = 0).   * <p>   *   * -S <num> <br>   * Size of lookup cache for evaluated subsets. Expressed as a multiple of   * the number of attributes in the data set. (default = 1).   * <p>   *   * -E <string> <br>   * class name of subset evaluator to use for subset size determination   * (default = null, same subset evaluator as for ranking and final forward   * selection is used). Place any evaluator options LAST on the command line   * following a "--". eg. -A weka.attributeSelection.ClassifierSubsetEval ... --   * -M   *   * </pre>   *   * -F <num> <br>   * Number of cross validation folds for subset size determination (default =   * 5).   
* <p>   *   * -R <num> <br>   * Seed for cross validation subset size determination. (default = 1)   * <p>   *   * -Z <br>   * verbose on/off.   * <p>   *   * @param options   *            the list of options as an array of strings   * @exception Exception   *                if an option is not supported   *   */  public void setOptions(String[] options) throws Exception {    String optionString;    resetOptions();    setPerformRanking(Utils.getFlag('I', options));    optionString = Utils.getOption('K', options);    if (optionString.length() != 0) {      setNumUsedAttributes(Integer.parseInt(optionString));    }    optionString = Utils.getOption('T', options);    if (optionString.length() != 0) {      setType(new SelectedTag(Integer.parseInt(optionString), TAGS_TYPE));    } else {      setType(new SelectedTag(TYPE_FIXED_SET, TAGS_TYPE));    }    optionString = Utils.getOption('S', options);    if (optionString.length() != 0) {      setLookupCacheSize(Integer.parseInt(optionString));    }    optionString = Utils.getOption('E', options);    if (optionString.length() == 0) {      System.out.println(                         "No subset size evaluator given, using evaluator that is used for final search.");      m_setSizeEval = null;    } else {      setSubsetSizeEvaluator(ASEvaluation.forName(optionString,                                                  Utils.partitionOptions(options)));    }    optionString = Utils.getOption('F', options);    if (optionString.length() != 0) {      setNumSubsetSizeCVFolds(Integer.parseInt(optionString));    }    optionString = Utils.getOption('R', options);    if (optionString.length() != 0) {      setSeed(Integer.parseInt(optionString));    }    m_verbose = Utils.getFlag('Z', options);  }  /**   * Set the maximum size of the evaluated subset cache (hashtable). This is   * expressed as a multiplier for the number of attributes in the data set.   * (default = 1).   
*   * @param size   *            the maximum size of the hashtable   */  public void setLookupCacheSize(int size) {    if (size >= 0) {      m_cacheSize = size;    }  }  /**   * Return the maximum size of the evaluated subset cache (expressed as a   * multiplier for the number of attributes in a data set.   *   * @return the maximum size of the hashtable.   */  public int getLookupCacheSize() {    return m_cacheSize;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for displaying in the   *         explorer/experimenter gui   */  public String lookupCacheSizeTipText() {    return "Set the maximum size of the lookup cache of evaluated subsets. This is " +      "expressed as a multiplier of the number of attributes in the data set. " +      "(default = 1).";  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for displaying in the   *         explorer/experimenter gui   */  public String performRankingTipText() {    return "Perform initial ranking to select top-ranked attributes.";  }  /**   * Perform initial ranking to select top-ranked attributes.   *   * @param b   *            true if initial ranking should be performed   */  public void setPerformRanking(boolean b) {    m_performRanking = b;  }  /**   * Get boolean if initial ranking should be performed to select the   * top-ranked attributes   *   * @return true if initial ranking should be performed   */  public boolean getPerformRanking() {    return m_performRanking;  }  /**   * Returns the tip text for this property   *   * @return tip text for this property suitable for displaying in the   *         explorer/experimenter gui   */  public String numUsedAttributesTipText() {    return "Set the amount of top-ranked attributes that are taken into account by the search process.";  }  /**   * Set the number of top-ranked attributes that taken into account by the   * search process.   
*
   * @param k
   *            the number of attributes
   * @exception Exception
   *                if k is less than 2
   */
  public void setNumUsedAttributes(int k) throws Exception {
    if (k >= 2) {
      m_numUsedAttributes = k;
      return;
    }
    // anything below two is rejected and the stored value is left untouched
    throw new Exception("Value of -K must be >= 2.");
  }

  /**
   * Get the number of top-ranked attributes that are taken into account by
   * the search process.
   *
   * @return the number of top-ranked attributes that are taken into account
   */
  public int getNumUsedAttributes() {
    return this.m_numUsedAttributes;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for displaying in the
   *         explorer/experimenter gui
   */
  public String typeTipText() {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -