WrapperSubsetEval.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 657 行 · 第 1/2 页

JAVA
657
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    WrapperSubsetEval.java *    Copyright (C) 1999 Mark Hall * */package weka.attributeSelection;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.rules.ZeroR;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.UnsupportedAttributeTypeException;import weka.core.Utils;import weka.filters.Filter;import weka.filters.unsupervised.attribute.Remove;import java.util.BitSet;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/**  <!-- globalinfo-start --> * WrapperSubsetEval:<br/> * <br/> * Evaluates attribute sets by using a learning scheme. Cross validation is used to estimate the accuracy of the learning scheme for a set of attributes.<br/> * <br/> * For more information see:<br/> * <br/> * Ron Kohavi, George H. John (1997). Wrappers for feature subset selection. Artificial Intelligence. 97(1-2):273-324. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{Kohavi1997, *    author = {Ron Kohavi and George H. 
John},
 *    journal = {Artificial Intelligence},
 *    note = {Special issue on relevance},
 *    number = {1-2},
 *    pages = {273-324},
 *    title = {Wrappers for feature subset selection},
 *    volume = {97},
 *    year = {1997},
 *    ISSN = {0004-3702}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -B &lt;base learner&gt;
 *  class name of base learner to use for
 *  accuracy estimation. Place any
 *  classifier options LAST on the
 *  command line following a "--".
 *  eg. -B weka.classifiers.bayes.NaiveBayes ... -- -K</pre>
 * 
 * <pre> -F &lt;num&gt;
 *  number of cross validation folds to use
 *  for estimating accuracy.
 *  (default=5)</pre>
 * 
 * <pre> -R &lt;seed&gt;
 *  Seed for cross validation accuracy 
 *  estimation.
 *  (default = 1)</pre>
 * 
 * <pre> -T &lt;num&gt;
 *  threshold by which to execute another cross validation
 *  (standard deviation---expressed as a percentage of the mean).
 *  (default=0.01(1%))</pre>
 * 
 * <pre> 
 * Options specific to scheme weka.classifiers.rules.ZeroR:
 * </pre>
 * 
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 * 
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.26 $
 */
public class WrapperSubsetEval
  extends SubsetEvaluator
  implements OptionHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4573057658746728675L;

  /** training instances */
  private Instances m_trainInstances;

  /** class index of the training instances */
  private int m_classIndex;

  /** number of attributes in the training data */
  private int m_numAttribs;

  /** number of instances in the training data */
  private int m_numInstances;

  /** holds an evaluation object (used to cross-validate the base classifier) */
  private Evaluation m_Evaluation;

  /** holds the base classifier object whose accuracy scores each subset */
  private Classifier m_BaseClassifier;

  /** number of folds to use for cross validation */
  private int m_folds;

  /** random number seed 
*/  private int m_seed;  /**    * the threshold by which to do further cross validations when   * estimating the accuracy of a subset   */  private double m_threshold;  /**   * Returns a string describing this attribute evaluator   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "WrapperSubsetEval:\n\n"      +"Evaluates attribute sets by using a learning scheme. Cross "      +"validation is used to estimate the accuracy of the learning "      +"scheme for a set of attributes.\n\n"      + "For more information see:\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "Ron Kohavi and George H. John");    result.setValue(Field.YEAR, "1997");    result.setValue(Field.TITLE, "Wrappers for feature subset selection");    result.setValue(Field.JOURNAL, "Artificial Intelligence");    result.setValue(Field.VOLUME, "97");    result.setValue(Field.NUMBER, "1-2");    result.setValue(Field.PAGES, "273-324");    result.setValue(Field.NOTE, "Special issue on relevance");    result.setValue(Field.ISSN, "0004-3702");        return result;  }  /**   * Constructor. Calls restOptions to set default options   **/  public WrapperSubsetEval () {    resetOptions();  }  /**   * Returns an enumeration describing the available options.   * @return an enumeration of all the available options.   
**/
  public Enumeration listOptions () {
    Vector opts = new Vector(4);

    // -B: the wrapped classifier whose estimated accuracy scores each subset.
    opts.addElement(new Option("\tclass name of base learner to use for"
                               + "\n\taccuracy estimation. Place any"
                               + "\n\tclassifier options LAST on the"
                               + "\n\tcommand line following a \"--\"."
                               + "\n\teg. -B weka.classifiers.bayes.NaiveBayes ... "
                               + "-- -K", "B", 1, "-B <base learner>"));
    // -F: number of cross validation folds used for accuracy estimation.
    opts.addElement(new Option("\tnumber of cross validation folds to "
                               + "use\n\tfor estimating accuracy."
                               + "\n\t(default=5)", "F", 1, "-F <num>"));
    // -R: random seed for the cross validation.
    opts.addElement(new Option("\tSeed for cross validation accuracy "
                               + "\n\testimation."
                               + "\n\t(default = 1)", "R", 1, "-R <seed>"));
    // -T: std-dev threshold that triggers a further cross validation run.
    opts.addElement(new Option("\tthreshold by which to execute "
                               + "another cross validation"
                               + "\n\t(standard deviation---"
                               + "expressed as a percentage of the "
                               + "mean).\n\t(default=0.01(1%))",
                               "T", 1, "-T <num>"));

    // Append the options of the wrapped classifier, when it publishes any.
    if ((m_BaseClassifier != null)
        && (m_BaseClassifier instanceof OptionHandler)) {
      opts.addElement(new Option("", "", 0, "\nOptions specific to scheme "
                                 + m_BaseClassifier.getClass().getName()
                                 + ":"));
      for (Enumeration e = ((OptionHandler) m_BaseClassifier).listOptions();
           e.hasMoreElements(); ) {
        opts.addElement(e.nextElement());
      }
    }
    return opts.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -B &lt;base learner&gt;
   *  class name of base learner to use for
   *  accuracy estimation. Place any
   *  classifier options LAST on the
   *  command line following a "--".
   *  eg. -B weka.classifiers.bayes.NaiveBayes ... -- -K</pre>
   * 
   * <pre> -F &lt;num&gt;
   *  number of cross validation folds to use
   *  for estimating accuracy.
*  (default=5)</pre>   *    * <pre> -R &lt;seed&gt;   *  Seed for cross validation accuracy    *  estimation.   *  (default = 1)</pre>   *    * <pre> -T &lt;num&gt;   *  threshold by which to execute another cross validation   *  (standard deviation---expressed as a percentage of the mean).   *  (default=0.01(1%))</pre>   *    * <pre>    * Options specific to scheme weka.classifiers.rules.ZeroR:   * </pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions (String[] options)    throws Exception {    String optionString;    resetOptions();    optionString = Utils.getOption('B', options);    if (optionString.length() == 0) {      throw  new Exception("A learning scheme must be specified with" 			   + "-B option");    }    setClassifier(Classifier.forName(optionString, 				     Utils.partitionOptions(options)));    optionString = Utils.getOption('F', options);    if (optionString.length() != 0) {      setFolds(Integer.parseInt(optionString));    }    optionString = Utils.getOption('R', options);    if (optionString.length() != 0) {      setSeed(Integer.parseInt(optionString));    }    //       optionString = Utils.getOption('S',options);    //       if (optionString.length() != 0)    //         {    //  	 seed = Integer.parseInt(optionString);    //         }    optionString = Utils.getOption('T', options);    if (optionString.length() != 0) {      Double temp;      temp = Double.valueOf(optionString);      setThreshold(temp.doubleValue());    }  }    /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String thresholdTipText() {    return "Repeat xval if stdev of mean exceeds this value.";  }  /**   * Set the value of the 
threshold for repeating cross validation
   *
   * @param t the value of the threshold
   */
  public void setThreshold (double t) {
    // Remember the cutoff used to decide whether another cross validation
    // run is needed when estimating the accuracy of a subset.
    this.m_threshold = t;
  }

  /**
   * Get the value of the threshold
   *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?