⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 metacost.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    MetaCost.java *    Copyright (C) 2002 University of Waikato * */package weka.classifiers.meta;import weka.classifiers.Evaluation;import weka.classifiers.Classifier;import weka.classifiers.CostMatrix;import weka.classifiers.rules.ZeroR;import java.io.*;import java.util.*;import weka.core.*;import weka.filters.Filter;/** * This metaclassifier makes its base classifier cost-sensitive using the * method specified in <p> * * Pedro Domingos (1999). <i>MetaCost: A general method for making classifiers * cost-sensitive</i>, Proceedings of the Fifth International Conference on  * Knowledge Discovery and Data Mining, pp. 155-164. Also available online at * <a href="http://www.cs.washington.edu/homes/pedrod/kdd99.ps.gz"> * http://www.cs.washington.edu/homes/pedrod/kdd99.ps.gz</a>. <p> * * This classifier should produce similar results to one created by * passing the base learner to Bagging, which is in turn passed to a * CostSensitiveClassifier operating on minimum expected cost. The difference * is that MetaCost produces a single cost-sensitive classifier of the * base learner, giving the benefits of fast classification and interpretable * output (if the base learner itself is interpretable). This implementation  * uses all bagging iterations when reclassifying training data (the MetaCost * paper reports a marginal improvement when only those iterations containing * each training instance are used in reclassifying that instance). <p> * * Valid options are:<p> * * -W classname <br> * Specify the full class name of a classifier (required).<p> * * -C cost file <br> * File name of a cost matrix to use. If this is not supplied, a cost * matrix will be loaded on demand. The name of the on-demand file * is the relation name of the training data plus ".cost", and the * path to the on-demand file is specified with the -D option.<p> * * -D directory <br> * Name of a directory to search for cost files when loading costs on demand * (default current directory). <p> * * -I num <br> * Set the number of bagging iterations (default 10). <p> * * -S seed <br> * Random number seed used when reweighting by resampling (default 1).<p> * * -P num <br> * Size of each bag, as a percentage of the training size (default 100). <p> * * Options after -- are passed to the designated classifier.<p> * * @author Len Trigg (len@reeltwo.com) * @version $Revision: 1.1.1.1 $  */public class MetaCost extends Classifier  implements OptionHandler {  /* Specify possible sources of the cost matrix */  public static final int MATRIX_ON_DEMAND = 1;  public static final int MATRIX_SUPPLIED = 2;  public static final Tag [] TAGS_MATRIX_SOURCE = {    new Tag(MATRIX_ON_DEMAND, "Load cost matrix on demand"),    new Tag(MATRIX_SUPPLIED, "Use explicit cost matrix")  };  /** Indicates the current cost matrix source */  protected int m_MatrixSource = MATRIX_ON_DEMAND;  /**    * The directory used when loading cost files on demand, null indicates   * current directory    */  protected File m_OnDemandDirectory = new File(System.getProperty("user.dir"));  /** The name of the cost file, for command line options */  protected String m_CostFile;  /** The classifier */  protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();  /** The cost matrix */  protected CostMatrix m_CostMatrix = new CostMatrix(1);  /** The number of iterations. */  protected int m_NumIterations = 10;  /** Seed for reweighting using resampling. */  protected int m_Seed = 1;  /** The size of each bag sample, as a percentage of the training size */  protected int m_BagSizePercent = 100;  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector newVector = new Vector(6);    newVector.addElement(new Option(	      "\tNumber of bagging iterations.\n"	      + "\t(default 10)",	      "I", 1, "-I <num>"));    newVector.addElement(new Option(	      "\tFull class name of classifier to use. (required)\n"	      + "\teg: weka.classifiers.bayes.NaiveBayes",	      "W", 1, "-W <class name>"));    newVector.addElement(new Option(	      "\tFile name of a cost matrix to use. If this is not supplied,\n"              +"\ta cost matrix will be loaded on demand. The name of the\n"              +"\ton-demand file is the relation name of the training data\n"              +"\tplus \".cost\", and the path to the on-demand file is\n"              +"\tspecified with the -D option.",	      "C", 1, "-C <cost file name>"));    newVector.addElement(new Option(              "\tName of a directory to search for cost files when loading\n"              +"\tcosts on demand (default current directory).",              "D", 1, "-D <directory>"));    newVector.addElement(new Option(	      "\tSeed used when reweighting via resampling. (Default 1)",	      "S", 1, "-S <num>"));    newVector.addElement(new Option(              "\tSize of each bag, as a percentage of the\n"               + "\ttraining set size. (default 100)",              "P", 1, "-P"));    return newVector.elements();  }  /**   * Parses a given list of options. Valid options are:<p>   *   * -W classname <br>   * Specify the full class name of a classifier (required).<p>   *   * -C cost file <br>   * File name of a cost matrix to use. If this is not supplied, a cost   * matrix will be loaded on demand. The name of the on-demand file   * is the relation name of the training data plus ".cost", and the   * path to the on-demand file is specified with the -D option.<p>   *   * -D directory <br>   * Name of a directory to search for cost files when loading costs on demand   * (default current directory). <p>   *   * -I num <br>   * Set the number of bagging iterations (default 10). <p>   *   * -S seed <br>   * Random number seed used when reweighting by resampling (default 1).<p>   *   * -P num <br>   * Size of each bag, as a percentage of the training size (default 100). <p>   *   * Options after -- are passed to the designated classifier.<p>   *   * @param options the list of options as an array of strings   * @exception Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String bagIterations = Utils.getOption('I', options);    if (bagIterations.length() != 0) {      setNumIterations(Integer.parseInt(bagIterations));    } else {      setNumIterations(10);    }    String seedString = Utils.getOption('S', options);    if (seedString.length() != 0) {      setSeed(Integer.parseInt(seedString));    } else {      setSeed(1);    }    String bagSize = Utils.getOption('P', options);    if (bagSize.length() != 0) {      setBagSizePercent(Integer.parseInt(bagSize));    } else {      setBagSizePercent(100);    }    String classifierName = Utils.getOption('W', options);    if (classifierName.length() == 0) {      throw new Exception("A classifier must be specified with"			  + " the -W option.");    }    setClassifier(Classifier.forName(classifierName,				     Utils.partitionOptions(options)));    String costFile = Utils.getOption('C', options);    if (costFile.length() != 0) {      setCostMatrix(new CostMatrix(new BufferedReader(                                   new FileReader(costFile))));      setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED,                                          TAGS_MATRIX_SOURCE));      m_CostFile = costFile;    } else {      setCostMatrixSource(new SelectedTag(MATRIX_ON_DEMAND,                                           TAGS_MATRIX_SOURCE));    }        String demandDir = Utils.getOption('D', options);    if (demandDir.length() != 0) {      setOnDemandDirectory(new File(demandDir));    }  }  /**   * Gets the current settings of the Classifier.   *   * @return an array of strings suitable for passing to setOptions   */  public String [] getOptions() {    String [] classifierOptions = new String [0];    if ((m_Classifier != null) && 	(m_Classifier instanceof OptionHandler)) {      classifierOptions = ((OptionHandler)m_Classifier).getOptions();    }    String [] options = new String [classifierOptions.length + 12];    int current = 0;    if (m_MatrixSource == MATRIX_SUPPLIED) {      if (m_CostFile != null) {        options[current++] = "-C";        options[current++] = "" + m_CostFile;      }    } else {      options[current++] = "-D";      options[current++] = "" + getOnDemandDirectory();    }    options[current++] = "-I"; options[current++] = "" + getNumIterations();    options[current++] = "-S"; options[current++] = "" + getSeed();    options[current++] = "-P"; options[current++] = "" + getBagSizePercent();    if (getClassifier() != null) {      options[current++] = "-W";      options[current++] = getClassifier().getClass().getName();    }    options[current++] = "--";    System.arraycopy(classifierOptions, 0, options, current, 		     classifierOptions.length);    current += classifierOptions.length;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -