📄 metacost.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * MetaCost.java * Copyright (C) 2002 University of Waikato * */package weka.classifiers.meta;import weka.classifiers.Classifier;import weka.classifiers.CostMatrix;import weka.classifiers.RandomizableSingleClassifierEnhancer;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.io.StringReader;import java.io.StringWriter;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * This metaclassifier makes its base classifier cost-sensitive using the method specified in<br/> * <br/> * Pedro Domingos: MetaCost: A general method for making classifiers cost-sensitive. In: Fifth International Conference on Knowledge Discovery and Data Mining, 155-164, 1999.<br/> * <br/> * This classifier should produce similar results to one created by passing the base learner to Bagging, which is in turn passed to a CostSensitiveClassifier operating on minimum expected cost. The difference is that MetaCost produces a single cost-sensitive classifier of the base learner, giving the benefits of fast classification and interpretable output (if the base learner itself is interpretable). This implementation uses all bagging iterations when reclassifying training data (the MetaCost paper reports a marginal improvement when only those iterations containing each training instance are used in reclassifying that instance). * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Domingos1999, * author = {Pedro Domingos}, * booktitle = {Fifth International Conference on Knowledge Discovery and Data Mining}, * pages = {155-164}, * title = {MetaCost: A general method for making classifiers cost-sensitive}, * year = {1999} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -I <num> * Number of bagging iterations. * (default 10)</pre> * * <pre> -C <cost file name> * File name of a cost matrix to use. If this is not supplied, * a cost matrix will be loaded on demand. The name of the * on-demand file is the relation name of the training data * plus ".cost", and the path to the on-demand file is * specified with the -N option.</pre> * * <pre> -N <directory> * Name of a directory to search for cost files when loading * costs on demand (default current directory).</pre> * * <pre> -cost-matrix <matrix> * The cost matrix in Matlab single line format.</pre> * * <pre> -P * Size of each bag, as a percentage of the * training set size. (default 100)</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * Options after -- are passed to the designated classifier.<p> * * @author Len Trigg (len@reeltwo.com) * @version $Revision: 1.22 $ */public class MetaCost extends RandomizableSingleClassifierEnhancer implements TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 1205317833344726855L; /** load cost matrix on demand */ public static final int MATRIX_ON_DEMAND = 1; /** use explicit matrix */ public static final int MATRIX_SUPPLIED = 2; /** Specify possible sources of the cost matrix */ public static final Tag [] TAGS_MATRIX_SOURCE = { new Tag(MATRIX_ON_DEMAND, "Load cost matrix on demand"), new Tag(MATRIX_SUPPLIED, "Use explicit cost matrix") }; /** Indicates the current cost matrix source */ protected int m_MatrixSource = MATRIX_ON_DEMAND; /** * The directory used when loading cost files on demand, null indicates * current directory */ protected File m_OnDemandDirectory = new File(System.getProperty("user.dir")); /** The name of the cost file, for command line options */ protected String m_CostFile; /** The cost matrix */ protected CostMatrix m_CostMatrix = new CostMatrix(1); /** The number of iterations. */ protected int m_NumIterations = 10; /** The size of each bag sample, as a percentage of the training size */ protected int m_BagSizePercent = 100; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "This metaclassifier makes its base classifier cost-sensitive using the " + "method specified in\n\n" + getTechnicalInformation().toString() + "\n\n" + "This classifier should produce similar results to one created by " + "passing the base learner to Bagging, which is in turn passed to a " + "CostSensitiveClassifier operating on minimum expected cost. The difference " + "is that MetaCost produces a single cost-sensitive classifier of the " + "base learner, giving the benefits of fast classification and interpretable " + "output (if the base learner itself is interpretable). This implementation " + "uses all bagging iterations when reclassifying training data (the MetaCost " + "paper reports a marginal improvement when only those iterations containing " + "each training instance are used in reclassifying that instance)."; } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Pedro Domingos"); result.setValue(Field.TITLE, "MetaCost: A general method for making classifiers cost-sensitive"); result.setValue(Field.BOOKTITLE, "Fifth International Conference on Knowledge Discovery and Data Mining"); result.setValue(Field.YEAR, "1999"); result.setValue(Field.PAGES, "155-164"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector newVector = new Vector(6); newVector.addElement(new Option( "\tNumber of bagging iterations.\n" + "\t(default 10)", "I", 1, "-I <num>")); newVector.addElement(new Option( "\tFile name of a cost matrix to use. If this is not supplied,\n" +"\ta cost matrix will be loaded on demand. The name of the\n" +"\ton-demand file is the relation name of the training data\n" +"\tplus \".cost\", and the path to the on-demand file is\n" +"\tspecified with the -N option.", "C", 1, "-C <cost file name>")); newVector.addElement(new Option( "\tName of a directory to search for cost files when loading\n" +"\tcosts on demand (default current directory).", "N", 1, "-N <directory>")); newVector.addElement(new Option( "\tThe cost matrix in Matlab single line format.", "cost-matrix", 1, "-cost-matrix <matrix>")); newVector.addElement(new Option( "\tSize of each bag, as a percentage of the\n" + "\ttraining set size. (default 100)", "P", 1, "-P")); Enumeration enu = super.listOptions(); while (enu.hasMoreElements()) { newVector.addElement(enu.nextElement()); } return newVector.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -I <num> * Number of bagging iterations. * (default 10)</pre> * * <pre> -C <cost file name> * File name of a cost matrix to use. If this is not supplied, * a cost matrix will be loaded on demand. The name of the * on-demand file is the relation name of the training data * plus ".cost", and the path to the on-demand file is * specified with the -N option.</pre> * * <pre> -N <directory> * Name of a directory to search for cost files when loading * costs on demand (default current directory).</pre> * * <pre> -cost-matrix <matrix> * The cost matrix in Matlab single line format.</pre> * * <pre> -P * Size of each bag, as a percentage of the * training set size. (default 100)</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * Options after -- are passed to the designated classifier.<p> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String bagIterations = Utils.getOption('I', options); if (bagIterations.length() != 0) { setNumIterations(Integer.parseInt(bagIterations)); } else { setNumIterations(10); } String bagSize = Utils.getOption('P', options); if (bagSize.length() != 0) { setBagSizePercent(Integer.parseInt(bagSize)); } else { setBagSizePercent(100); } String costFile = Utils.getOption('C', options); if (costFile.length() != 0) { setCostMatrix(new CostMatrix(new BufferedReader( new FileReader(costFile)))); setCostMatrixSource(new SelectedTag(MATRIX_SUPPLIED, TAGS_MATRIX_SOURCE)); m_CostFile = costFile; } else { setCostMatrixSource(new SelectedTag(MATRIX_ON_DEMAND, TAGS_MATRIX_SOURCE)); } String demandDir = Utils.getOption('N', options); if (demandDir.length() != 0) { setOnDemandDirectory(new File(demandDir));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -