bvdecomposesegcvsub.java
来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 1,109 行 · 第 1/3 页
JAVA
1,109 行
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * BVDecomposeSegCVSub.java * Copyright (C) 2003 Paul Conilione * * Based on the class: BVDecompose.java by Len Trigg (1999) *//* * DEDICATION * * Paul Conilione would like to express his deep gratitude and appreciation * to his Chinese Buddhist Taoist Master Sifu Chow Yuk Nen for the abilities * and insight that he has been taught, which have allowed him to program in * a clear and efficient manner. * * Master Sifu Chow Yuk Nen's Teachings are unique and precious. They are * applicable to any field of human endeavour. Through his unique and powerful * ability to skilfully apply Chinese Buddhist Teachings, people have achieved * success in; Computing, chemical engineering, business, accounting, philosophy * and more. 
* */package weka.classifiers;import weka.core.Attribute;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import java.io.BufferedReader;import java.io.FileReader;import java.io.Reader;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * This class performs Bias-Variance decomposion on any classifier using the sub-sampled cross-validation procedure as specified in (1).<br/> * The Kohavi and Wolpert definition of bias and variance is specified in (2).<br/> * The Webb definition of bias and variance is specified in (3).<br/> * <br/> * Geoffrey I. Webb, Paul Conilione (2002). Estimating bias and variance from data. School of Computer Science and Software Engineering, Victoria, Australia.<br/> * <br/> * Ron Kohavi, David H. Wolpert: Bias Plus Variance Decomposition for Zero-One Loss Functions. In: Machine Learning: Proceedings of the Thirteenth International Conference, 275-283, 1996.<br/> * <br/> * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. 40(2):159-196. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @misc{Webb2002, * address = {School of Computer Science and Software Engineering, Victoria, Australia}, * author = {Geoffrey I. Webb and Paul Conilione}, * institution = {Monash University}, * title = {Estimating bias and variance from data}, * year = {2002}, * PDF = {http://www.csse.monash.edu.au/~webb/Files/WebbConilione04.pdf} * } * * @inproceedings{Kohavi1996, * author = {Ron Kohavi and David H. 
Wolpert}, * booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference}, * editor = {Lorenza Saitta}, * pages = {275-283}, * publisher = {Morgan Kaufmann}, * title = {Bias Plus Variance Decomposition for Zero-One Loss Functions}, * year = {1996}, * PS = {http://robotics.stanford.edu/~ronnyk/biasVar.ps} * } * * @article{Webb2000, * author = {Geoffrey I. Webb}, * journal = {Machine Learning}, * number = {2}, * pages = {159-196}, * title = {MultiBoosting: A Technique for Combining Boosting and Wagging}, * volume = {40}, * year = {2000} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -c <class index> * The index of the class attribute. * (default last)</pre> * * <pre> -D * Turn on debugging output.</pre> * * <pre> -l <num> * The number of times each instance is classified. * (default 10)</pre> * * <pre> -p <proportion of objects in common> * The average proportion of instances common between any two training sets</pre> * * <pre> -s <seed> * The random number seed used.</pre> * * <pre> -t <name of arff file> * The name of the arff file used for the decomposition.</pre> * * <pre> -T <number of instances in training set> * The number of instances in the training set.</pre> * * <pre> -W <classifier class name> * Full class name of the learner used in the decomposition. * eg: weka.classifiers.bayes.NaiveBayes</pre> * * <pre> * Options specific to learner weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * Options after -- are passed to the designated sub-learner. <p> * * @author Paul Conilione (paulc4321@yahoo.com.au) * @version $Revision: 1.5 $ */public class BVDecomposeSegCVSub implements OptionHandler, TechnicalInformationHandler { /** Debugging mode, gives extra output if true. 
*/
  protected boolean m_Debug;

  /** Base classifier instance used for getting and testing options; initialised to ZeroR. */
  protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();

  /** Options to be passed to the base classifier. */
  protected String [] m_ClassifierOptions;

  /** Number of times an instance is classified. */
  protected int m_ClassifyIterations;

  /** Name of the data file used for the decomposition. */
  protected String m_DataFileName;

  /** Index of the class attribute; initialised to -1. */
  protected int m_ClassIndex = -1;

  /** Random number seed; initialised to 1. */
  protected int m_Seed = 1;

  /** Calculated Kohavi & Wolpert bias (squared). */
  protected double m_KWBias;

  /** Calculated Kohavi & Wolpert variance. */
  protected double m_KWVariance;

  /** Calculated Kohavi & Wolpert sigma. */
  protected double m_KWSigma;

  /** Calculated Webb bias. */
  protected double m_WBias;

  /** Calculated Webb variance. */
  protected double m_WVariance;

  /** Error rate. */
  protected double m_Error;

  /** Training set size. */
  protected int m_TrainSize;

  /** Proportion of instances common between any two training sets. */
  protected double m_P;

  /**
   * Builds the human-readable description of this class: a fixed summary
   * paragraph followed by the string form of
   * {@link #getTechnicalInformation()}.
   *
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // NOTE(review): "decomposion" is a typo for "decomposition"; it is kept
    // unchanged here because this text is emitted at runtime.
    final String summary =
        "This class performs Bias-Variance decomposion on any classifier using the "
      + "sub-sampled cross-validation procedure as specified in (1).\n"
      + "The Kohavi and Wolpert definition of bias and variance is specified in (2).\n"
      + "The Webb definition of bias and variance is specified in (3).\n\n";

    final String references = getTechnicalInformation().toString();

    return summary + references;
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; TechnicalInformation additional; result = new TechnicalInformation(Type.MISC); result.setValue(Field.AUTHOR, "Geoffrey I. Webb and Paul Conilione"); result.setValue(Field.YEAR, "2002"); result.setValue(Field.TITLE, "Estimating bias and variance from data"); result.setValue(Field.INSTITUTION, "Monash University"); result.setValue(Field.ADDRESS, "School of Computer Science and Software Engineering, Victoria, Australia"); result.setValue(Field.PDF, "http://www.csse.monash.edu.au/~webb/Files/WebbConilione04.pdf"); additional = result.add(Type.INPROCEEDINGS); additional.setValue(Field.AUTHOR, "Ron Kohavi and David H. Wolpert"); additional.setValue(Field.YEAR, "1996"); additional.setValue(Field.TITLE, "Bias Plus Variance Decomposition for Zero-One Loss Functions"); additional.setValue(Field.BOOKTITLE, "Machine Learning: Proceedings of the Thirteenth International Conference"); additional.setValue(Field.PUBLISHER, "Morgan Kaufmann"); additional.setValue(Field.EDITOR, "Lorenza Saitta"); additional.setValue(Field.PAGES, "275-283"); additional.setValue(Field.PS, "http://robotics.stanford.edu/~ronnyk/biasVar.ps"); additional = result.add(Type.ARTICLE); additional.setValue(Field.AUTHOR, "Geoffrey I. Webb"); additional.setValue(Field.YEAR, "2000"); additional.setValue(Field.TITLE, "MultiBoosting: A Technique for Combining Boosting and Wagging"); additional.setValue(Field.JOURNAL, "Machine Learning"); additional.setValue(Field.VOLUME, "40"); additional.setValue(Field.NUMBER, "2"); additional.setValue(Field.PAGES, "159-196"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. 
*/
  public Enumeration listOptions() {
    // Raw collection types are kept on purpose: the surrounding file does not
    // use generics and this method returns a raw Enumeration.
    Vector options = new Vector(8);

    options.add(new Option(
        "\tThe index of the class attribute.\n"
        + "\t(default last)",
        "c", 1, "-c <class index>"));
    options.add(new Option(
        "\tTurn on debugging output.",
        "D", 0, "-D"));
    options.add(new Option(
        "\tThe number of times each instance is classified.\n"
        + "\t(default 10)",
        "l", 1, "-l <num>"));
    options.add(new Option(
        "\tThe average proportion of instances common between any two training sets",
        "p", 1, "-p <proportion of objects in common>"));
    options.add(new Option(
        "\tThe random number seed used.",
        "s", 1, "-s <seed>"));
    options.add(new Option(
        "\tThe name of the arff file used for the decomposition.",
        "t", 1, "-t <name of arff file>"));
    options.add(new Option(
        "\tThe number of instances in the training set.",
        "T", 1, "-T <number of instances in training set>"));
    options.add(new Option(
        "\tFull class name of the learner used in the decomposition.\n"
        + "\teg: weka.classifiers.bayes.NaiveBayes",
        "W", 1, "-W <classifier class name>"));

    // instanceof is false for null, so this also covers an unset classifier.
    if (!(m_Classifier instanceof OptionHandler)) {
      return options.elements();
    }

    // Header entry followed by every option the base classifier itself lists.
    options.add(new Option(
        "", "", 0,
        "\nOptions specific to learner " + m_Classifier.getClass().getName() + ":"));
    for (Enumeration learnerOptions = ((OptionHandler) m_Classifier).listOptions();
         learnerOptions.hasMoreElements();) {
      options.add(learnerOptions.nextElement());
    }

    return options.elements();
  }

  /**
   * Sets the OptionHandler's options using the given list. All options
   * will be set (or reset) during this call (i.e. incremental setting
   * of options is not possible). <p/>
   *
   * <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -c <class index>
   * The index of the class attribute.
   * (default last)</pre>
   *
   * <pre> -D
   * Turn on debugging output.</pre>
   *
   * <pre> -l <num>
   * The number of times each instance is classified.
* (default 10)</pre> * * <pre> -p <proportion of objects in common> * The average proportion of instances common between any two training sets</pre> * * <pre> -s <seed> * The random number seed used.</pre> * * <pre> -t <name of arff file> * The name of the arff file used for the decomposition.</pre> * * <pre> -T <number of instances in training set> * The number of instances in the training set.</pre> * * <pre> -W <classifier class name> * Full class name of the learner used in the decomposition. * eg: weka.classifiers.bayes.NaiveBayes</pre> * * <pre> * Options specific to learner weka.classifiers.rules.ZeroR: * </pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDebug(Utils.getFlag('D', options)); String classIndex = Utils.getOption('c', options); if (classIndex.length() != 0) { if (classIndex.toLowerCase().equals("last")) { setClassIndex(0); } else if (classIndex.toLowerCase().equals("first")) { setClassIndex(1); } else { setClassIndex(Integer.parseInt(classIndex));
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?