bvdecomposesegcvsub.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 1,109 行 · 第 1/3 页

JAVA
1,109
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    BVDecomposeSegCVSub.java *    Copyright (C) 2003 Paul Conilione * *    Based on the class: BVDecompose.java by Len Trigg (1999) *//* *    DEDICATION * *    Paul Conilione would like to express his deep gratitude and appreciation *    to his Chinese Buddhist Taoist Master Sifu Chow Yuk Nen for the abilities *    and insight that he has been taught, which have allowed him to program in  *    a clear and efficient manner. * *    Master Sifu Chow Yuk Nen's Teachings are unique and precious. They are *    applicable to any field of human endeavour. Through his unique and powerful *    ability to skilfully apply Chinese Buddhist Teachings, people have achieved *    success in; Computing, chemical engineering, business, accounting, philosophy *    and more. * */package weka.classifiers;import weka.core.Attribute;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import java.io.BufferedReader;import java.io.FileReader;import java.io.Reader;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * This class performs Bias-Variance decomposion on any classifier using the sub-sampled cross-validation procedure as specified in (1).<br/> * The Kohavi and Wolpert definition of bias and variance is specified in (2).<br/> * The Webb definition of bias and variance is specified in (3).<br/> * <br/> * Geoffrey I. Webb, Paul Conilione (2002). Estimating bias and variance from data. School of Computer Science and Software Engineering, Victoria, Australia.<br/> * <br/> * Ron Kohavi, David H. Wolpert: Bias Plus Variance Decomposition for Zero-One Loss Functions. In: Machine Learning: Proceedings of the Thirteenth International Conference, 275-283, 1996.<br/> * <br/> * Geoffrey I. Webb (2000). MultiBoosting: A Technique for Combining Boosting and Wagging. Machine Learning. 40(2):159-196. * <p/> <!-- globalinfo-end --> *  <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;misc{Webb2002, *    address = {School of Computer Science and Software Engineering, Victoria, Australia}, *    author = {Geoffrey I. Webb and Paul Conilione}, *    institution = {Monash University}, *    title = {Estimating bias and variance from data}, *    year = {2002}, *    PDF = {http://www.csse.monash.edu.au/~webb/Files/WebbConilione04.pdf} * } *  * &#64;inproceedings{Kohavi1996, *    author = {Ron Kohavi and David H. Wolpert}, *    booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference}, *    editor = {Lorenza Saitta}, *    pages = {275-283}, *    publisher = {Morgan Kaufmann}, *    title = {Bias Plus Variance Decomposition for Zero-One Loss Functions}, *    year = {1996}, *    PS = {http://robotics.stanford.edu/~ronnyk/biasVar.ps} * } *  * &#64;article{Webb2000, *    author = {Geoffrey I. Webb}, *    journal = {Machine Learning}, *    number = {2}, *    pages = {159-196}, *    title = {MultiBoosting: A Technique for Combining Boosting and Wagging}, *    volume = {40}, *    year = {2000} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -c &lt;class index&gt; *  The index of the class attribute. *  (default last)</pre> *  * <pre> -D *  Turn on debugging output.</pre> *  * <pre> -l &lt;num&gt; *  The number of times each instance is classified. *  (default 10)</pre> *  * <pre> -p &lt;proportion of objects in common&gt; *  The average proportion of instances common between any two training sets</pre> *  * <pre> -s &lt;seed&gt; *  The random number seed used.</pre> *  * <pre> -t &lt;name of arff file&gt; *  The name of the arff file used for the decomposition.</pre> *  * <pre> -T &lt;number of instances in training set&gt; *  The number of instances in the training set.</pre> *  * <pre> -W &lt;classifier class name&gt; *  Full class name of the learner used in the decomposition. *  eg: weka.classifiers.bayes.NaiveBayes</pre> *  * <pre>  * Options specific to learner weka.classifiers.rules.ZeroR: * </pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  <!-- options-end --> * * Options after -- are passed to the designated sub-learner. <p> * * @author Paul Conilione (paulc4321@yahoo.com.au) * @version $Revision: 1.5 $ */public class BVDecomposeSegCVSub    implements OptionHandler, TechnicalInformationHandler {        /** Debugging mode, gives extra output if true. */    protected boolean m_Debug;        /** An instantiated base classifier used for getting and testing options. */    protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();        /** The options to be passed to the base classifier. */    protected String [] m_ClassifierOptions;        /** The number of times an instance is classified*/    protected int m_ClassifyIterations;        /** The name of the data file used for the decomposition */    protected String m_DataFileName;        /** The index of the class attribute */    protected int m_ClassIndex = -1;        /** The random number seed */    protected int m_Seed = 1;        /** The calculated Kohavi & Wolpert bias (squared) */    protected double m_KWBias;        /** The calculated Kohavi & Wolpert variance */    protected double m_KWVariance;        /** The calculated Kohavi & Wolpert sigma */    protected double m_KWSigma;        /** The calculated Webb bias */    protected double m_WBias;        /** The calculated Webb variance */    protected double m_WVariance;        /** The error rate */    protected double m_Error;        /** The training set size */    protected int m_TrainSize;        /** Proportion of instances common between any two training sets. */    protected double m_P;        /**     * Returns a string describing this object     * @return a description of the classifier suitable for     * displaying in the explorer/experimenter gui     */    public String globalInfo() {      return           "This class performs Bias-Variance decomposion on any classifier using the "        + "sub-sampled cross-validation procedure as specified in (1).\n"        + "The Kohavi and Wolpert definition of bias and variance is specified in (2).\n"        + "The Webb definition of bias and variance is specified in (3).\n\n"        + getTechnicalInformation().toString();    }    /**     * Returns an instance of a TechnicalInformation object, containing      * detailed information about the technical background of this class,     * e.g., paper reference or book this class is based on.     *      * @return the technical information about this class     */    public TechnicalInformation getTechnicalInformation() {      TechnicalInformation 	result;      TechnicalInformation 	additional;            result = new TechnicalInformation(Type.MISC);      result.setValue(Field.AUTHOR, "Geoffrey I. Webb and Paul Conilione");      result.setValue(Field.YEAR, "2002");      result.setValue(Field.TITLE, "Estimating bias and variance from data");      result.setValue(Field.INSTITUTION, "Monash University");      result.setValue(Field.ADDRESS, "School of Computer Science and Software Engineering, Victoria, Australia");      result.setValue(Field.PDF, "http://www.csse.monash.edu.au/~webb/Files/WebbConilione04.pdf");      additional = result.add(Type.INPROCEEDINGS);      additional.setValue(Field.AUTHOR, "Ron Kohavi and David H. Wolpert");      additional.setValue(Field.YEAR, "1996");      additional.setValue(Field.TITLE, "Bias Plus Variance Decomposition for Zero-One Loss Functions");      additional.setValue(Field.BOOKTITLE, "Machine Learning: Proceedings of the Thirteenth International Conference");      additional.setValue(Field.PUBLISHER, "Morgan Kaufmann");      additional.setValue(Field.EDITOR, "Lorenza Saitta");      additional.setValue(Field.PAGES, "275-283");      additional.setValue(Field.PS, "http://robotics.stanford.edu/~ronnyk/biasVar.ps");      additional = result.add(Type.ARTICLE);      additional.setValue(Field.AUTHOR, "Geoffrey I. Webb");      additional.setValue(Field.YEAR, "2000");      additional.setValue(Field.TITLE, "MultiBoosting: A Technique for Combining Boosting and Wagging");      additional.setValue(Field.JOURNAL, "Machine Learning");      additional.setValue(Field.VOLUME, "40");      additional.setValue(Field.NUMBER, "2");      additional.setValue(Field.PAGES, "159-196");      return result;    }        /**     * Returns an enumeration describing the available options.     *     * @return an enumeration of all the available options.     */    public Enumeration listOptions() {                Vector newVector = new Vector(8);                newVector.addElement(new Option(        "\tThe index of the class attribute.\n"+        "\t(default last)",        "c", 1, "-c <class index>"));        newVector.addElement(new Option(        "\tTurn on debugging output.",        "D", 0, "-D"));        newVector.addElement(new Option(        "\tThe number of times each instance is classified.\n"        +"\t(default 10)",        "l", 1, "-l <num>"));        newVector.addElement(new Option(        "\tThe average proportion of instances common between any two training sets",        "p", 1, "-p <proportion of objects in common>"));        newVector.addElement(new Option(        "\tThe random number seed used.",        "s", 1, "-s <seed>"));        newVector.addElement(new Option(        "\tThe name of the arff file used for the decomposition.",        "t", 1, "-t <name of arff file>"));        newVector.addElement(new Option(        "\tThe number of instances in the training set.",        "T", 1, "-T <number of instances in training set>"));        newVector.addElement(new Option(        "\tFull class name of the learner used in the decomposition.\n"        +"\teg: weka.classifiers.bayes.NaiveBayes",        "W", 1, "-W <classifier class name>"));                if ((m_Classifier != null) &&        (m_Classifier instanceof OptionHandler)) {            newVector.addElement(new Option(            "",            "", 0, "\nOptions specific to learner "            + m_Classifier.getClass().getName()            + ":"));            Enumeration enu = ((OptionHandler)m_Classifier).listOptions();            while (enu.hasMoreElements()) {                newVector.addElement(enu.nextElement());            }        }        return newVector.elements();    }            /**      * Sets the OptionHandler's options using the given list. All options     * will be set (or reset) during this call (i.e. incremental setting     * of options is not possible). <p/>     *     <!-- options-start -->     * Valid options are: <p/>     *      * <pre> -c &lt;class index&gt;     *  The index of the class attribute.     *  (default last)</pre>     *      * <pre> -D     *  Turn on debugging output.</pre>     *      * <pre> -l &lt;num&gt;     *  The number of times each instance is classified.     *  (default 10)</pre>     *      * <pre> -p &lt;proportion of objects in common&gt;     *  The average proportion of instances common between any two training sets</pre>     *      * <pre> -s &lt;seed&gt;     *  The random number seed used.</pre>     *      * <pre> -t &lt;name of arff file&gt;     *  The name of the arff file used for the decomposition.</pre>     *      * <pre> -T &lt;number of instances in training set&gt;     *  The number of instances in the training set.</pre>     *      * <pre> -W &lt;classifier class name&gt;     *  Full class name of the learner used in the decomposition.     *  eg: weka.classifiers.bayes.NaiveBayes</pre>     *      * <pre>      * Options specific to learner weka.classifiers.rules.ZeroR:     * </pre>     *      * <pre> -D     *  If set, classifier is run in debug mode and     *  may output additional info to the console</pre>     *      <!-- options-end -->     *     * @param options the list of options as an array of strings     * @throws Exception if an option is not supported     */    public void setOptions(String[] options) throws Exception {        setDebug(Utils.getFlag('D', options));                String classIndex = Utils.getOption('c', options);        if (classIndex.length() != 0) {            if (classIndex.toLowerCase().equals("last")) {                setClassIndex(0);            } else if (classIndex.toLowerCase().equals("first")) {                setClassIndex(1);            } else {                setClassIndex(Integer.parseInt(classIndex));

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?