⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bvdecomposesegcvsub.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    BVDecomposeSegCVSub.java
 *    Copyright (C) 2003 Paul Conilione
 *
 *    Based on the class: BVDecompose.java by Len Trigg (1999)
 */


/*
 *    DEDICATION
 *
 *    Paul Conilione would like to express his deep gratitude and appreciation
 *    to his Chinese Buddhist Taoist Master Sifu Chow Yuk Nen for the abilities
 *    and insight that he has been taught, which have allowed him to program in 
 *    a clear and efficient manner.
 *
 *    Master Sifu Chow Yuk Nen's Teachings are unique and precious. They are
 *    applicable to any field of human endeavour. Through his unique and powerful
 *    ability to skilfully apply Chinese Buddhist Teachings, people have achieved
 *    success in; Computing, chemical engineering, business, accounting, philosophy
 *    and more.
 *
 */

package weka.classifiers;

import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.rules.ZeroR;
import weka.classifiers.Classifier;
import java.io.*;
import java.util.*;
import weka.core.*;

/**
 * This class performs Bias-Variance decomposition on any classifier using the
 * sub-sampled cross-validation procedure as specified in:<p>
 *
 * Geoffrey I. Webb & Paul Conilione (2002), <i> Estimating bias and variance
 * from data </i>, School of Computer Science and Software Engineering,
 * Monash University, Australia <p>
 *
 *
 * The Kohavi and Wolpert definition of bias and variance is specified in:<p>
 * R. Kohavi & D. Wolpert (1996), <i>Bias plus variance decomposition for
 * zero-one loss functions</i>, in Proc. of the Thirteenth International
 * Machine Learning Conference (ICML96)
 * <a href="http://robotics.stanford.edu/~ronnyk/biasVar.ps">
 * download postscript</a>.<p>
 *
 * The Webb definition of bias and variance is specified in:<p>
 * Geoffrey I. Webb (2000), <i> MultiBoosting: A Technique for Combining
 * Boosting and Wagging</i>, Machine Learning, 40(2), pages 159-196<p>
 *
 * Valid options are:<p>
 *
 * -c num <br>
 * Specify the index of the class attribute (default last).<p>
 *
 * -D <br>
 * Turn on debugging output.<p>
 *
 * -l num <br>
 * Set the number of times each instance is to be classified (default 10). <p>
 *
 * -p num <br>
 * Set the proportion of instances that are the same between any two
 * training sets. Training set size/(Dataset size - 1) < num < 1.0
 * (Default is Training set size/(Dataset size - 1) ) <p>
 *
 * -s num <br>
 * Set the seed for the dataset randomisation (default 1). <p>
 *
 * -t filename <br>
 * Set the arff file to use for the decomposition (required).<p>
 *
 * -T num <br>
 * Set the size of the training sets. Must be greater than 0 and
 * less than the size of the dataset. (default half of dataset size) <p>
 *
 * -W classname <br>
 * Specify the full class name of a learner to perform the
 * decomposition on (required).<p>
 *
 * Options after -- are passed to the designated sub-learner. <p>
 *
 * @author Paul Conilione (paulc4321@yahoo.com.au)
 * @version $Revision: 1.1 $
 */

public class BVDecomposeSegCVSub implements OptionHandler {
    
    /** Debugging mode, gives extra output if true. */
    protected boolean m_Debug;
    
    /**
     * An instantiated base classifier used for getting and testing options.
     * Initialised to a ZeroR instance.
     */
    protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();
    
    /** The options to be passed to the base classifier. */
    protected String [] m_ClassifierOptions;
    
    /**
     * The number of times an instance is classified.
     * setOptions passes 10 to setClassifyIterations when no -l option is given.
     */
    protected int m_ClassifyIterations;
    
    /** The name of the data file used for the decomposition */
    protected String m_DataFileName;
    
    /**
     * The index of the class attribute. Initialised to -1; setOptions passes
     * 0 to setClassIndex for "last" (or no -c option) and 1 for "first".
     */
    protected int m_ClassIndex = -1;
    
    /** The random number seed (initialised to 1). */
    protected int m_Seed = 1;
    
    /** The calculated Kohavi & Wolpert bias (squared) */
    protected double m_KWBias;
    
    /** The calculated Kohavi & Wolpert variance */
    protected double m_KWVariance;
    
    /** The calculated Kohavi & Wolpert sigma */
    protected double m_KWSigma;
    
    /** The calculated Webb bias */
    protected double m_WBias;
    
    /** The calculated Webb variance */
    protected double m_WVariance;
    
    /** The error rate */
    protected double m_Error;
    
    /**
     * The training set size.
     * setOptions passes -1 to setTrainSize when no -T option is given.
     */
    protected int m_TrainSize;
    
    /**
     * Proportion of instances common between any two training sets.
     * setOptions passes -1 to setP when no -p option is given.
     */
    protected double m_P;
    
    /**
     * Returns an enumeration describing the available options.
     *
     * @return an enumeration of all the available options.
     */
    public Enumeration listOptions() {

        // This class contributes eight options of its own; options of the
        // configured learner (if it accepts any) are appended after them.
        Vector optionList = new Vector(8);

        String classIndexHelp = "\tThe index of the class attribute.\n"
            + "\t(default last)";
        optionList.addElement(
            new Option(classIndexHelp, "c", 1, "-c <class index>"));

        optionList.addElement(
            new Option("\tTurn on debugging output.", "D", 0, "-D"));

        String iterationsHelp =
            "\tThe number of times each instance is classified.\n"
            + "\t(default 10)";
        optionList.addElement(
            new Option(iterationsHelp, "l", 1, "-l <num>"));

        String proportionHelp =
            "\tThe average proportion of instances common between any two training sets\n";
        optionList.addElement(
            new Option(proportionHelp, "p", 1,
                       "-p <proportion of objects in common>"));

        optionList.addElement(
            new Option("\tThe random number seed used.", "s", 1, "-s <seed>"));

        optionList.addElement(
            new Option("\tThe name of the arff file used for the decomposition.",
                       "t", 1, "-t <name of arff file>"));

        optionList.addElement(
            new Option("\tThe number of instances in the training set.",
                       "T", 1, "-T <number of instances in training set>"));

        String learnerHelp =
            "\tFull class name of the learner used in the decomposition.\n"
            + "\teg: weka.classifiers.bayes.NaiveBayes";
        optionList.addElement(
            new Option(learnerHelp, "W", 1, "-W <classifier class name>"));

        // instanceof is false for null, so this also covers an unset learner.
        if (!(m_Classifier instanceof OptionHandler)) {
            return optionList.elements();
        }

        // Heading entry followed by every option the learner itself reports.
        String learnerHeading = "\nOptions specific to learner "
            + m_Classifier.getClass().getName()
            + ":";
        optionList.addElement(new Option("", "", 0, learnerHeading));

        OptionHandler learner = (OptionHandler) m_Classifier;
        for (Enumeration learnerOptions = learner.listOptions();
             learnerOptions.hasMoreElements(); ) {
            optionList.addElement(learnerOptions.nextElement());
        }
        return optionList.elements();
    }
    
    
    /** Sets the OptionHandler's options using the given list. All options
     * will be set (or reset) during this call (i.e. incremental setting
     * of options is not possible).
     *
     * @param options the list of options as an array of strings
     *
     * @exception Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {
        
        /*
         * Parses a given list of options. Valid options are:
         *
         * -c num       index of the class attribute, or "first" / "last"
         *              (default last)
         * -D           turn on debugging output
         * -l num       number of times each instance is to be classified
         *              (default 10)
         * -p num       proportion of instances that are the same between any
         *              two training sets;
         *              Training set size/(Dataset size - 1) < num < 1.0
         *              (default Training set size/(Dataset size - 1))
         * -s num       seed for the dataset randomisation (default 1)
         * -t filename  arff file to use for the decomposition (required)
         * -T num       size of the training sets; must be greater than 0 and
         *              less than the size of the dataset
         *              (default half of dataset size)
         * -W classname full class name of the learner to perform the
         *              decomposition on (required)
         *
         * Options after -- are passed to the designated sub-learner.
         *
         * The setters are invoked in the order the options are read, so a
         * missing required option (-t or -W) throws only after the options
         * read before it have already been applied.
         */
        
        setDebug(Utils.getFlag('D', options));
        
        // "last" (or no -c at all) is passed on as 0 and "first" as 1; any
        // other value must parse as an integer.
        // The keywords are matched with equalsIgnoreCase instead of
        // toLowerCase().equals(...): the no-argument toLowerCase() uses the
        // default locale, and under a Turkish locale "FIRST" lower-cases to
        // "f\u0131rst", which would miss the keyword and then fail in
        // Integer.parseInt.
        String classIndex = Utils.getOption('c', options);
        if (classIndex.length() == 0 || classIndex.equalsIgnoreCase("last")) {
            setClassIndex(0);
        } else if (classIndex.equalsIgnoreCase("first")) {
            setClassIndex(1);
        } else {
            setClassIndex(Integer.parseInt(classIndex));
        }
        
        String classifyIterations = Utils.getOption('l', options);
        if (classifyIterations.length() != 0) {
            setClassifyIterations(Integer.parseInt(classifyIterations));
        } else {
            setClassifyIterations(10);
        }
        
        // -p is optional: when it is omitted -1 is passed on instead of
        // raising an error.
        // NOTE(review): -1 is presumably a "use the default proportion"
        // sentinel - confirm where m_P is consumed.
        String prob = Utils.getOption('p', options);
        if (prob.length() != 0) {
            setP(Double.parseDouble(prob));
        } else {
            setP(-1);
        }
        
        String seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            setSeed(Integer.parseInt(seedString));
        } else {
            setSeed(1);
        }
        
        // The data file is mandatory.
        String dataFile = Utils.getOption('t', options);
        if (dataFile.length() == 0) {
            throw new Exception("An arff file must be specified"
            + " with the -t option.");
        }
        setDataFileName(dataFile);
        
        // -T is optional: when it is omitted -1 is passed on instead of
        // raising an error.
        // NOTE(review): -1 is presumably a "use the default training set
        // size" sentinel - confirm where m_TrainSize is consumed.
        String trainSize = Utils.getOption('T', options);
        if (trainSize.length() != 0) {
            setTrainSize(Integer.parseInt(trainSize));
        } else {
            setTrainSize(-1);
        }
        
        // The learner is mandatory; the result of Utils.partitionOptions
        // (per the option list above, the options after "--") is handed to
        // the learner being instantiated.
        String classifierName = Utils.getOption('W', options);
        if (classifierName.length() == 0) {
            throw new Exception("A learner must be specified with the -W option.");
        }
        setClassifier(Classifier.forName(classifierName, Utils.partitionOptions(options)));
    }
    
    /**
     * Gets the current settings of this BVDecomposeSegCVSub.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    public String [] getOptions() {
        
        String [] classifierOptions = new String [0];
        if ((m_Classifier != null) &&
        (m_Classifier instanceof OptionHandler)) {
            classifierOptions = ((OptionHandler)m_Classifier).getOptions();
        }
        String [] options = new String [classifierOptions.length + 14];
        int current = 0;
        if (getDebug()) {
            options[current++] = "-D";
        }
        options[current++] = "-c"; options[current++] = "" + getClassIndex();
        options[current++] = "-l"; options[current++] = "" + getClassifyIterations();
        options[current++] = "-p"; options[current++] = "" + getP();
        options[current++] = "-s"; options[current++] = "" + getSeed();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -