/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* BVDecomposeSegCVSub.java
* Copyright (C) 2003 Paul Conilione
*
* Based on the class: BVDecompose.java by Len Trigg (1999)
*/
/*
* DEDICATION
*
* Paul Conilione would like to express his deep gratitude and appreciation
* to his Chinese Buddhist Taoist Master Sifu Chow Yuk Nen for the abilities
* and insight that he has been taught, which have allowed him to program in
* a clear and efficient manner.
*
* Master Sifu Chow Yuk Nen's Teachings are unique and precious. They are
* applicable to any field of human endeavour. Through his unique and powerful
* ability to skilfully apply Chinese Buddhist Teachings, people have achieved
* success in: computing, chemical engineering, business, accounting, philosophy
* and more.
*
*/
package weka.classifiers;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.rules.ZeroR;
import weka.classifiers.Classifier;
import java.io.*;
import java.util.*;
import weka.core.*;
/**
* This class performs Bias-Variance decomposition on any classifier using the
* sub-sampled cross-validation procedure as specified in:<p>
*
* Geoffrey I. Webb & Paul Conilione (2002), <i> Estimating bias and variance
* from data </i>, School of Computer Science and Software Engineering,
* Monash University, Australia <p>
*
*
* The Kohavi and Wolpert definition of bias and variance is specified in:<p>
* R. Kohavi & D. Wolpert (1996), <i>Bias plus variance decomposition for
* zero-one loss functions</i>, in Proc. of the Thirteenth International
* Machine Learning Conference (ICML96)
* <a href="http://robotics.stanford.edu/~ronnyk/biasVar.ps">
* download postscript</a>.<p>
*
* The Webb definition of bias and variance is specified in:<p>
* Geoffrey I. Webb (2000), <i> MultiBoosting: A Technique for Combining
* Boosting and Wagging</i>, Machine Learning, 40(2), pages 159-196<p>
*
* Valid options are:<p>
*
* -c num <br>
* Specify the index of the class attribute (default last).<p>
*
* -D <br>
* Turn on debugging output.<p>
*
* -l num <br>
* Set the number of times each instance is to be classified (default 10). <p>
*
* -p num <br>
* Set the proportion of instances that are the same between any two
* training sets. Training set size/(Dataset size - 1) < num < 1.0
* (Default is Training set size/(Dataset size - 1) ) <p>
*
* -s num <br>
* Set the seed for the dataset randomisation (default 1). <p>
*
* -t filename <br>
* Set the arff file to use for the decomposition (required).<p>
*
* -T num <br>
* Set the size of the training sets. Must be greater than 0 and
* less than the size of the dataset. (default half of dataset size) <p>
*
* -W classname <br>
* Specify the full class name of a learner to perform the
* decomposition on (required).<p>
*
* Options after -- are passed to the designated sub-learner. <p>
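*
* For example, an illustrative command line (the dataset name here is only a
* placeholder, not taken from the original documentation) might be:<p>
* <code>java weka.classifiers.BVDecomposeSegCVSub -t mydata.arff -l 10 -s 1
* -W weka.classifiers.bayes.NaiveBayes</code><p>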
*
* @author Paul Conilione (paulc4321@yahoo.com.au)
* @version $Revision: 1.1 $
*/
public class BVDecomposeSegCVSub implements OptionHandler {
/** Debugging mode, gives extra output if true. */
protected boolean m_Debug;
/** An instantiated base classifier used for getting and testing options. */
protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();
/** The options to be passed to the base classifier. */
protected String [] m_ClassifierOptions;
/** The number of times an instance is classified*/
protected int m_ClassifyIterations;
/** The name of the data file used for the decomposition */
protected String m_DataFileName;
/** The index of the class attribute */
protected int m_ClassIndex = -1;
/** The random number seed */
protected int m_Seed = 1;
/** The calculated Kohavi & Wolpert bias (squared) */
protected double m_KWBias;
/** The calculated Kohavi & Wolpert variance */
protected double m_KWVariance;
/** The calculated Kohavi & Wolpert sigma */
protected double m_KWSigma;
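// Background note (paraphrasing the Kohavi & Wolpert (1996) paper cited in the
// class comment; this comment is not part of the original source): under their
// decomposition the expected zero-one loss factors as
//     expected error = sigma^2 + bias^2 + variance
// so the three fields above hold the estimated sigma, (squared) bias and
// variance contributions of that identity, computed from the repeated
// classifications of each instance.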
/** The calculated Webb bias */
protected double m_WBias;
/** The calculated Webb variance */
protected double m_WVariance;
/** The error rate */
protected double m_Error;
/** The training set size */
protected int m_TrainSize;
/** Proportion of instances common between any two training sets. */
protected double m_P;
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector newVector = new Vector(8);
newVector.addElement(new Option(
"\tThe index of the class attribute.\n"+
"\t(default last)",
"c", 1, "-c <class index>"));
newVector.addElement(new Option(
"\tTurn on debugging output.",
"D", 0, "-D"));
newVector.addElement(new Option(
"\tThe number of times each instance is classified.\n"
+"\t(default 10)",
"l", 1, "-l <num>"));
newVector.addElement(new Option(
"\tThe average proportion of instances common between any two training sets\n",
"p", 1, "-p <proportion of objects in common>"));
newVector.addElement(new Option(
"\tThe random number seed used.",
"s", 1, "-s <seed>"));
newVector.addElement(new Option(
"\tThe name of the arff file used for the decomposition.",
"t", 1, "-t <name of arff file>"));
newVector.addElement(new Option(
"\tThe number of instances in the training set.",
"T", 1, "-T <number of instances in training set>"));
newVector.addElement(new Option(
"\tFull class name of the learner used in the decomposition.\n"
+"\teg: weka.classifiers.bayes.NaiveBayes",
"W", 1, "-W <classifier class name>"));
if ((m_Classifier != null) &&
(m_Classifier instanceof OptionHandler)) {
newVector.addElement(new Option(
"",
"", 0, "\nOptions specific to learner "
+ m_Classifier.getClass().getName()
+ ":"));
Enumeration enu = ((OptionHandler)m_Classifier).listOptions();
while (enu.hasMoreElements()) {
newVector.addElement(enu.nextElement());
}
}
return newVector.elements();
}
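// Illustrative usage only (not part of the original class): the options
// declared above can be enumerated, e.g. to print a usage message.
//
//   Enumeration enu = new BVDecomposeSegCVSub().listOptions();
//   while (enu.hasMoreElements()) {
//       Option option = (Option) enu.nextElement();
//       System.out.println(option.synopsis() + "\n" + option.description());
//   }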
/**
* Parses a given list of options and sets them on this object. All options
* will be set (or reset) during this call (i.e. incremental setting
* of options is not possible). Valid options are:<p>
*
* -c num <br>
* Specify the index of the class attribute (default last).<p>
*
* -D <br>
* Turn on debugging output.<p>
*
* -l num <br>
* Set the number of times each instance is to be classified (default 10). <p>
*
* -p num <br>
* Set the proportion of instances that are the same between any two
* training sets. Training set size/(Dataset size - 1) < num < 1.0
* (Default Training set size/(Dataset size - 1)) <p>
*
* -s num <br>
* Set the seed for the dataset randomisation (default 1). <p>
*
* -t filename <br>
* Set the arff file to use for the decomposition (required).<p>
*
* -T num <br>
* Set the size of the training sets. Must be greater than 0 and
* less than the size of the dataset. (default half of dataset size) <p>
*
* -W classname <br>
* Specify the full class name of a learner to perform the
* decomposition on (required).<p>
*
* Options after -- are passed to the designated sub-learner. <p>
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
setDebug(Utils.getFlag('D', options));
String classIndex = Utils.getOption('c', options);
if (classIndex.length() != 0) {
if (classIndex.toLowerCase().equals("last")) {
setClassIndex(0);
} else if (classIndex.toLowerCase().equals("first")) {
setClassIndex(1);
} else {
setClassIndex(Integer.parseInt(classIndex));
}
} else {
setClassIndex(0);
}
String classifyIterations = Utils.getOption('l', options);
if (classifyIterations.length() != 0) {
setClassifyIterations(Integer.parseInt(classifyIterations));
} else {
setClassifyIterations(10);
}
String prob = Utils.getOption('p', options);
if (prob.length() != 0) {
setP( Double.parseDouble(prob));
} else {
setP(-1);
}
//throw new Exception("A proportion must be specified" + " with a -p option.");
String seedString = Utils.getOption('s', options);
if (seedString.length() != 0) {
setSeed(Integer.parseInt(seedString));
} else {
setSeed(1);
}
String dataFile = Utils.getOption('t', options);
if (dataFile.length() != 0) {
setDataFileName(dataFile);
} else {
throw new Exception("An arff file must be specified"
+ " with the -t option.");
}
String trainSize = Utils.getOption('T', options);
if (trainSize.length() != 0) {
setTrainSize(Integer.parseInt(trainSize));
} else {
setTrainSize(-1);
}
//throw new Exception("A training set size must be specified" + " with a -T option.");
String classifierName = Utils.getOption('W', options);
if (classifierName.length() != 0) {
setClassifier(Classifier.forName(classifierName, Utils.partitionOptions(options)));
} else {
throw new Exception("A learner must be specified with the -W option.");
}
}
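// Illustrative usage only (not part of the original class): the same flags
// documented above can also be supplied programmatically. The file name and
// learner below are placeholders.
//
//   BVDecomposeSegCVSub bvd = new BVDecomposeSegCVSub();
//   bvd.setOptions(new String[] {
//       "-t", "mydata.arff",
//       "-l", "10",
//       "-W", "weka.classifiers.bayes.NaiveBayes"
//   });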
/**
* Gets the current settings of the BVDecomposeSegCVSub.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions() {
String [] classifierOptions = new String [0];
if ((m_Classifier != null) &&
(m_Classifier instanceof OptionHandler)) {
classifierOptions = ((OptionHandler)m_Classifier).getOptions();
}
String [] options = new String [classifierOptions.length + 14];
int current = 0;
if (getDebug()) {
options[current++] = "-D";
}
options[current++] = "-c"; options[current++] = "" + getClassIndex();
options[current++] = "-l"; options[current++] = "" + getClassifyIterations();
options[current++] = "-p"; options[current++] = "" + getP();
options[current++] = "-s"; options[current++] = "" + getSeed();