⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bvdecompose.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    BVDecompose.java
 *    Copyright (C) 1999 Len Trigg
 *
 */

package weka.classifiers;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;

/**
 * Class for performing a Bias-Variance decomposition on any classifier 
 * using the method specified in:<p>
 * 
 * R. Kohavi &amp; D. Wolpert (1996), <i>Bias plus variance decomposition for 
 * zero-one loss functions</i>, in Proc. of the Thirteenth International 
 * Machine Learning Conference (ICML96) 
 * <a href="http://robotics.stanford.edu/~ronnyk/biasVar.ps">
 * download postscript</a>.<p>
 *
 * Valid options are:<p>
 *
 * -D <br>
 * Turn on debugging output.<p>
 *
 * -W classname <br>
 * Specify the full class name of a learner to perform the 
 * decomposition on (required).<p>
 *
 * -t filename <br>
 * Set the arff file to use for the decomposition (required).<p>
 *
 * -T num <br>
 * Specify the number of instances in the training pool (default 100).<p>
 *
 * -c num <br>
 * Specify the index of the class attribute (default last).<p>
 *
 * -x num <br>
 * Set the number of train iterations (default 50). <p>
 *
 * -s num <br>
 * Set the seed for the dataset randomisation (default 1). <p>
 *
 * Options after -- are passed to the designated sub-learner. <p>
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class BVDecompose implements OptionHandler {

  /**
   * Debugging mode, gives extra output if true. No initialiser is given,
   * so it starts out as false.
   */
  protected boolean m_Debug;

  /**
   * An instantiated base classifier used for getting and testing options.
   * Initialised to a ZeroR instance; listOptions() and getOptions() ask it
   * for learner-specific options when it implements OptionHandler.
   */
  protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();

  /** The options to be passed to the base classifier. */
  protected String [] m_ClassifierOptions;

  /** The number of train iterations (initially 50). */
  protected int m_TrainIterations = 50;

  /** The name of the data file used for the decomposition. */
  protected String m_DataFileName;

  /**
   * The index of the class attribute (initially -1).
   * NOTE(review): presumably stored 0-based with -1 standing for "last";
   * confirm against setClassIndex()/getClassIndex().
   */
  protected int m_ClassIndex = -1;

  /** The random number seed (initially 1). */
  protected int m_Seed = 1;

  /** The calculated bias (squared). */
  protected double m_Bias;

  /** The calculated variance. */
  protected double m_Variance;

  /** The calculated sigma (squared). */
  protected double m_Sigma;

  /** The error rate. */
  protected double m_Error;

  /**
   * The number of instances used in the training pool (initially 100).
   * Read and written through getTrainPoolSize()/setTrainPoolSize().
   */
  protected int m_TrainPoolSize = 100;

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    // Seven options belong to the decomposition itself; the vector grows
    // on demand when the learner contributes further entries.
    Vector optionList = new Vector(7);

    String classIndexHelp = "\tThe index of the class attribute.\n"
      + "\t(default last)";
    optionList.addElement(
      new Option(classIndexHelp, "c", 1, "-c <class index>"));

    String dataFileHelp =
      "\tThe name of the arff file used for the decomposition.";
    optionList.addElement(
      new Option(dataFileHelp, "t", 1, "-t <name of arff file>"));

    String poolSizeHelp =
      "\tThe number of instances placed in the training pool.\n"
      + "\tThe remainder will be used for testing. (default 100)";
    optionList.addElement(
      new Option(poolSizeHelp, "T", 1, "-T <training pool size>"));

    String seedHelp = "\tThe random number seed used.";
    optionList.addElement(
      new Option(seedHelp, "s", 1, "-s <seed>"));

    String iterationsHelp = "\tThe number of training repetitions used.\n"
      + "\t(default 50)";
    optionList.addElement(
      new Option(iterationsHelp, "x", 1, "-x <num>"));

    String debugHelp = "\tTurn on debugging output.";
    optionList.addElement(
      new Option(debugHelp, "D", 0, "-D"));

    String learnerHelp =
      "\tFull class name of the learner used in the decomposition.\n"
      + "\teg: weka.classifiers.bayes.NaiveBayes";
    optionList.addElement(
      new Option(learnerHelp, "W", 1, "-W <classifier class name>"));

    // instanceof is false for null, so no separate null check is needed.
    if (m_Classifier instanceof OptionHandler) {
      // A pseudo-option with empty description and name acts as a heading
      // in front of the learner's own options.
      String heading = "\nOptions specific to learner "
        + m_Classifier.getClass().getName()
        + ":";
      optionList.addElement(new Option("", "", 0, heading));

      for (Enumeration learnerOptions =
             ((OptionHandler) m_Classifier).listOptions();
           learnerOptions.hasMoreElements(); ) {
        optionList.addElement(learnerOptions.nextElement());
      }
    }
    return optionList.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -D <br>
   * Turn on debugging output.<p>
   *
   * -W classname <br>
   * Specify the full class name of a learner to perform the 
   * decomposition on (required).<p>
   *
   * -t filename <br>
   * Set the arff file to use for the decomposition (required).<p>
   *
   * -T num <br>
   * Specify the number of instances in the training pool (default 100).<p>
   *
   * -c num <br>
   * Specify the index of the class attribute (default last).<p>
   *
   * -x num <br>
   * Set the number of train iterations (default 50). <p>
   *
   * -s num <br>
   * Set the seed for the dataset randomisation (default 1). <p>
   *
   * Options after -- are passed to the designated sub-learner. <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setDebug(Utils.getFlag('D', options));

    // "last" (or no -c at all) maps to 0 and "first" maps to 1; any other
    // value must be an integer. The keywords are compared with
    // equalsIgnoreCase rather than toLowerCase().equals(...), because
    // toLowerCase() depends on the default locale: under a Turkish locale
    // "FIRST" lower-cases to "f\u0131rst" and would no longer match.
    String classIndex = Utils.getOption('c', options);
    if (classIndex.length() == 0 || classIndex.equalsIgnoreCase("last")) {
      setClassIndex(0);
    } else if (classIndex.equalsIgnoreCase("first")) {
      setClassIndex(1);
    } else {
      // A non-numeric value surfaces as a NumberFormatException.
      setClassIndex(Integer.parseInt(classIndex));
    }

    // Number of train iterations; falls back to 50 when -x is absent.
    String trainIterations = Utils.getOption('x', options);
    if (trainIterations.length() != 0) {
      setTrainIterations(Integer.parseInt(trainIterations));
    } else {
      setTrainIterations(50);
    }

    // Training pool size; falls back to 100 when -T is absent.
    String trainPoolSize = Utils.getOption('T', options);
    if (trainPoolSize.length() != 0) {
      setTrainPoolSize(Integer.parseInt(trainPoolSize));
    } else {
      setTrainPoolSize(100);
    }

    // Random seed; falls back to 1 when -s is absent.
    String seedString = Utils.getOption('s', options);
    if (seedString.length() != 0) {
      setSeed(Integer.parseInt(seedString));
    } else {
      setSeed(1);
    }

    // The data file is mandatory.
    String dataFile = Utils.getOption('t', options);
    if (dataFile.length() == 0) {
      throw new Exception("An arff file must be specified"
			  + " with the -t option.");
    }
    setDataFileName(dataFile);

    // The learner is mandatory as well; it is instantiated by name and
    // handed the result of Utils.partitionOptions(options).
    String classifierName = Utils.getOption('W', options);
    if (classifierName.length() == 0) {
      throw new Exception("A learner must be specified with the -W option.");
    }
    setClassifier(Classifier.forName(classifierName,
				     Utils.partitionOptions(options)));
  }

  /**
   * Gets the current settings of this BVDecompose instance.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    // Options of the wrapped learner; these go after the "--" separator.
    // instanceof is false for null, so no separate null check is needed.
    String [] learnerOptions;
    if (m_Classifier instanceof OptionHandler) {
      learnerOptions = ((OptionHandler) m_Classifier).getOptions();
    } else {
      learnerOptions = new String [0];
    }

    // 14 slots is the most this class itself can emit: "-D", the four
    // flag/value pairs -c/-x/-T/-s, the optional pairs -t and -W, and "--".
    String [] result = new String [learnerOptions.length + 14];
    int next = 0;

    if (getDebug()) {
      result[next++] = "-D";
    }

    result[next++] = "-c";
    result[next++] = "" + getClassIndex();

    result[next++] = "-x";
    result[next++] = "" + getTrainIterations();

    result[next++] = "-T";
    result[next++] = "" + getTrainPoolSize();

    result[next++] = "-s";
    result[next++] = "" + getSeed();

    if (getDataFileName() != null) {
      result[next++] = "-t";
      result[next++] = "" + getDataFileName();
    }

    if (getClassifier() != null) {
      result[next++] = "-W";
      result[next++] = getClassifier().getClass().getName();
    }

    result[next++] = "--";
    for (int i = 0; i < learnerOptions.length; i++) {
      result[next++] = learnerOptions[i];
    }

    // Slots left unused by omitted optional entries are padded with empty
    // strings so the returned array contains no nulls.
    for (; next < result.length; next++) {
      result[next] = "";
    }
    return result;
  }
  
  /**
   * Get the number of instances in the training pool.
   *
   * @return number of instances in the training pool.
   */
  public int getTrainPoolSize() {
    // Plain accessor for the configured training pool size.
    return this.m_TrainPoolSize;
  }
  
  /**
   * Set the number of instances in the training pool.
   *
   * @param numTrain number of instances in the training pool.
   */
  public void setTrainPoolSize(int numTrain) {
    // Stored as given; no range check is applied here.
    this.m_TrainPoolSize = numTrain;
  }
  
  /**
   * Set the classifiers being analysed
   *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -