📄 classifiersubseteval.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.attributeSelection;
import java.io.File;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;
import org.agentacademy.modules.dataminer.classifiers.ZeroR;
import org.agentacademy.modules.dataminer.classifiers.evaluation.Classifier;
import org.agentacademy.modules.dataminer.classifiers.evaluation.DistributionClassifier;
import org.agentacademy.modules.dataminer.classifiers.evaluation.Evaluation;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Utils;
import org.agentacademy.modules.dataminer.filters.AttributeFilter;
import org.agentacademy.modules.dataminer.filters.Filter;
import org.apache.log4j.Logger;
/**
* Classifier subset evaluator. Uses a classifier to estimate the "merit"
* of a set of attributes.
*
* Valid options are:<p>
*
* -B <classifier> <br>
* Class name of the classifier to use for accuracy estimation.
* Place any classifier options last on the command line following a
* "--". Eg -B weka.classifiers.bayes.NaiveBayes ... -- -K <p>
*
* -T <br>
* Use the training data for accuracy estimation rather than a hold out/
* test set. <p>
*
* -H <filename> <br>
* The file containing hold out/test instances to use for accuracy estimation
* <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.3 $
*/
public class ClassifierSubsetEval
extends HoldOutSubsetEvaluator
implements OptionHandler, ErrorBasedMeritEvaluator {
/** log4j logger for this class */
public static Logger log = Logger.getLogger(ClassifierSubsetEval.class);
/** training instances */
private Instances m_trainingInstances;
/** class index */
private int m_classIndex;
/** number of attributes in the training data */
private int m_numAttribs;
/** number of training instances */
private int m_numInstances;
/** holds the classifier to use for error estimates (a ZeroR instance by default) */
private Classifier m_Classifier = new ZeroR();
/** holds the evaluation object to use for evaluating the classifier */
private Evaluation m_Evaluation;
/**
 * the file that contains hold out/test instances; initialised to a
 * placeholder path whose text prompts the user to choose a file
 */
private File m_holdOutFile = new File("Click to set hold out or "
+"test instances");
/** the instances to test on */
private Instances m_holdOutInstances = null;
/** evaluate on training data rather than a separate hold out/test set */
private boolean m_useTraining = false;
/**
 * Describes this attribute subset evaluator in a single sentence.
 *
 * @return a short description of the evaluator, suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
    final String description =
        "Evaluates attribute subsets on training data or a seperate "
        + "hold out testing set";
    return description;
}
/**
* Returns an enumeration describing the available options. <p>
*
* -B <classifier> <br>
* Class name of the classifier to use for accuracy estimation.
* Place any classifier options last on the command line following a
* "--". Eg -B weka.classifiers.bayes.NaiveBayes ... -- -K <p>
*
* -T <br>
* Use the training data for accuracy estimation rather than a hold out/
* test set. <p>
*
* -H <filename> <br>
* The file containing hold out/test instances to use for accuracy estimation
* <p>
*
* @return an enumeration of all the available options.
**/
public Enumeration listOptions () {
Vector newVector = new Vector(3);
newVector.addElement(new Option("\tclass name of the classifier to use for"
+ "\n\taccuracy estimation. Place any"
+ "\n\tclassifier options LAST on the"
+ "\n\tcommand line following a \"--\"."
+ "\n\teg. -C weka.classifiers.bayes.NaiveBayes ... "
+ "-- -K", "B", 1, "-B <classifier>"));
newVector.addElement(new Option("\tUse the training data to estimate"
+" accuracy."
,"T",0,"-T"));
newVector.addElement(new Option("\tName of the hold out/test set to "
+"\n\testimate accuracy on."
,"H", 1,"-H <filename>"));
if ((m_Classifier != null) &&
(m_Classifier instanceof OptionHandler)) {
newVector.addElement(new Option("", "", 0, "\nOptions specific to "
+ "scheme "
+ m_Classifier.getClass().getName()
+ ":"));
Enumeration enum = ((OptionHandler)m_Classifier).listOptions();
while (enum.hasMoreElements()) {
newVector.addElement(enum.nextElement());
}
}
return newVector.elements();
}
/**
 * Parses a given list of options. Calls resetOptions() first, then
 * applies the options found in the array.
 *
 * Valid options are:<p>
 *
 * -B &lt;classifier&gt; <br>
 * Class name of classifier to use for accuracy estimation (required).
 * Place any classifier options last on the command line following a
 * "--". Eg -B weka.classifiers.bayes.NaiveBayes ... -- -K <p>
 *
 * -T <br>
 * Use training data instead of a hold out/test set for accuracy estimation.
 * <p>
 *
 * -H &lt;filename&gt; <br>
 * Name of the hold out/test set to estimate classifier accuracy on.
 * <p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if no classifier is given with -B, or if
 * processing any of the options fails
 *
 **/
public void setOptions (String[] options)
    throws Exception
{
    resetOptions();

    // -B is mandatory: without it there is no classifier to build.
    final String classifierName = Utils.getOption('B', options);
    if (classifierName.length() == 0) {
        throw new Exception("A classifier must be specified with -B option");
    }
    setClassifier(
        Classifier.forName(classifierName, Utils.partitionOptions(options)));

    // -H is optional; the current hold out file is kept when it is absent.
    final String holdOutName = Utils.getOption('H', options);
    if (holdOutName.length() > 0) {
        setHoldOutFile(new File(holdOutName));
    }

    final boolean trainingRequested = Utils.getFlag('T', options);
    setUseTraining(trainingRequested);
}
/**
 * Returns the tip text for the classifier property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String classifierTipText() {
    final String tip = "Classifier to use for estimating the accuracy of subsets";
    return tip;
}
/**
 * Sets the classifier used for accuracy estimation.
 *
 * @param newClassifier the Classifier to use.
 */
public void setClassifier (Classifier newClassifier) {
    this.m_Classifier = newClassifier;
}
/**
 * Gets the classifier currently used for accuracy estimation.
 *
 * @return the classifier held by this evaluator
 */
public Classifier getClassifier () {
    return this.m_Classifier;
}
/**
 * Returns the tip text for the hold out file property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String holdOutFileTipText() {
    final String tip = "File containing hold out/test instances.";
    return tip;
}
/**
 * Gets the file that holds the hold out/test instances.
 *
 * @return File that contains hold out instances
 */
public File getHoldOutFile() {
    return this.m_holdOutFile;
}
/**
 * Sets the file that contains the hold out/test instances.
 *
 * @param h the hold out file
 */
public void setHoldOutFile(File h) {
    this.m_holdOutFile = h;
}
/**
 * Returns the tip text for the use-training property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String useTrainingTipText() {
    final String tip = "Use training data instead of hold out/test instances.";
    return tip;
}
/**
 * Reports whether training data is used instead of hold out/test data.
 *
 * @return true if training data is to be used instead of hold out data
 */
public boolean getUseTraining() {
    return this.m_useTraining;
}
/**
 * Sets whether training data is used instead of hold out/test data.
 *
 * @param t true if training data is to be used instead of hold out data
 */
public void setUseTraining(boolean t) {
    this.m_useTraining = t;
}
/**
 * Gets the current settings of ClassifierSubsetEval.
 *
 * The array holds, in order: "-B" and the classifier's class name (when a
 * classifier is set), "-T" (when training data is used), "-H" and the hold
 * out file path, "--", and then the classifier's own options when the
 * classifier is an OptionHandler. Any unused trailing slots are filled
 * with empty strings.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
    // instanceof is false for null, so no separate null test is required.
    final String[] schemeOptions = (m_Classifier instanceof OptionHandler)
        ? ((OptionHandler) m_Classifier).getOptions()
        : new String[0];

    // Six slots cover -B <name>, -T, -H <file> and "--".
    final String[] result = new String[6 + schemeOptions.length];
    int next = 0;

    if (getClassifier() != null) {
        result[next] = "-B";
        result[next + 1] = getClassifier().getClass().getName();
        next += 2;
    }
    if (getUseTraining()) {
        result[next] = "-T";
        next++;
    }
    result[next] = "-H";
    result[next + 1] = getHoldOutFile().getPath();
    result[next + 2] = "--";
    next += 3;

    System.arraycopy(schemeOptions, 0, result, next, schemeOptions.length);

    // Pad whatever is left so the array holds no null entries.
    for (int i = next + schemeOptions.length; i < result.length; i++) {
        result[i] = "";
    }
    return result;
}
/**
* Generates an attribute evaluator. Has to initialize all fields of the
* evaluator that are not being set via options.
*
* @param data set of instances serving as training data
* @exception Exception if the evaluator has not been
* generated successfully
*/
public void buildEvaluator (Instances data)
throws Exception
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -