📄 checkclassifier.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* CheckClassifier.java
* Copyright (C) 1999 Len Trigg
*
*/
package weka.classifiers;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* Class for examining the capabilities and finding problems with
* classifiers. If you implement a classifier using the WEKA libraries,
* you should run the checks on it to ensure robustness and correct
* operation. Passing all the tests of this object does not mean
* bugs in the classifier don't exist, but this will help find some
* common ones. <p>
*
* Typical usage: <p>
* <code>java weka.classifiers.CheckClassifier -W classifier_name
* classifier_options </code><p>
*
* CheckClassifier reports on the following:
* <ul>
* <li> Classifier abilities <ul>
* <li> Possible command line options to the classifier
* <li> Whether the classifier can predict nominal and/or predict
* numeric class attributes. Warnings will be displayed if
* performance is worse than ZeroR
* <li> Whether the classifier can be trained incrementally
* <li> Whether the classifier can handle numeric predictor attributes
* <li> Whether the classifier can handle nominal predictor attributes
* <li> Whether the classifier can handle string predictor attributes
* <li> Whether the classifier can handle missing predictor values
* <li> Whether the classifier can handle missing class values
* <li> Whether a nominal classifier only handles 2 class problems
* <li> Whether the classifier can handle instance weights
* </ul>
* <li> Correct functioning <ul>
* <li> Correct initialisation during buildClassifier (i.e. no result
* changes when buildClassifier called repeatedly)
* <li> Whether incremental training produces the same results
* as during non-incremental training (which may or may not
* be OK)
* <li> Whether the classifier alters the data passed to it
* (number of instances, instance order, instance weights, etc)
* </ul>
* <li> Degenerate cases <ul>
* <li> building classifier with zero training instances
* <li> all but one predictor attribute values missing
* <li> all predictor attribute values missing
* <li> all but one class values missing
* <li> all class values missing
* </ul>
* </ul>
* Running CheckClassifier with the debug option set will output the
* training and test datasets for any failed tests.<p>
*
* Valid options are:<p>
*
* -D <br>
* Turn on debugging output.<p>
*
* -W classname <br>
* Specify the full class name of a classifier to perform the
* tests on (required).<p>
*
* Options after -- are passed to the designated classifier.<p>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision$
*/
public class CheckClassifier implements OptionHandler {
/** The classifier to be examined; a ZeroR instance until another one is set. */
protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();
/** The options to be passed to the base classifier. */
protected String [] m_ClassifierOptions;
/** The results of the analysis as a string. */
protected String m_AnalysisResults;
/** Debugging mode; extra output is produced when true. */
protected boolean m_Debug;
/**
 * Lists the command line options understood by this checker. When the
 * classifier under examination is itself an OptionHandler, a heading entry
 * and that classifier's own options are appended.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration<Option> listOptions() {
  Vector<Option> available = new Vector<Option>(2);

  Option debugOption = new Option("\tTurn on debugging output.", "D", 0, "-D");
  available.addElement(debugOption);

  Option classifierOption = new Option(
      "\tFull name of the classifier analysed.\n"
      + "\teg: weka.classifiers.bayes.NaiveBayes",
      "W", 1, "-W");
  available.addElement(classifierOption);

  // instanceof is false for null, so no separate null test is required.
  if (m_Classifier instanceof OptionHandler) {
    String heading = "\nOptions specific to classifier "
        + m_Classifier.getClass().getName()
        + ":";
    available.addElement(new Option("", "", 0, heading));

    for (Enumeration<Option> specific = ((OptionHandler) m_Classifier).listOptions();
         specific.hasMoreElements();) {
      available.addElement(specific.nextElement());
    }
  }
  return available.elements();
}
/**
 * Configures this checker from a list of options. Valid options are:<p>
 *
 * -D <br>
 * Turn on debugging output.<p>
 *
 * -W classname <br>
 * Specify the full class name of a classifier to perform the
 * tests on (required).<p>
 *
 * Options after -- are passed to the designated classifier.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if no classifier name is supplied with -W, or if
 * any of the option parsing / classifier lookup calls fail
 */
public void setOptions(String[] options) throws Exception {
  boolean debugRequested = Utils.getFlag('D', options);
  setDebug(debugRequested);

  String name = Utils.getOption('W', options);
  boolean nameMissing = (name.length() == 0);
  if (nameMissing) {
    throw new Exception("A classifier must be specified with the -W option.");
  }

  // Whatever follows "--" belongs to the classifier being instantiated.
  String[] passedOn = Utils.partitionOptions(options);
  Classifier requested = Classifier.forName(name, passedOn);
  setClassifier(requested);
}
/**
 * Reports the current settings of the CheckClassifier. The returned array
 * always has four more slots than the classifier's own option array; slots
 * that are not needed are filled with empty strings.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
  String [] forClassifier;
  if (m_Classifier instanceof OptionHandler) {
    forClassifier = ((OptionHandler) m_Classifier).getOptions();
  } else {
    // Also reached when m_Classifier is null.
    forClassifier = new String [0];
  }

  // Four extra slots: "-D", "-W", the class name and the "--" separator.
  String [] result = new String [forClassifier.length + 4];
  int next = 0;

  if (getDebug()) {
    result[next] = "-D";
    next++;
  }
  if (getClassifier() != null) {
    result[next] = "-W";
    next++;
    result[next] = getClassifier().getClass().getName();
    next++;
  }
  result[next] = "--";
  next++;

  System.arraycopy(forClassifier, 0, result, next, forClassifier.length);

  // Pad the unused tail so that no entry is left null.
  for (int i = next + forClassifier.length; i < result.length; i++) {
    result[i] = "";
  }
  return result;
}
/**
 * Runs the checks and reports the results to System.out. Prints a notice
 * and does nothing else when no classifier has been set.
 */
public void doTests() {
  if (getClassifier() == null) {
    System.out.println("\n=== No classifier set ===");
    return;
  }

  String banner = "\n=== Check on Classifier: "
      + getClassifier().getClass().getName()
      + " ===\n";
  System.out.println(banner);

  // Capability probes run once; their results steer both test batteries.
  canTakeOptions();
  boolean isUpdateable = updateableClassifier();
  boolean handlesWeights = weightedInstancesHandler();

  // First the non-numeric class attribute, then the numeric one.
  for (boolean numericClass : new boolean[] {false, true}) {
    testsPerClassType(numericClass, isUpdateable, handlesWeights);
  }
}
/**
 * Switches debugging output on or off.
 *
 * @param debug true if debug output should be printed
 */
public void setDebug(boolean debug) {
  this.m_Debug = debug;
}
/**
 * Reports whether debugging output is switched on.
 *
 * @return true if debugging output is on
 */
public boolean getDebug() {
  return this.m_Debug;
}
/**
 * Sets the classifier to be examined.
 *
 * @param newClassifier the Classifier to use.
 */
public void setClassifier(Classifier newClassifier) {
  this.m_Classifier = newClassifier;
}
/**
 * Gets the classifier that is being examined.
 *
 * @return the classifier currently set on this checker
 */
public Classifier getClassifier() {
  return this.m_Classifier;
}
/**
 * Command line entry point. Configures a CheckClassifier from the given
 * arguments and runs its tests. If the options cannot be processed, the
 * failure message followed by the list of available options is written to
 * System.err instead.
 *
 * @param args the command line options, as accepted by setOptions
 */
public static void main(String [] args) {
  try {
    CheckClassifier check = new CheckClassifier();
    try {
      check.setOptions(args);
      Utils.checkForRemainingOptions(args);
    } catch (Exception ex) {
      // Build the usage text in a StringBuilder rather than by repeated
      // String concatenation inside the loop.
      StringBuilder usage = new StringBuilder();
      usage.append(ex.getMessage()).append("\nCheckClassifier Options:\n\n");
      Enumeration<Option> em = check.listOptions();
      while (em.hasMoreElements()) {
        Option option = em.nextElement();
        usage.append(option.synopsis()).append("\n")
            .append(option.description()).append("\n");
      }
      // Keep the original failure attached as the cause.
      throw new Exception(usage.toString(), ex);
    }
    check.doTests();
  } catch (Exception ex) {
    System.err.println(ex.getMessage());
  }
}
/**
 * Runs the battery of tests for one kind of class attribute. Nothing beyond
 * the two canPredict probes is run when both of them report false.
 *
 * @param numericClass true if the class attribute should be numeric
 * @param updateable true if the classifier is updateable
 * @param weighted true if the classifier says it handles weights
 */
protected void testsPerClassType(boolean numericClass, boolean updateable,
                                 boolean weighted) {
  // Both probes are always run, even if the first one already succeeded.
  boolean nominalOk = canPredict(true, false, numericClass);
  boolean numericOk = canPredict(false, true, numericClass);
  if (!nominalOk && !numericOk) {
    return;
  }

  if (weighted) {
    instanceWeights(nominalOk, numericOk, numericClass);
  }
  if (!numericClass) {
    canHandleNClasses(nominalOk, numericOk, 4);
  }
  canHandleZeroTraining(nominalOk, numericOk, numericClass);

  // The level-100 check is only attempted once the level-20 check passed.
  // NOTE(review): 20 / 100 are presumably percentages of missing values;
  // confirm against canHandleMissing.
  boolean missingPredictorsOk =
      canHandleMissing(nominalOk, numericOk, numericClass, true, false, 20);
  if (missingPredictorsOk) {
    canHandleMissing(nominalOk, numericOk, numericClass, true, false, 100);
  }

  boolean missingClassOk =
      canHandleMissing(nominalOk, numericOk, numericClass, false, true, 20);
  if (missingClassOk) {
    canHandleMissing(nominalOk, numericOk, numericClass, false, true, 100);
  }

  correctBuildInitialisation(nominalOk, numericOk, numericClass);
  datasetIntegrity(nominalOk, numericOk, numericClass,
                   missingPredictorsOk, missingClassOk);
  doesntUseTestClassVal(nominalOk, numericOk, numericClass);

  if (updateable) {
    updatingEquality(nominalOk, numericOk, numericClass);
  }

  /*
   * Robustness / Correctness:
   * Whether the classifier can handle string predictor attributes
   * NOTE(review): listed here without an accompanying call -- presumably a
   * check that is still to be implemented.
   */
}
/**
* Checks whether the scheme can take command line options.
*
* @return true if the classifier can take options
*/
protected boolean canTakeOptions() {
System.out.print("options...");
if (m_Classifier instanceof OptionHandler) {
System.out.println("yes");
if (m_Debug) {
System.out.println("\n=== Full report ===");
Enumeration<Option> em = ((OptionHandler)m_Classifier).listOptions();
while (em.hasMoreElements()) {
Option option = em.nextElement();
System.out.print(option.synopsis() + "\n"
+ option.description() + "\n");
}
System.out.println("\n");
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -