📄 checkclassifier.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* CheckClassifier.java
* Copyright (C) 1999 Len Trigg
*
*/
package weka.classifiers;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
/**
* Class for examining the capabilities and finding problems with
* classifiers. If you implement a classifier using the WEKA libraries,
* you should run the checks on it to ensure robustness and correct
* operation. Passing all the tests of this object does not mean
* bugs in the classifier don't exist, but this will help find some
* common ones. <p/>
*
* Typical usage: <p/>
* <code>java weka.classifiers.CheckClassifier -W classifier_name
* classifier_options </code><p/>
*
* CheckClassifier reports on the following:
* <ul>
* <li> Classifier abilities <ul>
* <li> Possible command line options to the classifier
* <li> Whether the classifier can predict nominal and/or predict
* numeric class attributes. Warnings will be displayed if
* performance is worse than ZeroR
* <li> Whether the classifier can be trained incrementally
* <li> Whether the classifier can handle numeric predictor attributes
* <li> Whether the classifier can handle nominal predictor attributes
* <li> Whether the classifier can handle string predictor attributes
* <li> Whether the classifier can handle missing predictor values
* <li> Whether the classifier can handle missing class values
* <li> Whether a nominal classifier only handles 2 class problems
* <li> Whether the classifier can handle instance weights
* </ul>
* <li> Correct functioning <ul>
* <li> Correct initialisation during buildClassifier (i.e. no result
* changes when buildClassifier called repeatedly)
* <li> Whether incremental training produces the same results
* as during non-incremental training (which may or may not
* be OK)
* <li> Whether the classifier alters the data passed to it
* (number of instances, instance order, instance weights, etc)
* </ul>
* <li> Degenerate cases <ul>
* <li> building classifier with zero training instances
* <li> all but one predictor attribute values missing
* <li> all predictor attribute values missing
* <li> all but one class values missing
* <li> all class values missing
* </ul>
* </ul>
* Running CheckClassifier with the debug option set will output the
* training and test datasets for any failed tests.<p/>
*
* The <code>weka.classifiers.AbstractClassifierTest</code> uses this
* class to test all the classifiers. Any changes here have to be
* checked in that abstract test class, too. <p/>
*
* Valid options are:<p/>
*
* -D <br/>
* Turn on debugging output.<p/>
*
* -S <br/>
* Silent mode, i.e., no output at all.<p/>
*
* -N num <br/>
* Number of instances to use for datasets (default 20).<p/>
*
* -W classname <br/>
* Specify the full class name of a classifier to perform the
* tests on (required).<p/>
*
* Options after -- are passed to the designated classifier.<p/>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 1.1 $
*/
public class CheckClassifier implements OptionHandler {
/*
* Note about test methods:
* - return array of booleans
* - first index: success or not
* - second index: acceptable or not (e.g., Exception is OK)
*
* FracPete (fracpete at waikato dot ac dot nz)
*/
/**
 * Helper class for post-processing the test data.
 *
 * @see #makeTestDataset(int, int, int, int, int, int, int, int, int, int, boolean)
 */
public static class PostProcessor {

  /**
   * Extension point that lets subclasses alter the data further. This base
   * implementation performs no modification and hands back its argument.
   *
   * @param data the data to process
   * @return the processed data (here: the very object that was passed in)
   */
  public Instances process(Instances data) {
    final Instances processed = data;
    return processed;
  }
}
/** The classifier to be examined */
protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();
/** The options to be passed to the base classifier. */
protected String [] m_ClassifierOptions;
/** The results of the analysis as a string */
protected String m_AnalysisResults;
/** Debugging mode, gives extra output if true */
protected boolean m_Debug = false;
/** Silent mode, for no output at all to stdout */
protected boolean m_Silent = false;
/** The number of instances in the datasets */
protected int m_NumInstances = 20;
/** for post-processing the data even further */
protected PostProcessor m_PostProcessor = null;
/**
 * Lists the options of this checker (-D, -S, -N, -W). If the current
 * classifier is an OptionHandler, a header entry and the classifier's own
 * options are appended as well.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Vector options = new Vector(2);

  options.addElement(
      new Option("\tTurn on debugging output.", "D", 0, "-D"));
  options.addElement(
      new Option("\tSilent mode - prints nothing to stdout.", "S", 0, "-S"));
  options.addElement(
      new Option("\tThe number of instances in the datasets (default 20).",
                 "N", 1, "-N <num>"));
  options.addElement(
      new Option("\tFull name of the classifier analysed.\n"
                 + "\teg: weka.classifiers.bayes.NaiveBayes",
                 "W", 1, "-W"));

  // instanceof evaluates to false for null, so it doubles as the null check
  if (m_Classifier instanceof OptionHandler) {
    String header = "\nOptions specific to classifier "
        + m_Classifier.getClass().getName()
        + ":";
    options.addElement(new Option("", "", 0, header));

    for (Enumeration specific = ((OptionHandler) m_Classifier).listOptions();
         specific.hasMoreElements(); ) {
      options.addElement(specific.nextElement());
    }
  }

  return options.elements();
}
/**
 * Configures this checker from a list of command-line options: the -D and
 * -S flags, the -N instance count (20 when absent) and the mandatory -W
 * classifier name. The result of Utils.partitionOptions is handed to the
 * classifier being instantiated.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if no classifier name is given via -W; exceptions
 *         raised by the parsing and instantiation helpers are not caught here
 */
public void setOptions(String[] options) throws Exception {
  // NOTE: the order of the helper calls below is kept deliberately
  setDebug(Utils.getFlag('D', options));
  setSilent(Utils.getFlag('S', options));

  String numInstances = Utils.getOption('N', options);
  setNumInstances((numInstances.length() == 0)
                  ? 20
                  : Integer.parseInt(numInstances));

  String classifierName = Utils.getOption('W', options);
  if (classifierName.length() == 0) {
    throw new Exception("A classifier must be specified with"
                        + " the -W option.");
  }

  setClassifier(
      Classifier.forName(classifierName, Utils.partitionOptions(options)));
}
/**
 * Assembles the current settings of the CheckClassifier as an option array.
 * Options of the classifier itself (if it is an OptionHandler and reports
 * any) are appended after a "--" separator.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  Vector settings = new Vector();

  if (getDebug()) {
    settings.add("-D");
  }
  if (getSilent()) {
    settings.add("-S");
  }

  settings.add("-N");
  settings.add(Integer.toString(getNumInstances()));

  if (getClassifier() != null) {
    settings.add("-W");
    settings.add(getClassifier().getClass().getName());
  }

  // start out empty; only an OptionHandler can contribute options of its own
  String[] classifierOptions = new String[0];
  if (m_Classifier instanceof OptionHandler) {
    classifierOptions = ((OptionHandler) m_Classifier).getOptions();
  }

  if (classifierOptions.length > 0) {
    settings.add("--");
    int idx = 0;
    while (idx < classifierOptions.length) {
      settings.add(classifierOptions[idx]);
      idx++;
    }
  }

  return (String[]) settings.toArray(new String[settings.size()]);
}
/**
 * Installs the PostProcessor to use.
 *
 * @param value the new PostProcessor
 * @see #m_PostProcessor
 */
public void setPostProcessor(PostProcessor value) {
  this.m_PostProcessor = value;
}
/**
 * Gives access to the PostProcessor currently in use; the result can be null.
 *
 * @return the current PostProcessor
 */
public PostProcessor getPostProcessor() {
  return this.m_PostProcessor;
}
/**
 * Runs the tests and reports the results through println. When no
 * classifier is set, only a notice is printed.
 */
public void doTests() {
  if (getClassifier() != null) {
    String banner = "\n=== Check on Classifier: "
        + getClassifier().getClass().getName()
        + " ===\n";
    println(banner);

    // Start tests
    canTakeOptions();
    // element 0 of each result array is the success flag
    boolean isUpdateable = updateableClassifier()[0];
    boolean handlesWeights = weightedInstancesHandler()[0];

    // first the nominal class, then the numeric class
    testsPerClassType(false, isUpdateable, handlesWeights);
    testsPerClassType(true, isUpdateable, handlesWeights);
  } else {
    println("\n=== No classifier set ===");
  }
}
/**
 * Switches debugging mode on or off. Turning debugging on also turns
 * silent mode off.
 *
 * @param debug true if debug output should be printed
 */
public void setDebug(boolean debug) {
  m_Debug = debug;

  if (!getDebug()) {
    return;
  }
  // debugging output and silent mode exclude each other
  setSilent(false);
}
/**
 * Tells whether debugging mode is switched on.
 *
 * @return true if debugging output is on
 */
public boolean getDebug() {
  return this.m_Debug;
}
/**
 * Switches silent mode (no output at all to stdout) on or off.
 *
 * @param value whether silent mode is active or not
 */
public void setSilent(boolean value) {
  this.m_Silent = value;
}
/**
 * Tells whether silent mode is switched on.
 *
 * @return true if silent mode is on
 */
public boolean getSilent() {
  return this.m_Silent;
}
/**
 * Defines how many instances the datasets contain (some classifiers
 * might require more instances). The value is stored as given.
 *
 * @param value the number of instances to use
 */
public void setNumInstances(int value) {
  this.m_NumInstances = value;
}
/**
 * Reports how many instances are currently used for the datasets.
 *
 * @return the number of instances
 */
public int getNumInstances() {
  return this.m_NumInstances;
}
/**
 * Sets the classifier to be examined.
 *
 * @param newClassifier the Classifier to use.
 */
public void setClassifier(Classifier newClassifier) {
  this.m_Classifier = newClassifier;
}
/**
 * Gets the classifier being examined.
 *
 * @return the classifier currently set
 */
public Classifier getClassifier() {
  return this.m_Classifier;
}
/**
 * Writes the given message to stdout unless silent mode is active.
 *
 * @param msg the text to print to stdout
 */
protected void print(Object msg) {
  if (getSilent()) {
    return;
  }
  System.out.print(msg);
}
/**
 * Writes the given message followed by a LF via print, which suppresses
 * the output in silent mode.
 *
 * @param msg the message to println to stdout
 */
protected void println(Object msg) {
  String line = msg + "\n";
  print(line);
}
/**
 * Writes a single LF via print, which suppresses the output in silent mode.
 */
protected void println() {
  final String lineFeed = "\n";
  print(lineFeed);
}
/**
* Run a battery of tests for a given class attribute type
*
* @param numericClass true if the class attribute should be numeric
* @param updateable true if the classifier is updateable
* @param weighted true if the classifier says it handles weights
*/
protected void testsPerClassType(boolean numericClass, boolean updateable,
boolean weighted) {
boolean PNom = canPredict(true, false, false, numericClass)[0];
boolean PNum = canPredict(false, true, false, numericClass)[0];
boolean PStr = canPredict(false, false, true, numericClass)[0];
if (PNom || PNum || PStr) {
if (weighted)
instanceWeights(PNom, PNum, PStr, numericClass);
if (!numericClass)
canHandleNClasses(PNom, PNum, PStr, 4);
canHandleZeroTraining(PNom, PNum, PStr, numericClass);
boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr,
numericClass,
true, false, 20)[0];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -