checkclassifier.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 1,847 行 · 第 1/5 页
JAVA
1,847 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    CheckClassifier.java *    Copyright (C) 1999 Len Trigg * */package weka.classifiers;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TestInstances;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.MultiInstanceCapabilitiesHandler;import java.util.Enumeration;import java.util.Random;import java.util.StringTokenizer;import java.util.Vector;/** * Class for examining the capabilities and finding problems with  * classifiers. If you implement a classifier using the WEKA.libraries, * you should run the checks on it to ensure robustness and correct * operation. Passing all the tests of this object does not mean * bugs in the classifier don't exist, but this will help find some * common ones. 
<p/> *  * Typical usage: <p/> * <code>java weka.classifiers.CheckClassifier -W classifier_name  * classifier_options </code><p/> *  * CheckClassifier reports on the following: * <ul> *    <li> Classifier abilities  *      <ul> *         <li> Possible command line options to the classifier </li> *         <li> Whether the classifier can predict nominal, numeric, string,  *              date or relational class attributes. Warnings will be displayed if  *              performance is worse than ZeroR </li> *         <li> Whether the classifier can be trained incrementally </li> *         <li> Whether the classifier can handle numeric predictor attributes </li> *         <li> Whether the classifier can handle nominal predictor attributes </li> *         <li> Whether the classifier can handle string predictor attributes </li> *         <li> Whether the classifier can handle date predictor attributes </li> *         <li> Whether the classifier can handle relational predictor attributes </li> *         <li> Whether the classifier can handle multi-instance data </li> *         <li> Whether the classifier can handle missing predictor values </li> *         <li> Whether the classifier can handle missing class values </li> *         <li> Whether a nominal classifier only handles 2 class problems </li> *         <li> Whether the classifier can handle instance weights </li> *      </ul> *    </li> *    <li> Correct functioning  *      <ul> *         <li> Correct initialisation during buildClassifier (i.e. 
no result *              changes when buildClassifier called repeatedly) </li> *         <li> Whether incremental training produces the same results *              as during non-incremental training (which may or may not  *              be OK) </li> *         <li> Whether the classifier alters the data passed to it  *              (number of instances, instance order, instance weights, etc) </li> *      </ul> *    </li> *    <li> Degenerate cases  *      <ul> *         <li> building classifier with zero training instances </li> *         <li> all but one predictor attribute values missing </li> *         <li> all predictor attribute values missing </li> *         <li> all but one class values missing </li> *         <li> all class values missing </li> *      </ul> *    </li> * </ul> * Running CheckClassifier with the debug option set will output the  * training and test datasets for any failed tests.<p/> * * The <code>weka.classifiers.AbstractClassifierTest</code> uses this * class to test all the classifiers. Any changes here have to be  * checked in that abstract test class, too. <p/> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Turn on debugging output.</pre> *  * <pre> -S *  Silent mode - prints nothing to stdout.</pre> *  * <pre> -N &lt;num&gt; *  The number of instances in the datasets (default 20).</pre> *  * <pre> -words &lt;comma-separated-list&gt; *  The words to use in string attributes.</pre> *  * <pre> -word-separators &lt;chars&gt; *  The word separators to use in string attributes.</pre> *  * <pre> -W *  Full name of the classifier analysed. 
*  eg: weka.classifiers.bayes.NaiveBayes</pre> *  * <pre>  * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  <!-- options-end --> * * Options after -- are passed to the designated classifier.<p/> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.24 $ * @see TestInstances */public class CheckClassifier implements OptionHandler {  /*   * Note about test methods:   * - methods return array of booleans   * - first index: success or not   * - second index: acceptable or not (e.g., Exception is OK)   * - in case the performance is worse than that of ZeroR both indices are true   *   * FracPete (fracpete at waikato dot ac dot nz)   */    /** a class for postprocessing the test-data    * @see #makeTestDataset(int, int, int, int, int, int, int, int, int, int, boolean) */  public class PostProcessor {    /**     * Provides a hook for derived classes to further modify the data. Currently,     * the data is just passed through.     *      * @param data	the data to process     * @return		the processed data     */    protected Instances process(Instances data) {      return data;    }  }    /*** The classifier to be examined */  protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();    /** The options to be passed to the base classifier. 
*/  protected String[] m_ClassifierOptions;    /** Debugging mode, gives extra output if true */  protected boolean m_Debug = false;    /** Silent mode, for no output at all to stdout */  protected boolean m_Silent = false;    /** The number of instances in the datasets */  protected int m_NumInstances = 20;    /** for generating String attributes/classes */  protected String[] m_Words = TestInstances.DEFAULT_WORDS;    /** for generating String attributes/classes */  protected String m_WordSeparators = TestInstances.DEFAULT_SEPARATORS;    /** for post-processing the data even further */  protected PostProcessor m_PostProcessor = null;    /** whether classpath problems occurred */  protected boolean m_ClasspathProblems = false;    /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {        Vector newVector = new Vector(2);        newVector.addElement(new Option(        "\tTurn on debugging output.",        "D", 0, "-D"));        newVector.addElement(new Option(        "\tSilent mode - prints nothing to stdout.",        "S", 0, "-S"));        newVector.addElement(new Option(        "\tThe number of instances in the datasets (default 20).",        "N", 1, "-N <num>"));        newVector.addElement(new Option(        "\tThe words to use in string attributes.",        "words", 1, "-words <comma-separated-list>"));        newVector.addElement(new Option(        "\tThe word separators to use in string attributes.",        "word-separators", 1, "-word-separators <chars>"));        newVector.addElement(new Option(        "\tFull name of the classifier analysed.\n"        +"\teg: weka.classifiers.bayes.NaiveBayes",        "W", 1, "-W"));        if ((m_Classifier != null)         && (m_Classifier instanceof OptionHandler)) {      newVector.addElement(new Option("", "", 0,           "\nOptions specific to classifier "          + m_Classifier.getClass().getName()     
     + ":"));      Enumeration enu = ((OptionHandler)m_Classifier).listOptions();      while (enu.hasMoreElements())        newVector.addElement(enu.nextElement());    }        return newVector.elements();  }    /**   * Parses a given list of options.    *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -D   *  Turn on debugging output.</pre>   *    * <pre> -S   *  Silent mode - prints nothing to stdout.</pre>   *    * <pre> -N &lt;num&gt;   *  The number of instances in the datasets (default 20).</pre>   *    * <pre> -words &lt;comma-separated-list&gt;   *  The words to use in string attributes.</pre>   *    * <pre> -word-separators &lt;chars&gt;   *  The word separators to use in string attributes.</pre>   *    * <pre> -W   *  Full name of the classifier analysed.   *  eg: weka.classifiers.bayes.NaiveBayes</pre>   *    * <pre>    * Options specific to classifier weka.classifiers.rules.ZeroR:   * </pre>   *    * <pre> -D   *  If set, classifier is run in debug mode and   *  may output additional info to the console</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String      tmpStr;        setDebug(Utils.getFlag('D', options));        setSilent(Utils.getFlag('S', options));        tmpStr = Utils.getOption('N', options);    if (tmpStr.length() != 0)      setNumInstances(Integer.parseInt(tmpStr));    else      setNumInstances(20);        tmpStr = Utils.getOption("words", options);    if (tmpStr.length() != 0)      setWords(tmpStr);    else      setWords(new TestInstances().getWords());        if (Utils.getOptionPos("word-separators", options) > -1) {      tmpStr = Utils.getOption("word-separators", options);      setWordSeparators(tmpStr);    }    else {      setWordSeparators(TestInstances.DEFAULT_SEPARATORS);    }        tmpStr = Utils.getOption('W', options);    if 
(tmpStr.length() == 0)      throw new Exception("A classifier must be specified with the -W option.");    setClassifier(Classifier.forName(tmpStr, Utils.partitionOptions(options)));  }    /**   * Gets the current settings of the CheckClassifier.   *   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    Vector        result;    String[]      options;    int           i;        result = new Vector();        if (getDebug())      result.add("-D");        if (getSilent())      result.add("-S");        result.add("-N");    result.add("" + getNumInstances());        result.add("-words");    result.add("" + getWords());        result.add("-word-separators");    result.add("" + getWordSeparators());        if (getClassifier() != null) {      result.add("-W");      result.add(getClassifier().getClass().getName());    }        if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler))      options = ((OptionHandler) m_Classifier).getOptions();    else      options = new String[0];        if (options.length > 0) {      result.add("--");      for (i = 0; i < options.length; i++)        result.add(options[i]);    }        return (String[]) result.toArray(new String[result.size()]);  }    /**   * sets the PostProcessor to use   *    * @param value	the new PostProcessor   * @see #m_PostProcessor   */  public void setPostProcessor(PostProcessor value) {    m_PostProcessor = value;  }    /**
checkclassifier.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 checkclassifier.java 源码文件，采用 Java 编程语言编写，共 1,847 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?