📄 checkclusterer.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * CheckClusterer.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand * */package weka.clusterers;import weka.core.CheckScheme;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.MultiInstanceCapabilitiesHandler;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SerializationHelper;import weka.core.TestInstances;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** * Class for examining the capabilities and finding problems with * clusterers. If you implement a clusterer using the WEKA.libraries, * you should run the checks on it to ensure robustness and correct * operation. Passing all the tests of this object does not mean * bugs in the clusterer don't exist, but this will help find some * common ones. 
<p/> * * Typical usage: <p/> * <code>java weka.clusterers.CheckClusterer -W clusterer_name * -- clusterer_options </code><p/> * * CheckClusterer reports on the following: * <ul> * <li> Clusterer abilities * <ul> * <li> Possible command line options to the clusterer </li> * <li> Whether the clusterer can predict nominal, numeric, string, * date or relational class attributes.</li> * <li> Whether the clusterer can handle numeric predictor attributes </li> * <li> Whether the clusterer can handle nominal predictor attributes </li> * <li> Whether the clusterer can handle string predictor attributes </li> * <li> Whether the clusterer can handle date predictor attributes </li> * <li> Whether the clusterer can handle relational predictor attributes </li> * <li> Whether the clusterer can handle multi-instance data </li> * <li> Whether the clusterer can handle missing predictor values </li> * <li> Whether the clusterer can handle instance weights </li> * </ul> * </li> * <li> Correct functioning * <ul> * <li> Correct initialisation during buildClusterer (i.e. no result * changes when buildClusterer is called repeatedly) </li> * <li> Whether the clusterer alters the data passed to it * (number of instances, instance order, instance weights, etc) </li> * </ul> * </li> * <li> Degenerate cases * <ul> * <li> building clusterer with zero training instances </li> * <li> all but one predictor attribute values missing </li> * <li> all predictor attribute values missing </li> * <li> all but one class values missing </li> * <li> all class values missing </li> * </ul> * </li> * </ul> * Running CheckClusterer with the debug option set will output the * training dataset for any failed tests.<p/> * * The <code>weka.clusterers.AbstractClustererTest</code> uses this * class to test all the clusterers. Any changes here have to be * checked in that abstract test class, too. 
<p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turn on debugging output.</pre> * * <pre> -S * Silent mode - prints nothing to stdout.</pre> * * <pre> -N <num> * The number of instances in the datasets (default 20).</pre> * * <pre> -nominal <num> * The number of nominal attributes (default 2).</pre> * * <pre> -nominal-values <num> * The number of values for nominal attributes (default 1).</pre> * * <pre> -numeric <num> * The number of numeric attributes (default 1).</pre> * * <pre> -string <num> * The number of string attributes (default 1).</pre> * * <pre> -date <num> * The number of date attributes (default 1).</pre> * * <pre> -relational <num> * The number of relational attributes (default 1).</pre> * * <pre> -num-instances-relational <num> * The number of instances in relational/bag attributes (default 10).</pre> * * <pre> -words <comma-separated-list> * The words to use in string attributes.</pre> * * <pre> -word-separators <chars> * The word separators to use in string attributes.</pre> * * <pre> -W * Full name of the clusterer analyzed. * eg: weka.clusterers.SimpleKMeans * (default weka.clusterers.SimpleKMeans)</pre> * * <pre> * Options specific to clusterer weka.clusterers.SimpleKMeans: * </pre> * * <pre> -N <num> * number of clusters. (default = 2).</pre> * * <pre> -S <num> * random number seed. 
* (default 10)</pre> * <!-- options-end --> * * Options after -- are passed to the designated clusterer.<p/> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.8 $ * @see TestInstances */public class CheckClusterer extends CheckScheme { /* * Note about test methods: * - methods return array of booleans * - first index: success or not * - second index: acceptable or not (e.g., Exception is OK) * * FracPete (fracpete at waikato dot ac dot nz) */ /*** The clusterer to be examined */ protected Clusterer m_Clusterer = new SimpleKMeans(); /** * default constructor */ public CheckClusterer() { super(); setNumInstances(40); } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result = new Vector(); Enumeration en = super.listOptions(); while (en.hasMoreElements()) result.addElement(en.nextElement()); result.addElement(new Option( "\tFull name of the clusterer analyzed.\n" +"\teg: weka.clusterers.SimpleKMeans\n" + "\t(default weka.clusterers.SimpleKMeans)", "W", 1, "-W")); if ((m_Clusterer != null) && (m_Clusterer instanceof OptionHandler)) { result.addElement(new Option("", "", 0, "\nOptions specific to clusterer " + m_Clusterer.getClass().getName() + ":")); Enumeration enu = ((OptionHandler)m_Clusterer).listOptions(); while (enu.hasMoreElements()) result.addElement(enu.nextElement()); } return result.elements(); } /** * Parses a given list of options. 
<p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turn on debugging output.</pre> * * <pre> -S * Silent mode - prints nothing to stdout.</pre> * * <pre> -N <num> * The number of instances in the datasets (default 20).</pre> * * <pre> -nominal <num> * The number of nominal attributes (default 2).</pre> * * <pre> -nominal-values <num> * The number of values for nominal attributes (default 1).</pre> * * <pre> -numeric <num> * The number of numeric attributes (default 1).</pre> * * <pre> -string <num> * The number of string attributes (default 1).</pre> * * <pre> -date <num> * The number of date attributes (default 1).</pre> * * <pre> -relational <num> * The number of relational attributes (default 1).</pre> * * <pre> -num-instances-relational <num> * The number of instances in relational/bag attributes (default 10).</pre> * * <pre> -words <comma-separated-list> * The words to use in string attributes.</pre> * * <pre> -word-separators <chars> * The word separators to use in string attributes.</pre> * * <pre> -W * Full name of the clusterer analyzed. * eg: weka.clusterers.SimpleKMeans * (default weka.clusterers.SimpleKMeans)</pre> * * <pre> * Options specific to clusterer weka.clusterers.SimpleKMeans: * </pre> * * <pre> -N <num> * number of clusters. (default = 2).</pre> * * <pre> -S <num> * random number seed. 
* (default 10)</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; tmpStr = Utils.getOption('N', options); super.setOptions(options); if (tmpStr.length() != 0) setNumInstances(Integer.parseInt(tmpStr)); else setNumInstances(40); tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) tmpStr = weka.clusterers.SimpleKMeans.class.getName(); setClusterer( (Clusterer) forName( "weka.clusterers", Clusterer.class, tmpStr, Utils.partitionOptions(options))); } /** * Gets the current settings of the CheckClusterer. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); if (getClusterer() != null) { result.add("-W"); result.add(getClusterer().getClass().getName()); } if ((m_Clusterer != null) && (m_Clusterer instanceof OptionHandler)) options = ((OptionHandler) m_Clusterer).getOptions(); else options = new String[0]; if (options.length > 0) { result.add("--"); for (i = 0; i < options.length; i++) result.add(options[i]); } return (String[]) result.toArray(new String[result.size()]); } /** * Begin the tests, reporting results to System.out */ public void doTests() { if (getClusterer() == null) { println("\n=== No clusterer set ==="); return; } println("\n=== Check on Clusterer: " + getClusterer().getClass().getName() + " ===\n"); // Start tests println("--> Checking for interfaces"); canTakeOptions(); boolean updateable = updateableClusterer()[0]; boolean weightedInstancesHandler = weightedInstancesHandler()[0]; boolean multiInstanceHandler = multiInstanceHandler()[0]; println("--> Clusterer tests"); declaresSerialVersionUID(); runTests(weightedInstancesHandler, multiInstanceHandler, updateable); } /** * Set 
the clusterer for testing. * * @param newClusterer the Clusterer to use. */ public void setClusterer(Clusterer newClusterer) { m_Clusterer = newClusterer; } /** * Get the clusterer used as the clusterer * * @return the clusterer used as the clusterer */ public Clusterer getClusterer() { return m_Clusterer; } /** * Run a battery of tests * * @param weighted true if the clusterer says it handles weights * @param multiInstance true if the clusterer is a multi-instance clusterer * @param updateable true if the classifier is updateable */ protected void runTests(boolean weighted, boolean multiInstance, boolean updateable) { boolean PNom = canPredict(true, false, false, false, false, multiInstance)[0]; boolean PNum = canPredict(false, true, false, false, false, multiInstance)[0]; boolean PStr = canPredict(false, false, true, false, false, multiInstance)[0]; boolean PDat = canPredict(false, false, false, true, false, multiInstance)[0]; boolean PRel; if (!multiInstance) PRel = canPredict(false, false, false, false, true, multiInstance)[0]; else PRel = false; if (PNom || PNum || PStr || PDat || PRel) { if (weighted) instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance); canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance); boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, true, 20)[0]; if (handleMissingPredictors) canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, true, 100); correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance); datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, handleMissingPredictors); if (updateable) updatingEquality(PNom, PNum, PStr, PDat, PRel, multiInstance); } } /** * Checks whether the scheme can take command line options. 
* * @return index 0 is true if the clusterer can take options */ protected boolean[] canTakeOptions() { boolean[] result = new boolean[2]; print("options..."); if (m_Clusterer instanceof OptionHandler) { println("yes"); if (m_Debug) { println("\n=== Full report ==="); Enumeration enu = ((OptionHandler)m_Clusterer).listOptions(); while (enu.hasMoreElements()) { Option option = (Option) enu.nextElement(); print(option.synopsis() + "\n" + option.description() + "\n"); } println("\n"); } result[0] = true; } else { println("no"); result[0] = false; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -