📄 addclassification.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * AddClassification.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand */package weka.filters.supervised.attribute;import weka.classifiers.Classifier;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SparseInstance;import weka.core.Utils;import weka.filters.SimpleBatchFilter;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.ObjectInputStream;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * A filter for adding the classification, the class distribution and an error flag to a dataset with a classifier. The classifier is either trained on the data itself or provided as serialized model. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turns on output of debugging information.</pre> * * <pre> -W <classifier specification> * Full class name of classifier to use, followed * by scheme options. eg: * "weka.classifiers.bayes.NaiveBayes -D" * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> -serialized <file> * Instead of training a classifier on the data, one can also provide * a serialized model and use that for tagging the data.</pre> * * <pre> -classification * Adds an attribute with the actual classification. * (default: off)</pre> * * <pre> -distribution * Adds attributes with the distribution for all classes * (for numeric classes this will be identical to the attribute * output with '-classification'). * (default: off)</pre> * * <pre> -error * Adds an attribute indicating whether the classifier output * a wrong classification (for numeric classes this is the numeric * difference). * (default: off)</pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.1 $ */public class AddClassification extends SimpleBatchFilter { /** for serialization */ private static final long serialVersionUID = -1931467132568441909L; /** The classifier template used to do the classification */ protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR(); /** The file from which to load a serialized classifier */ protected File m_SerializedClassifierFile = new File(System.getProperty("user.dir")); /** The actual classifier used to do the classification */ protected Classifier m_ActualClassifier = null; /** whether to output the classification */ protected boolean m_OutputClassification = false; /** whether to output the class distribution */ protected boolean m_OutputDistribution = false; /** whether to output the error flag */ protected boolean m_OutputErrorFlag = false; /** * Returns a string describing this filter * * @return a description of the filter suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A filter for adding the classification, the class distribution and " + "an error flag to a dataset with a classifier. The classifier is " + "either trained on the data itself or provided as serialized model."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result; Enumeration en; result = new Vector(); en = super.listOptions(); while (en.hasMoreElements()) result.addElement(en.nextElement()); result.addElement(new Option( "\tFull class name of classifier to use, followed\n" + "\tby scheme options. eg:\n" + "\t\t\"weka.classifiers.bayes.NaiveBayes -D\"\n" + "\t(default: weka.classifiers.rules.ZeroR)", "W", 1, "-W <classifier specification>")); result.addElement(new Option( "\tInstead of training a classifier on the data, one can also provide\n" + "\ta serialized model and use that for tagging the data.", "serialized", 1, "-serialized <file>")); result.addElement(new Option( "\tAdds an attribute with the actual classification.\n" + "\t(default: off)", "classification", 0, "-classification")); result.addElement(new Option( "\tAdds attributes with the distribution for all classes \n" + "\t(for numeric classes this will be identical to the attribute \n" + "\toutput with '-classification').\n" + "\t(default: off)", "distribution", 0, "-distribution")); result.addElement(new Option( "\tAdds an attribute indicating whether the classifier output \n" + "\ta wrong classification (for numeric classes this is the numeric \n" + "\tdifference).\n" + "\t(default: off)", "error", 0, "-error")); return result.elements(); } /** * Parses the options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * Turns on output of debugging information.</pre> * * <pre> -W <classifier specification> * Full class name of classifier to use, followed * by scheme options. eg: * "weka.classifiers.bayes.NaiveBayes -D" * (default: weka.classifiers.rules.ZeroR)</pre> * * <pre> -serialized <file> * Instead of training a classifier on the data, one can also provide * a serialized model and use that for tagging the data.</pre> * * <pre> -classification * Adds an attribute with the actual classification. * (default: off)</pre> * * <pre> -distribution * Adds attributes with the distribution for all classes * (for numeric classes this will be identical to the attribute * output with '-classification'). * (default: off)</pre> * * <pre> -error * Adds an attribute indicating whether the classifier output * a wrong classification (for numeric classes this is the numeric * difference). * (default: off)</pre> * <!-- options-end --> * * @param options the options to use * @throws Exception if setting of options fails */ public void setOptions(String[] options) throws Exception { String tmpStr; String[] tmpOptions; File file; boolean serializedModel; setOutputClassification(Utils.getFlag("classification", options)); setOutputClassification(Utils.getFlag("distribution", options)); setOutputClassification(Utils.getFlag("error", options)); serializedModel = false; tmpStr = Utils.getOption("serialized", options); if (tmpStr.length() != 0) { file = new File(tmpStr); if (!file.exists()) throw new FileNotFoundException( "File '" + file.getAbsolutePath() + "' not found!"); if (file.isDirectory()) throw new FileNotFoundException( "'" + file.getAbsolutePath() + "' points to a directory not a file!"); setSerializedClassifierFile(file); serializedModel = true; } else { setSerializedClassifierFile(null); } if (!serializedModel) { tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) tmpStr = weka.classifiers.rules.ZeroR.class.getName(); tmpOptions = Utils.splitOptions(tmpStr); if (tmpOptions.length == 0) throw new Exception("Invalid classifier specification string"); tmpStr = tmpOptions[0]; tmpOptions[0] = ""; setClassifier(Classifier.forName(tmpStr, tmpOptions)); } super.setOptions(options); } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { int i; Vector result; String[] options; File file; result = new Vector(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); if (getOutputClassification()) result.add("-classification"); if (getOutputDistribution()) result.add("-distribution"); if (getOutputErrorFlag()) result.add("-error"); file = getSerializedClassifierFile(); if ((file != null) && (!file.isDirectory())) { result.add("-serialized"); result.add(file.getAbsolutePath()); } else { result.add("-W"); result.add(getClassifierSpec()); } return (String[]) result.toArray(new String[result.size()]); } /** * Returns the Capabilities of this filter. * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result; if (getClassifier() == null) result = super.getCapabilities(); else result = getClassifier().getCapabilities(); result.setMinimumNumberInstances(0); return result; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String classifierTipText() { return "The classifier to use for classification."; } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -