📄 datagenerator.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * DataGenerator.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.datagenerators;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Randomizable;import weka.core.Utils;import java.io.FileOutputStream;import java.io.PrintWriter;import java.io.Serializable;import java.io.StringWriter;import java.util.Enumeration;import java.util.HashSet;import java.util.Hashtable;import java.util.Random;import java.util.Vector;/** * Abstract superclass for data generators that generate data for * classifiers and clusterers. 
*
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.3 $
 */
public abstract class DataGenerator
  implements OptionHandler, Randomizable, Serializable {

  /** Debugging mode; off by default. */
  protected boolean m_Debug = false;

  /** The format for the generated dataset; starts out as null. */
  protected Instances m_DatasetFormat = null;

  /** Relation name the dataset should have; the empty string means "not set". */
  protected String m_RelationName = "";

  /**
   * Number of instances that should be produced into the dataset.
   * This number is by default m_NumExamples, but can be reset by the
   * generator. It is initialized in the constructor from
   * defaultNumExamplesAct().
   */
  protected int m_NumExamplesAct;

  /**
   * Default output (is printed to stdout after generation).
   * Declared transient, i.e., it is not part of the serialized state.
   */
  protected transient StringWriter m_DefaultOutput = new StringWriter();

  /**
   * PrintWriter for outputting the generated data; initially it writes into
   * m_DefaultOutput. Declared transient, i.e., it is not part of the
   * serialized state.
   */
  protected transient PrintWriter m_Output = new PrintWriter(m_DefaultOutput);

  /** Random number generator seed; initialized in the constructor from defaultSeed(). */
  protected int m_Seed;

  /** Random number generator; starts out as null. */
  protected Random m_Random = null;

  /**
   * Flag that indicates whether the relation name is currently being
   * assembled. While it is set, getOptions() leaves out the -r option.
   */
  protected boolean m_CreatingRelationName = false;

  /**
   * A black list for options not to be listed (for derived generators)
   * in the makeOptionString method. The set is static, i.e., shared by all
   * generator instances.
   *
   * @see #makeOptionString(DataGenerator)
   */
  protected static HashSet m_OptionBlacklist;

  // the shared black list is created once, when the class is initialized
  static {
    m_OptionBlacklist = new HashSet();
  }

  /**
   * Initializes the generator with default settings.
   * <p>
   * Note: default values are obtained via defaultXYZ() methods (here
   * defaultNumExamplesAct() and defaultSeed()). These default methods are
   * also used in the listOptions method and in the setOptions method. Why?
   * Derived generators can override the return value of these default
   * methods, to avoid exceptions.
   */
  public DataGenerator() {
    clearBlacklist();
    setNumExamplesAct(defaultNumExamplesAct());
    setSeed(defaultSeed());
  }

  /**
   * creates a vector out of the enumeration from the listOptions of the
   * super class. Only a "convenience" method.
* @param enm the Enumeration to dump into a vector * @return the elements of the enumeration in a vector */ protected Vector enumToVector(Enumeration enm) { Vector result; result = new Vector(); while (enm.hasMoreElements()) result.add(enm.nextElement()); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ public Enumeration listOptions() { Vector result; result = new Vector(); result.addElement(new Option( "\tPrints this help.", "h", 1, "-h")); result.addElement(new Option( "\tThe name of the output file, otherwise the generated data is\n" + "\tprinted to stdout.", "o", 1, "-o <file>")); result.addElement(new Option( "\tThe name of the relation.", "r", 1, "-r <name>")); result.addElement(new Option( "\tWhether to print debug informations.", "d", 0, "-d")); result.addElement(new Option( "\tThe seed for random function (default " + defaultSeed() + ")", "S", 1, "-S")); return result.elements(); } /** * Parses a list of options for this object. <p/> * * For list of valid options see class description. <p/> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; // remove unwanted options options = removeBlacklist(options); tmpStr = Utils.getOption('r', options); if (tmpStr.length() != 0) setRelationName(tmpStr); else setRelationName(""); tmpStr = Utils.getOption('o', options); if (tmpStr.length() != 0) setOutput(new PrintWriter(new FileOutputStream(tmpStr))); else if (getOutput() == null) throw new Exception("No Output defined!"); setDebug(Utils.getFlag('d', options)); tmpStr = Utils.getOption('S', options); if (tmpStr.length() != 0) setSeed(Integer.parseInt(tmpStr)); else setSeed(defaultSeed()); } /** * Gets the current settings of the datagenerator RDG1. 
Removing of * blacklisted options has to be done in the derived class, that defines * the blacklist-entry. * * @return an array of strings suitable for passing to setOptions * @see #removeBlacklist(String[]) */ public String[] getOptions() { Vector result; result = new Vector(); // to avoid endless loop if (!m_CreatingRelationName) { result.add("-r"); result.add(Utils.quote(getRelationNameToUse())); } if (getDebug()) result.add("-d"); result.add("-S"); result.add("" + getSeed()); return (String[]) result.toArray(new String[result.size()]); } /** * Initializes the format for the dataset produced. * Must be called before the generateExample or generateExamples * methods are used. Also sets a default relation name in case * the current relation name is empty. * * @return the format for the dataset * @throws Exception if the generating of the format failed * @see #defaultRelationName() */ public Instances defineDataFormat() throws Exception { if (getRelationName().length() == 0) setRelationName(defaultRelationName()); return m_DatasetFormat; } /** * Generates one example of the dataset. * * @return the generated example * @throws Exception if the format of the dataset is not yet defined * @throws Exception if the generator only works with generateExamples * which means in non single mode */ public abstract Instance generateExample() throws Exception; /** * Generates all examples of the dataset. * * @return the generated dataset * @throws Exception if the format of the dataset is not yet defined * @throws Exception if the generator only works with generateExample, * which means in single mode */ public abstract Instances generateExamples() throws Exception; /** * Generates a comment string that documentates the data generator. * By default this string is added at the beginning of the produced output * as ARFF file type, next after the options. 
*
   * @return a string containing information about the generated rules
   * @throws Exception if the generating of the documentation fails
   */
  public abstract String generateStart () throws Exception;

  /**
   * Generates a comment string that documents the data generator.
   * By default this string is added at the end of the produced output
   * as ARFF file type.
   *
   * @return a string containing information about the generated rules
   * @throws Exception if the generating of the documentation fails
   */
  public abstract String generateFinished () throws Exception;

  /**
   * Returns whether single mode is set for the given data generator.
   * The mode depends on the option setting and/or the generator type.
   *
   * @return the single mode flag
   * @throws Exception if the mode is not set yet
   */
  public abstract boolean getSingleModeFlag () throws Exception;

  /**
   * Sets the debug flag.
   *
   * @param debug the new debug flag
   */
  public void setDebug(boolean debug) {
    m_Debug = debug;
  }

  /**
   * Gets the debug flag.
   *
   * @return the debug flag
   */
  public boolean getDebug() {
    return m_Debug;
  }

  /**
   * Returns the tip text for the debug property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String debugTipText() {
    return "Whether the generator is run in debug mode or not.";
  }

  /**
   * Sets the relation name the dataset should have.
* @param relationName the new relation name */ public void setRelationName(String relationName) { m_RelationName = relationName; } /** * returns a relation name based on the options * * @return a relation name based on the options */ protected String defaultRelationName() { StringBuffer result; String[] options; String option; int i; m_CreatingRelationName = true; result = new StringBuffer(this.getClass().getName()); options = getOptions(); for (i = 0; i < options.length; i++) { option = options[i].trim(); if (i > 0) result.append("_"); result.append(option.replaceAll(" ", "_")); } m_CreatingRelationName = false; return result.toString(); } /** * returns the relation name to use, i.e., in case the currently set * relation name is empty, a generic one is returned. Must be used in * defineDataFormat() * @return the relation name * @see #defaultRelationName() * @see #defineDataFormat() */ protected String getRelationNameToUse() { String result; result = getRelationName(); if (result.length() == 0) result = defaultRelationName(); return result; } /** * Gets the relation name the dataset should have. * @return the relation name the dataset should have */ public String getRelationName() { return m_RelationName; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String relationNameTipText() { return "The relation name of the generated data (if empty, a generic one will be supplied)."; } /** * returns the default number of actual examples
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -