⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 datagenerator.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * DataGenerator.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.datagenerators;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Randomizable;import weka.core.Utils;import java.io.FileOutputStream;import java.io.PrintWriter;import java.io.Serializable;import java.io.StringWriter;import java.util.Enumeration;import java.util.HashSet;import java.util.Hashtable;import java.util.Random;import java.util.Vector;/**  * Abstract superclass for data generators that generate data for  * classifiers and clusterers. 
*
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.3 $
 */
public abstract class DataGenerator
  implements OptionHandler, Randomizable, Serializable {

  /** Debugging mode; off by default. */
  protected boolean m_Debug = false;

  /** The format for the generated dataset; starts out as null. */
  protected Instances m_DatasetFormat = null;

  /** Relation name the dataset should have; the empty string means "not set". */
  protected String m_RelationName = "";

  /** Number of instances that should be produced into the dataset
   * this number is by default m_NumExamples,
   * but can be reset by the generator
   */
  protected int m_NumExamplesAct;

  /**
   * default output (is printed to stdout after generation).
   * NOTE(review): the field is transient, so default Java serialization does
   * not store it; presumably it is null after deserialization unless it is
   * restored elsewhere - confirm.
   */
  protected transient StringWriter m_DefaultOutput = new StringWriter();

  /**
   * PrintWriter for outputting the generated data; initially writes into
   * m_DefaultOutput, so it must be declared after that field.
   * NOTE(review): transient as well - same deserialization caveat applies.
   */
  protected transient PrintWriter m_Output = new PrintWriter(m_DefaultOutput);

  /** random number generator seed */
  protected int m_Seed;

  /** random number generator; not created here (starts out as null) */
  protected Random m_Random = null;

  /**
   * flag, that indicates whether the relationname is currently assembled.
   * While it is set, getOptions() leaves out the "-r" option to avoid an
   * endless recursion.
   */
  protected boolean m_CreatingRelationName = false;

  /** a black list for options not to be listed (for derived generators)
   *  in the makeOptionString method.
   *  The set is static, i.e. a single instance is shared by the whole class.
   *  @see #makeOptionString(DataGenerator) */
  protected static HashSet m_OptionBlacklist;
  static {
    m_OptionBlacklist = new HashSet();
  }

  /**
   * initializes with default settings. <br/>
   * Note: default values are set via a default&lt;name&gt; method. These
   * default methods are also used in the listOptions method and in the
   * setOptions method. Why? Derived generators can override the return value
   * of these default methods, to avoid exceptions. <br/>
   * The constructor first calls clearBlacklist() and then applies the
   * defaults for the number of examples and for the seed, in that order.
   */
  public DataGenerator() {
    clearBlacklist();

    setNumExamplesAct(defaultNumExamplesAct());
    setSeed(defaultSeed());
  }

  /**
   * creates a vector out of the enumeration from the listOptions of the
   * super class. Only a "convenience" method.
* @param enm     the Enumeration to dump into a vector   * @return        the elements of the enumeration in a vector   */  protected Vector enumToVector(Enumeration enm) {    Vector      result;    result = new Vector();    while (enm.hasMoreElements())      result.add(enm.nextElement());    return result;  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector      result;    result = new Vector();    result.addElement(new Option(          "\tPrints this help.",          "h", 1, "-h"));    result.addElement(new Option(          "\tThe name of the output file, otherwise the generated data is\n"          + "\tprinted to stdout.",          "o", 1, "-o <file>"));    result.addElement(new Option(          "\tThe name of the relation.",          "r", 1, "-r <name>"));    result.addElement(new Option(          "\tWhether to print debug informations.",          "d", 0, "-d"));    result.addElement(new Option(          "\tThe seed for random function (default "           + defaultSeed() + ")",          "S", 1, "-S"));    return result.elements();  }  /**   * Parses a list of options for this object. <p/>   *   * For list of valid options see class description. 
<p/>   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String        tmpStr;    // remove unwanted options    options = removeBlacklist(options);    tmpStr = Utils.getOption('r', options);    if (tmpStr.length() != 0)      setRelationName(tmpStr);    else      setRelationName("");    tmpStr = Utils.getOption('o', options);    if (tmpStr.length() != 0)      setOutput(new PrintWriter(new FileOutputStream(tmpStr)));    else if (getOutput() == null)      throw new Exception("No Output defined!");    setDebug(Utils.getFlag('d', options));        tmpStr = Utils.getOption('S', options);    if (tmpStr.length() != 0)      setSeed(Integer.parseInt(tmpStr));    else      setSeed(defaultSeed());  }  /**   * Gets the current settings of the datagenerator RDG1. Removing of    * blacklisted options has to be done in the derived class, that defines   * the blacklist-entry.   *   * @return an array of strings suitable for passing to setOptions   * @see    #removeBlacklist(String[])   */  public String[] getOptions() {    Vector        result;    result = new Vector();    // to avoid endless loop    if (!m_CreatingRelationName) {      result.add("-r");      result.add(Utils.quote(getRelationNameToUse()));    }    if (getDebug())      result.add("-d");        result.add("-S");    result.add("" + getSeed());    return (String[]) result.toArray(new String[result.size()]);  }  /**   * Initializes the format for the dataset produced.    * Must be called before the generateExample or generateExamples   * methods are used. Also sets a default relation name in case   * the current relation name is empty.   
*   * @return the format for the dataset    * @throws Exception if the generating of the format failed   * @see #defaultRelationName()   */  public Instances defineDataFormat() throws Exception {    if (getRelationName().length() == 0)      setRelationName(defaultRelationName());    return m_DatasetFormat;  }  /**   * Generates one example of the dataset.    *   * @return the generated example   * @throws Exception if the format of the dataset is not yet defined   * @throws Exception if the generator only works with generateExamples   * which means in non single mode   */  public abstract Instance generateExample() throws Exception;  /**   * Generates all examples of the dataset.    *   * @return the generated dataset   * @throws Exception if the format of the dataset is not yet defined   * @throws Exception if the generator only works with generateExample,   * which means in single mode   */  public abstract Instances generateExamples() throws Exception;  /**   * Generates a comment string that documentates the data generator.   * By default this string is added at the beginning of the produced output   * as ARFF file type, next after the options.   *    * @return string contains info about the generated rules   * @throws Exception if the generating of the documentation fails   */  public abstract String generateStart () throws Exception;  /**   * Generates a comment string that documentates the data generator.   * By default this string is added at the end of the produced output   * as ARFF file type.   *    * @return string contains info about the generated rules   * @throws Exception if the generating of the documentation fails   */  public abstract String generateFinished () throws Exception;  /**   * Return if single mode is set for the given data generator   * mode depends on option setting and or generator type.   
*    * @return single mode flag   * @throws Exception if mode is not set yet   */  public abstract boolean getSingleModeFlag () throws Exception;  /**   * Sets the debug flag.   * @param debug the new debug flag   */  public void setDebug(boolean debug) {     m_Debug = debug;  }  /**   * Gets the debug flag.   * @return the debug flag    */  public boolean getDebug() {     return m_Debug;   }    /**   * Returns the tip text for this property   *    * @return tip text for this property suitable for   *         displaying in the explorer/experimenter gui   */  public String debugTipText() {    return "Whether the generator is run in debug mode or not.";  }  /**   * Sets the relation name the dataset should have.   * @param relationName the new relation name   */  public void setRelationName(String relationName) {    m_RelationName = relationName;  }  /**   * returns a relation name based on the options   *    * @return a relation name based on the options   */  protected String defaultRelationName() {    StringBuffer    result;    String[]        options;    String          option;    int             i;    m_CreatingRelationName = true;    result = new StringBuffer(this.getClass().getName());    options = getOptions();    for (i = 0; i < options.length; i++) {      option = options[i].trim();      if (i > 0)        result.append("_");      result.append(option.replaceAll(" ", "_"));    }    m_CreatingRelationName = false;    return result.toString();  }  /**   * returns the relation name to use, i.e., in case the currently set   * relation name is empty, a generic one is returned. Must be used in   * defineDataFormat()   * @return the relation name   * @see #defaultRelationName()   * @see #defineDataFormat()   */  protected String getRelationNameToUse() {    String        result;    result = getRelationName();    if (result.length() == 0)      result = defaultRelationName();    return result;  }  /**   * Gets the relation name the dataset should have.   
* @return the relation name the dataset should have (the empty string
   *         if none has been set)
   */
  public String getRelationName() {
    return this.m_RelationName;
  }

  /**
   * Supplies the tool tip for the relation name property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String relationNameTipText() {
    return "The relation name of the generated data "
      + "(if empty, a generic one will be supplied).";
  }

  /**
   * returns the default number of actual examples

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -