subspacecluster.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 994 行 · 第 1/2 页
JAVA
994 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    SubspaceCluster.java *    Copyright (C) 2001 Gabi Schmidberger * */package weka.datagenerators.clusterers;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.Range;import weka.core.Tag;import weka.core.Utils;import weka.datagenerators.ClusterDefinition;import weka.datagenerators.ClusterGenerator;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * A data generator that produces data points in hyperrectangular subspace clusters. 
* <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -h *  Prints this help.</pre> *  * <pre> -o &lt;file&gt; *  The name of the output file, otherwise the generated data is *  printed to stdout.</pre> *  * <pre> -r &lt;name&gt; *  The name of the relation.</pre> *  * <pre> -d *  Whether to print debug informations.</pre> *  * <pre> -S *  The seed for random function (default 1)</pre> *  * <pre> -a &lt;num&gt; *  The number of attributes (default 1).</pre> *  * <pre> -c *  Class Flag, if set, the cluster is listed in extra attribute.</pre> *  * <pre> -b &lt;range&gt; *  The indices for boolean attributes.</pre> *  * <pre> -m &lt;range&gt; *  The indices for nominal attributes.</pre> *  * <pre> -P &lt;num&gt; *  The noise rate in percent (default 0.0). *  Can be between 0% and 30%. (Remark: The original  *  algorithm only allows noise up to 10%.)</pre> *  * <pre> -C &lt;cluster-definition&gt; *  A cluster definition of class 'SubspaceClusterDefinition' *  (definition needs to be quoted to be recognized as  *  a single argument).</pre> *  * <pre>  * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition: * </pre> *  * <pre> -A &lt;range&gt; *  Generates randomly distributed instances in the cluster.</pre> *  * <pre> -U &lt;range&gt; *  Generates uniformly distributed instances in the cluster.</pre> *  * <pre> -G &lt;range&gt; *  Generates gaussian distributed instances in the cluster.</pre> *  * <pre> -D &lt;num&gt;,&lt;num&gt; *  The attribute min/max (-A and -U) or mean/stddev (-G) for *  the cluster.</pre> *  * <pre> -N &lt;num&gt;..&lt;num&gt; *  The range of number of instances per cluster (default 1..50).</pre> *  * <pre> -I *  Uses integer instead of continuous values (default continuous).</pre> *  <!-- options-end --> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author  FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.3 $  */public class SubspaceCluster   extends 
ClusterGenerator {  /** for serialization */  static final long serialVersionUID = -3454999858505621128L;    /** noise rate in percent (option P,  between 0 and 30)*/   protected double m_NoiseRate;  /** cluster list */  protected ClusterDefinition[] m_Clusters;  /** if nominal, store number of values */  protected int[] m_numValues;  /** store global min values */  protected double[] m_globalMinValue;  /** store global max values */  protected double[] m_globalMaxValue;  /** cluster type: uniform/random */  public static final int UNIFORM_RANDOM = 0;    /** cluster type: total uniform */  public static final int TOTAL_UNIFORM = 1;  /** cluster type: gaussian */  public static final int GAUSSIAN = 2;  /** the tags for the cluster types */  public static final Tag[] TAGS_CLUSTERTYPE = {    new Tag(UNIFORM_RANDOM, "uniform/random"),    new Tag(TOTAL_UNIFORM,  "total uniform"),    new Tag(GAUSSIAN,       "gaussian")  };  /** cluster subtype: continuous */  public static final int CONTINUOUS = 0;  /** cluster subtype: integer */  public static final int INTEGER = 1;  /** the tags for the cluster types */  public static final Tag[] TAGS_CLUSTERSUBTYPE = {    new Tag(CONTINUOUS, "continuous"),    new Tag(INTEGER,    "integer")  };  /**   * initializes the generator, sets the number of clusters to 0, since user   * has to specify them explicitly   */  public SubspaceCluster() {    super();    setNoiseRate(defaultNoiseRate());  }  /**   * Returns a string describing this data generator.   *   * @return a description of the data generator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "A data generator that produces data points in "      + "hyperrectangular subspace clusters.";  }  /**   * Returns an enumeration describing the available options.   
*   * @return an enumeration of all the available options   */  public Enumeration listOptions() {    Vector result = enumToVector(super.listOptions());    result.addElement(new Option(          "\tThe noise rate in percent (default "           + defaultNoiseRate() + ").\n"          + "\tCan be between 0% and 30%. (Remark: The original \n"          + "\talgorithm only allows noise up to 10%.)",          "P", 1, "-P <num>"));    result.addElement(new Option(          "\tA cluster definition of class '" 	  + SubspaceClusterDefinition.class.getName().replaceAll(".*\\.", "") + "'\n"	  + "\t(definition needs to be quoted to be recognized as \n"	  + "\ta single argument).",          "C", 1, "-C <cluster-definition>"));    result.addElement(new Option(	      "", "", 0, 	      "\nOptions specific to " 	      + SubspaceClusterDefinition.class.getName() + ":"));    result.addAll(        enumToVector(new SubspaceClusterDefinition(this).listOptions()));    return result.elements();  }  /**   * Parses a list of options for this object. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -h   *  Prints this help.</pre>   *    * <pre> -o &lt;file&gt;   *  The name of the output file, otherwise the generated data is   *  printed to stdout.</pre>   *    * <pre> -r &lt;name&gt;   *  The name of the relation.</pre>   *    * <pre> -d   *  Whether to print debug informations.</pre>   *    * <pre> -S   *  The seed for random function (default 1)</pre>   *    * <pre> -a &lt;num&gt;   *  The number of attributes (default 1).</pre>   *    * <pre> -c   *  Class Flag, if set, the cluster is listed in extra attribute.</pre>   *    * <pre> -b &lt;range&gt;   *  The indices for boolean attributes.</pre>   *    * <pre> -m &lt;range&gt;   *  The indices for nominal attributes.</pre>   *    * <pre> -P &lt;num&gt;   *  The noise rate in percent (default 0.0).   *  Can be between 0% and 30%. 
(Remark: The original    *  algorithm only allows noise up to 10%.)</pre>   *    * <pre> -C &lt;cluster-definition&gt;   *  A cluster definition of class 'SubspaceClusterDefinition'   *  (definition needs to be quoted to be recognized as    *  a single argument).</pre>   *    * <pre>    * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition:   * </pre>   *    * <pre> -A &lt;range&gt;   *  Generates randomly distributed instances in the cluster.</pre>   *    * <pre> -U &lt;range&gt;   *  Generates uniformly distributed instances in the cluster.</pre>   *    * <pre> -G &lt;range&gt;   *  Generates gaussian distributed instances in the cluster.</pre>   *    * <pre> -D &lt;num&gt;,&lt;num&gt;   *  The attribute min/max (-A and -U) or mean/stddev (-G) for   *  the cluster.</pre>   *    * <pre> -N &lt;num&gt;..&lt;num&gt;   *  The range of number of instances per cluster (default 1..50).</pre>   *    * <pre> -I   *  Uses integer instead of continuous values (default continuous).</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String                      tmpStr;    SubspaceClusterDefinition   cl;    Vector                      list;    int                         clCount;    super.setOptions(options);    m_numValues = new int[getNumAttributes()];    // numValues might be changed by a cluster definition    // (only relevant for nominal data)    for (int i = 0; i < getNumAttributes(); i++)      m_numValues[i] = 1;    tmpStr = Utils.getOption('P', options);    if (tmpStr.length() != 0)      setNoiseRate(Double.parseDouble(tmpStr));    else      setNoiseRate(defaultNoiseRate());    // cluster definitions    list = new Vector();        clCount = 0;    do {      tmpStr = Utils.getOption('C', options);      if (tmpStr.length() != 0) {        clCount++;        cl = new 
SubspaceClusterDefinition(this);        cl.setOptions(Utils.splitOptions(tmpStr));        list.add(cl);      }    }    while (tmpStr.length() != 0);    m_Clusters = (ClusterDefinition[])                     list.toArray(new ClusterDefinition[list.size()]);    // in case no cluster definition was provided, make sure that there's at    // least one definition present -> see getClusters()    getClusters();  }  /**   * Gets the current settings of the datagenerator.   *   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    Vector        result;    String[]      options;    int           i;    result  = new Vector();    options = super.getOptions();    for (i = 0; i < options.length; i++)      result.add(options[i]);    result.add("-P");     result.add("" + getNoiseRate());    for (i = 0; i < getClusters().length; i++)  {      result.add("-C");      result.add(Utils.joinOptions(getClusters()[i].getOptions()));    }    return (String[]) result.toArray(new String[result.size()]);  }  /**   * returns the current cluster definitions, if necessary initializes them   *    * @return the current cluster definitions   */  protected ClusterDefinition[] getClusters() {    if ( (m_Clusters == null) || (m_Clusters.length == 0) ) {      if (m_Clusters != null)        System.out.println("NOTE: at least 1 cluster definition is necessary, "             + "created default one.");      m_Clusters = new ClusterDefinition[]{new SubspaceClusterDefinition(this)};    }    return m_Clusters;  }  /**   * returns the default number of attributes   *    * @return the default number of attributes   */  protected int defaultNumAttributes() {    return 1;  }  /**   * Sets the number of attributes the dataset should have.   
* @param numAttributes the new number of attributes
   */
  public void setNumAttributes(int numAttributes) {
    super.setNumAttributes(numAttributes);
    // NOTE(review): unlike setOptions(), the new array keeps Java's default
    // of 0 per entry instead of being filled with 1 - confirm this is intended
    m_numValues = new int[getNumAttributes()];
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String numAttributesTipText() {
    return "The number of attributes the generated data will contain (Note: they must be covered by the cluster definitions!)";
  }

  /**
   * returns the default noise rate
   *
   * @return the default noise rate
   */
  protected double defaultNoiseRate() {
    return 0.0;
  }

  /**
   * Gets the percentage of noise set.
   *
   * @return the percentage of noise set
   */
  public double getNoiseRate() {
    return m_NoiseRate;
  }

  /**
   * Sets the percentage of noise set. The value is stored as given; no
   * range check is performed here, although the option help text mentions
   * 0% to 30%.
   *
   * @param newNoiseRate new percentage of noise
   */
  public void setNoiseRate(double newNoiseRate) {
    m_NoiseRate = newNoiseRate;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String noiseRateTipText() {
    return "The noise rate to use.";
  }

  /**
   * returns the currently set clusters
   *
   * @return the currently set clusters
   */
  public ClusterDefinition[] getClusterDefinitions() {
    return getClusters();
  }

  /**
   * Sets the clusters to use. The array is stored first; then every
   * definition gets this generator as parent and is re-initialized by
   * feeding its own options back into it. Definitions of the wrong class
   * are collected and reported in one exception after the loop, i.e. the
   * array remains stored even if the exception is thrown.
   *
   * @param value the clusters to use
   * @throws Exception if clusters are not the correct class
   */
  public void setClusterDefinitions(ClusterDefinition[] value)
    throws Exception {

    String      indexStr;

    indexStr   = "";
    m_Clusters = value;
    for (int i = 0; i < getClusters().length; i++) {
      if (!(getClusters()[i] instanceof SubspaceClusterDefinition)) {
        // remember the 1-based position of the offending definition
        if (indexStr.length() != 0) {
          indexStr += ",";
        }
        indexStr += "" + (i+1);
      }
      getClusters()[i].setParent(this);
      // for initializing!
      getClusters()[i].setOptions(getClusters()[i].getOptions());
    }

    // any wrong classes encountered?
    if (indexStr.length() != 0) {
      throw new Exception("These cluster definitions are not '"
          + SubspaceClusterDefinition.class.getName() + "': " + indexStr);
    }
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String clusterDefinitionsTipText() {
    return "The clusters to use.";
  }

  /**
   * Checks, whether all attributes are covered by cluster definitions and
   * returns TRUE in that case.
   *
   * @return whether all attributes are covered
   */
  protected boolean checkCoverage() {
    int         i;
    int         n;
    int[]       count;
    Range       r;
    String      attrIndex;
    SubspaceClusterDefinition  cl;
subspacecluster.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 subspacecluster.java 源码文件，采用 Java 编程语言编写，共 994 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?