birchcluster.java

来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,095 行 · 第 1/2 页

JAVA
1,095
字号
   * Sets the percentage of noise set.
   *
   * @param newNoiseRate new percentage of noise
   */
  public void setNoiseRate(double newNoiseRate) {
    m_NoiseRate = newNoiseRate;
  }

  /**
   * Gets the random generator. If none has been set yet, a new one is
   * created from the current seed and remembered.
   *
   * @return the random generator
   */
  public Random getRandom() {
    if (m_Random == null) {
      m_Random = new Random (getSeed());
    }
    return m_Random;
  }

  /**
   * Sets the random generator.
   *
   * @param newRandom is the random generator.
   */
  public void setRandom(Random newRandom) {
    m_Random = newRandom;
  }

  /**
   * Gets the random number seed.
   *
   * @return the random number seed.
   */
  public int getSeed() { return m_Seed; }

  /**
   * Sets the random number seed.
   *
   * @param newSeed the new random number seed.
   */
  public void setSeed(int newSeed) { m_Seed = newSeed; }

  /**
   * Gets the dataset format.
   *
   * @return the dataset format.
   */
  public Instances getDatasetFormat() { return m_DatasetFormat; }

  /**
   * Sets the dataset format.
   *
   * @param newDatasetFormat the new dataset format.
   */
  public void setDatasetFormat(Instances newDatasetFormat) {
    m_DatasetFormat = newDatasetFormat;
  }

  /**
   * Gets the single mode flag.
   *
   * @return true if method generateExample can be used; this generator
   * always returns false (see generateExample, which always throws).
   */
  public boolean getSingleModeFlag() { return (false); }

  /**
   * Returns an enumeration describing the available options.
   * The option letters listed here are the ones parsed by setOptions:
   * G and I select the pattern, O the input order (all three are plain
   * flags without an argument), the remaining options take one argument.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(9);

    newVector.addElement(new Option(
              "\tSet pattern to grid (default is random).",
              "G", 0, "-G"));
    // setOptions reads the sine flag as 'I'; 'S' is the seed.
    newVector.addElement(new Option(
              "\tSet pattern to sine (default is random).",
              "I", 0, "-I"));
    newVector.addElement(new Option(
              "\tThe range of number of instances per cluster (default 1..50).",
              "N", 1, "-N <num>..<num>"));
    newVector.addElement(new Option(
              "\tThe range of radius per cluster (default 0.1..sqrt(2)).",
              "R", 1, "-R <num>..<num>"));
    newVector.addElement(new Option(
              "\tThe distance multiplier (default 4).",
              "M", 1, "-M <num>"));
    newVector.addElement(new Option(
              "\tThe number of cycles (default 4).",
              "C", 1, "-C <num>"));
    newVector.addElement(new Option(
              "\tSet input order to ordered (default is randomized).",
              "O", 0, "-O"));
    newVector.addElement(new Option(
              "\tThe noise rate in percent (default 0).",
              "P", 1, "-P <num>"));
    newVector.addElement(new Option(
              "\tThe Seed for random function (default 1).",
              "S", 1, "-S <num>"));

    return newVector.elements();
  }

  /**
   * Sets all options to their default values. <p>
   *
   * Instances per cluster 1..50, radius 0.1..sqrt(2), pattern RANDOM,
   * distance multiplier 4, number of cycles 4, input order RANDOMIZED,
   * noise rate 0 and seed 1.
   */
  public void setDefaultOptions() {
    m_MinInstNum = 1;
    m_MaxInstNum = 50;
    m_MinRadius = 0.1;
    m_MaxRadius = Math.sqrt(2.0);
    m_Pattern = RANDOM;
    m_DistMult = 4;
    m_NumCycles = 4;
    m_InputOrder = RANDOMIZED;
    m_NoiseRate = 0.0;
    m_Seed = 1;
  }

  /**
   * Parses a list of options for this object. <p>
   *
   * All options are first reset to their defaults, so every setting that
   * is not given in <code>options</code> falls back to its default. <p>
   *
   * For list of valid options see class description.<p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported, if both pattern
   * flags G and I are given, if M is used without G, or if C is used
   * without I
   */
  public void setOptions(String[] options) throws Exception {
    setDefaultOptions();
    String num;
    String fromTo;

    fromTo = Utils.getOption('N', options);
    if (fromTo.length() != 0) {
      setInstNums(fromTo);
    }

    fromTo = Utils.getOption('R', options);
    if (fromTo.length() != 0) {
      setRadiuses(fromTo);
    }

    // the two pattern flags exclude each other; neither one means RANDOM
    boolean grid = Utils.getFlag('G', options);
    boolean sine = Utils.getFlag('I', options);

    if (grid && sine)
      throw new Exception("Flags G and I can only be set mutually exclusiv.");

    if (grid)
      setPattern(GRID);
    if (sine)
      setPattern(SINE);

    // the distance multiplier is only meaningful for the GRID pattern
    num = Utils.getOption('M', options);
    if (num.length() != 0) {
      if (!grid)
        throw new Exception("Option M can only be used with GRID pattern.");
      setDistMult(Double.valueOf(num).doubleValue());
    }

    // the number of cycles is only meaningful for the SINE pattern
    num = Utils.getOption('C', options);
    if (num.length() != 0) {
      if (!sine)
        throw new Exception("Option C can only be used with SINE pattern.");
      // parsed as a double and truncated to int
      setNumCycles((int)Double.valueOf(num).doubleValue());
    }

    boolean ordered = Utils.getFlag('O', options);
    if (ordered)
      setInputOrder(ORDERED);

    num = Utils.getOption('P', options);
    if (num.length() != 0) {
      setNoiseRate(Double.valueOf(num).doubleValue());
    }

    num = Utils.getOption('S', options);
    if (num.length() != 0) {
      setSeed(Integer.parseInt(num));
    }
  }

  /**
   * Gets the current settings of the datagenerator BIRCHCluster.
   * The option letters written here match the ones read by setOptions
   * (in particular -M for the distance multiplier and -S for the seed),
   * so the result can be fed back into setOptions. Unused trailing slots
   * of the returned array are filled with empty strings.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
    String [] options = new String [20];
    int i = 0;
    options[i++] = "-N"; options[i++] = "" + getInstNums();
    options[i++] = "-R"; options[i++] = "" + getRadiuses();
    if (getGridFlag()) {
      options[i++] = "-G"; options[i++] = "";
      // setOptions parses the distance multiplier as 'M'
      options[i++] = "-M"; options[i++] = "" + getDistMult();
    }
    if (getSineFlag()) {
      options[i++] = "-I"; options[i++] = "";
      options[i++] = "-C"; options[i++] = "" + getNumCycles();
    }
    if (getOrderedFlag()) {
      options[i++] = "-O"; options[i++] = "";
    }
    options[i++] = "-P"; options[i++] = "" + getNoiseRate();
    // setOptions resets the seed to its default, so it has to be passed on
    options[i++] = "-S"; options[i++] = "" + getSeed();

    while (i < options.length) {
      options[i++] = "";
    }
    return options;
  }

  /**
   * Initializes the format for the dataset produced.
   * Besides building the (empty) dataset header this also creates a new
   * random generator from the seed, stores an empty copy of the header
   * as dataset format and defines the list of clusters.
   *
   * @return the output data format
   * @exception Exception data format could not be defined
   */
  public Instances defineDataFormat() throws Exception {

    // start from a fresh generator so the result depends only on the seed
    Random random = new Random (getSeed());
    setRandom(random);
    Instances dataset;
    FastVector attributes = new FastVector(3);
    Attribute attribute;
    boolean classFlag = getClassFlag();

    FastVector classValues = null;
    if (classFlag) classValues = new FastVector (m_NumClusters);

    // define dataset: one numeric attribute X<i> per dimension
    for (int i = 0; i < getNumAttributes(); i++) {
      attribute = new Attribute("X" + i);
      attributes.addElement(attribute);
    }

    // optional nominal class attribute with one value c<i> per cluster
    if (classFlag) {
      for (int i = 0; i < m_NumClusters; i++) {
        classValues.addElement("c" + i);
      }
      attribute = new Attribute ("class", classValues);
      attributes.addElement(attribute);
    }

    dataset = new Instances(getRelationName(), attributes, 0);
    if (classFlag)
      dataset.setClassIndex(m_NumAttributes);

    // set dataset format of this class
    Instances format = new Instances(dataset, 0);
    setDatasetFormat(format);

    m_ClusterList = defineClusters(random);

    // NOTE(review): diagnostic output on stdout; if the generated data is
    // written to stdout as well, the two will be mixed - confirm.
    System.out.println("dataset" + dataset.numAttributes());
    return dataset;
  }

  /**
   * Generate an example of the dataset. Not supported by this generator:
   * the method always throws.
   *
   * @return never returns normally
   * @exception Exception always, because examples cannot be generated
   * one by one
   */
  public Instance generateExample() throws Exception {
    throw new Exception("Examples cannot be generated" +
                                           " one by one.");
  }

  /**
   * Generate all examples of the dataset. Only the ORDERED input order is
   * implemented.
   *
   * @return the dataset format filled with the generated instances
   * @exception Exception if format not defined or if the input order is
   * not ORDERED
   */
  public Instances generateExamples() throws Exception {
    Random random = getRandom();
    Instances data = getDatasetFormat();
    if (data == null) throw new Exception("Dataset format not defined.");

    // generate examples
    if (getOrderedFlag())
      data = generateExamples(random, data);
    else
      throw new Exception("RANDOMIZED is not yet implemented.");

    return (data);
  }

  /**
   * Generate all examples of the dataset, one cluster after another.
   * The instances are added to <code>format</code> itself.
   *
   * @param random the random number generator used for the attribute values
   * @param format the dataset the generated instances are added to
   * @return <code>format</code>, now holding the generated instances
   * @exception Exception if format not defined
   */
  public Instances generateExamples(Random random,
                                    Instances format) throws Exception {
    Instance example = null;

    if (format == null) throw new Exception("Dataset format not defined.");

    // generate examples for one cluster after another
    int cNum = 0;
    // "enum" is a reserved word in current Java, hence the different name
    for (Enumeration clusterEnum = m_ClusterList.elements();
         clusterEnum.hasMoreElements(); cNum++) {
      Cluster cl  = (Cluster) clusterEnum.nextElement();
      double stdDev = cl.getStdDev();
      int instNum = cl.getInstNum();
      double [] center = cl.getCenter();
      String cName = "c" + cNum;

      for (int i = 0; i < instNum; i++) {
        // generate example
        example = generateInstance (format,
                                    random,
                                    stdDev,
                                    center,
                                    cName);

        // never hand a null instance to the dataset
        if (example != null) {
          example.setDataset(format);
          format.add(example);
        }
      }
    }

    return (format);
  }

  /**
   * Generates a single instance of one cluster. Every attribute value is
   * a gaussian random number scaled by the standard deviation and shifted
   * by the matching coordinate of the cluster center; if the class flag
   * is set the class value is set to the cluster name.
   *
   * @param format the dataset the instance is attached to
   * @param randomG the random number generator
   * @param stdDev the standard deviation of the cluster
   * @param center the center of the cluster
   * @param cName the name of the cluster, used as class value
   * @return the instance generated
   */
  private Instance generateInstance (Instances format,
                                     Random randomG,
                                     double stdDev,
                                     double [] center,
                                     String cName
                                     ) {
    Instance example;
    int numAtts = m_NumAttributes;
    // one extra slot for the class attribute
    if (getClassFlag()) numAtts++;

    example = new Instance(numAtts);
    example.setDataset(format);

    for (int i = 0; i < m_NumAttributes; i++) {
      example.setValue(i, randomG.nextGaussian() * stdDev + center[i]);
    }

    if (getClassFlag()) {
      example.setClassValue(cName);
    }
    return example;
  }

  /**
   * Defines the clusters. The GRID pattern uses defineClustersGRID, every
   * other pattern uses defineClustersRANDOM.
   *
   * @param random random number generator
   * @return the list of clusters
   * @exception Exception passed on from the pattern specific methods
   */
  private FastVector defineClusters(Random random)
   throws Exception {

    if (m_Pattern == GRID)
      return defineClustersGRID(random);
    else
      return defineClustersRANDOM(random);
  }

  /**
   * Defines the clusters if pattern is GRID. Stores the computed grid
   * size and grid width in m_GridSize and m_GridWidth.
   *
   * @param random random number generator
   * @return the list of clusters
   * @exception Exception declared for callers; see Cluster and GridVector
   */
  private FastVector defineClustersGRID(Random random)
   throws Exception {

    FastVector clusters = new FastVector(m_NumClusters);
    double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum);
    double minInstNum = (double) m_MinInstNum;
    double diffRadius = m_MaxRadius - m_MinRadius;
    Cluster cluster;

    // compute gridsize: the m_NumAttributes-th root of the number of
    // clusters, rounded up to the next integer
    double gs = Math.pow(m_NumClusters, 1.0 / m_NumAttributes);

    if (gs - ((double) ((int) gs))  > 0.0) {
      m_GridSize = (int) (gs + 1.0);
    } else { m_GridSize = (int) gs; }

    // compute gridwidth: mean radius times the distance multiplier
    m_GridWidth = ((m_MaxRadius + m_MinRadius) / 2) * m_DistMult;

    // NOTE(review): diagnostic output on stdout; if the generated data is
    // written to stdout as well, the two will be mixed - confirm.
    System.out.println("GridSize= " + m_GridSize);
    System.out.println("GridWidth= " + m_GridWidth);

    // initialize gridvector with zeros
    GridVector gv = new GridVector(m_NumAttributes, m_GridSize);

    for (int i = 0; i < m_NumClusters; i++) {
      int instNum = (int) (random.nextDouble() * diffInstNum
                                   + minInstNum);
      double radius = (random.nextDouble() * diffRadius) + m_MinRadius;

      // center is defined in the constructor of cluster
      cluster = new Cluster(instNum, radius,
                            gv.getGridVector(), m_GridWidth);
      clusters.addElement((Object) cluster);
      // advance to the next grid position
      gv.addOne();
    }
    return clusters;
  }

  /**
   * Defines the clusters if pattern is RANDOM
   *
   * @param random random number generator
   * @return the list of clusters
   * @exception Exception declared for callers; see Cluster
   */
  private FastVector defineClustersRANDOM(Random random)
   throws Exception {

    FastVector clusters = new FastVector(m_NumClusters);
    double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum);
    double minInstNum = (double) m_MinInstNum;
    double diffRadius = m_MaxRadius - m_MinRadius;
    Cluster cluster;

    for (int i = 0; i < m_NumClusters; i++) {
      int instNum = (int) (random.nextDouble() * diffInstNum
                                   + minInstNum);
      double radius = (random.nextDouble() * diffRadius) + m_MinRadius;

      // center is defined in the constructor of cluster
      cluster = new Cluster(instNum, radius, random);
      clusters.addElement((Object) cluster);
    }
    return clusters;
  }

  /**
   * Compiles documentation about the data generation after
   * the generation process
   *
   * @return string with additional information about generated dataset
   * @exception Exception no input structure has been defined
   */
  public String generateFinished() throws Exception {

    StringBuffer docu = new StringBuffer();
    // retained from the original; the value itself is not used
    Instances format = getDatasetFormat();//just for exception

    // string is empty
    docu.append("\n%\n%\n");
    return docu.toString();
  }

  /**
   * Compiles documentation about the data generation before
   * the generation process. Lists standard deviation, number of
   * instances and center of every cluster, followed by the totals and
   * the pattern chosen.
   *
   * @return string with additional information
   */
  public String generateStart() {
    StringBuffer docu = new StringBuffer();
    // string is empty
    docu.append("\n%\n%\n");

    int sumInst = 0;
    int cNum = 0;
    // "enum" is a reserved word in current Java, hence the different name
    for (Enumeration clusterEnum = m_ClusterList.elements();
         clusterEnum.hasMoreElements(); cNum++) {
      Cluster cl  = (Cluster) clusterEnum.nextElement();
      docu.append("%\n");
      docu.append("% Cluster: c"+ cNum + "\n");
      docu.append("% ----------------------------------------------\n");
      docu.append("% StandardDeviation: "
                  + Utils.doubleToString(cl.getStdDev(), 2) + "\n");
      docu.append("% Number of instances: "
                  + cl.getInstNum() + "\n");
      sumInst += cl.getInstNum();
      double [] center = cl.getCenter();
      docu.append("% ");
      // comma separated center coordinates, the last one ends the line
      for (int i = 0; i < center.length - 1; i++) {
        docu.append(Utils.doubleToString(center[i], 2) + ", ");
      }
      docu.append(Utils.doubleToString(center[center.length - 1], 2) + "\n");
    }
    docu.append("\n% ----------------------------------------------\n");
    docu.append("% Total number of instances: " + sumInst + "\n");
    docu.append("%                            in " + cNum + " clusters\n");
    docu.append("% Pattern chosen           : ");
    if (getGridFlag()) docu.append("GRID, "
             + "distance multiplier = " +
             Utils.doubleToString(m_DistMult, 2) + "\n");
    else
      if (getSineFlag()) docu.append("SINE\n");
      else
        docu.append("RANDOM\n");

    return docu.toString();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments for the data producer:
   */
  public static void main(String [] argv) {
    try {
      ClusterGenerator.makeData(new BIRCHCluster(), argv);
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?