⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 subspacecluster.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    // check whether all the attributes are covered    count = new int[getNumAttributes()];    for (i = 0; i < getNumAttributes(); i++) {      for (n = 0; n < getClusters().length; n++) {        cl = (SubspaceClusterDefinition) getClusters()[n];        r  = new Range(cl.getAttrIndexRange());        r.setUpper(getNumAttributes());        if (r.isInRange(i))          count[i]++;      }    }    // list all indices that are not covered    attrIndex = "";    for (i = 0; i < count.length; i++) {      if (count[i] == 0) {        if (attrIndex.length() != 0)          attrIndex += ",";        attrIndex += (i+1);      }    }    if (attrIndex.length() != 0)      throw new IllegalArgumentException(          "The following attributes are not covered by a cluster "          + "definition: " + attrIndex + "\n");    return true;  }  /**   * Gets the single mode flag.   *   * @return true if methode generateExample can be used.   */  public boolean getSingleModeFlag() {     return false;   }  /**   * Initializes the format for the dataset produced.    
*   * @return the output data format   * @throws Exception data format could not be defined    */  public Instances defineDataFormat() throws Exception {    // initialize    setOptions(getOptions());    checkCoverage();    Random random = new Random (getSeed());    setRandom(random);    Instances dataset;    FastVector attributes = new FastVector(3);    Attribute attribute;    boolean classFlag = getClassFlag();    FastVector classValues = null;    if (classFlag)       classValues = new FastVector(getClusters().length);         FastVector boolValues = new FastVector(2);    boolValues.addElement("false");    boolValues.addElement("true");    FastVector nomValues = null;    // define dataset    for (int i = 0; i < getNumAttributes(); i++) {      // define boolean attribute      if (m_booleanCols.isInRange(i)) {        attribute = new Attribute("B" + i, boolValues);      }       else if (m_nominalCols.isInRange(i)) {        // define nominal attribute        nomValues = new FastVector(m_numValues[i]);        for (int j = 0; j < m_numValues[i]; j++)          nomValues.addElement("value-" + j);        attribute = new Attribute("N" + i, nomValues);      }       else {        // numerical attribute        attribute = new Attribute("X" + i);       }      attributes.addElement(attribute);    }    if (classFlag) {      for (int i = 0; i < getClusters().length; i++)        classValues.addElement("c" + i);      attribute = new Attribute ("class", classValues);       attributes.addElement(attribute);    }    dataset = new Instances(getRelationNameToUse(), attributes, 0);    if (classFlag)       dataset.setClassIndex(m_NumAttributes);    // set dataset format of this class    Instances format = new Instances(dataset, 0);    setDatasetFormat(format);    for (int i = 0; i < getClusters().length; i++) {      SubspaceClusterDefinition cl = (SubspaceClusterDefinition) getClusters()[i];      cl.setNumInstances(random);      cl.setParent(this);    }    return dataset;   }  /**   * 
Tells whether the attribute at the given index is a boolean column.
   *
   * @param index of the attribute
   * @return true if the attribute is boolean
   */
  public boolean isBoolean(int index) {
    boolean inBooleanRange = m_booleanCols.isInRange(index);
    return inBooleanRange;
  }

  /**
   * Tells whether the attribute at the given index is a nominal column.
   *
   * @param index of the attribute
   * @return true if the attribute is nominal
   */
  public boolean isNominal(int index) {
    boolean inNominalRange = m_nominalCols.isInRange(index);
    return inNominalRange;
  }

  /**
   * Returns the array that stores the number of values for a nominal
   * attribute. The internal array itself is handed out, not a copy.
   *
   * @return the array that stores the number of values for a nominal attribute
   */
  public int[] getNumValues() {
    return m_numValues;
  }

  /**
   * Generating a single example is not supported by this generator;
   * the method unconditionally throws.
   *
   * @return never returns normally
   * @throws Exception always, because examples cannot be generated
   *         one by one
   */
  public Instance generateExample() throws Exception {
    throw new Exception("Examples cannot be generated one by one.");
  }

  /**
   * Generate all examples of the dataset.
* @return the instance generated   * @throws Exception if format not defined    */  public Instances generateExamples() throws Exception {    Instances format = getDatasetFormat();    Instance example = null;    if (format == null)       throw new Exception("Dataset format not defined.");    // generate examples for one cluster after another    for (int cNum = 0; cNum < getClusters().length; cNum++) {      SubspaceClusterDefinition cl  = (SubspaceClusterDefinition) getClusters()[cNum];      //get the number of instances to create      int instNum = cl.getNumInstances();      //class value is c + cluster number      String cName = "c" + cNum;      switch (cl.getClusterType().getSelectedTag().getID()) {        case (UNIFORM_RANDOM):          for (int i = 0; i < instNum; i++) {            // generate example            example = generateExample(format, getRandom(), cl, cName);            if (example != null)              format.add(example);          }          break;        case (TOTAL_UNIFORM):          // generate examples          if (!cl.isInteger())            generateUniformExamples(format, instNum, cl, cName);          else            generateUniformIntegerExamples(format, instNum, cl, cName);          break;        case (GAUSSIAN):          // generate examples          generateGaussianExamples(format, instNum, getRandom(), cl, cName);          break;      }    }    return format;  }  /**   * Generate an example of the dataset.    
*    * @param format the dataset format   * @param randomG the random number generator to use   * @param cl the cluster definition   * @param cName the class value   * @return the generated instance   */  private Instance generateExample(      Instances format, Random randomG, SubspaceClusterDefinition cl,       String cName) {    boolean makeInteger = cl.isInteger();    int num = -1;    Instance example = null;    int numAtts = m_NumAttributes;    if (getClassFlag()) numAtts++;    example = new Instance(numAtts);    example.setDataset(format);    boolean[] attributes = cl.getAttributes();    double[] minValue = cl.getMinValue();    double[] maxValue = cl.getMaxValue();    double value;    int clusterI = -1;    for (int i = 0; i < m_NumAttributes; i++) {      if (attributes[i]) {        clusterI++;        num++;        // boolean  or nominal attribute        if (isBoolean(i) || isNominal(i)) {          if (minValue[clusterI] == maxValue[clusterI]) {            value = minValue[clusterI];          }           else {            int numValues = (int)(maxValue[clusterI] - minValue[clusterI] + 1.0);            value = randomG.nextInt(numValues);            value += minValue[clusterI];          }        }         else {          // numeric attribute          value = randomG.nextDouble() *             (maxValue[num] - minValue[num]) + minValue[num];          if (makeInteger)            value = Math.round(value);        }        example.setValue(i, value);      }       else {        example.setMissing(i);      }    }    if (getClassFlag())      example.setClassValue(cName);    return example;   }  /**   * Generate examples for a uniform cluster dataset.    
*    * @param format the dataset format   * @param numInstances the number of instances to generator   * @param cl the cluster definition   * @param cName the class value   */  private void generateUniformExamples(      Instances format, int numInstances, SubspaceClusterDefinition cl,       String cName) {    Instance example = null;    int numAtts = m_NumAttributes;    if (getClassFlag()) numAtts++;    example = new Instance(numAtts);    example.setDataset(format);    boolean[] attributes = cl.getAttributes();    double[] minValue = cl.getMinValue();    double[] maxValue = cl.getMaxValue();    double[] diff = new double[minValue.length];    for (int i = 0; i < minValue.length; i++)      diff[i] = (maxValue[i] - minValue[i]);    for (int j = 0; j < numInstances; j++) {      int num = -1;      for (int i = 0; i < m_NumAttributes; i++) {        if (attributes[i]) {          num++;          double value = minValue[num] + (diff[num] * (double)((double)j / (double)(numInstances - 1)));          example.setValue(i, value);        }         else {          example.setMissing(i);        }      }      if (getClassFlag())        example.setClassValue(cName);      format.add(example);    }  }  /**   * Generate examples for a uniform cluster dataset.    
*    * @param format the dataset format   * @param numInstances the number of instances to generator   * @param cl the cluster definition   * @param cName the class value   */  private void generateUniformIntegerExamples(      Instances format, int numInstances, SubspaceClusterDefinition cl,       String cName) {    Instance example = null;    int numAtts = m_NumAttributes;    if (getClassFlag()) numAtts++;    example = new Instance(numAtts);    example.setDataset(format);    boolean[] attributes = cl.getAttributes();    double[] minValue = cl.getMinValue();    double[] maxValue = cl.getMaxValue();    int[] minInt = new int[minValue.length];    int[] maxInt = new int[maxValue.length];    int[] intValue = new int[maxValue.length];    int[] numInt = new int[minValue.length];    int num = 1;    for (int i = 0; i < minValue.length; i++) {      minInt[i] = (int)Math.ceil(minValue[i]);      maxInt[i] = (int)Math.floor(maxValue[i]);      numInt[i] = (maxInt[i] - minInt[i] + 1);      num = num * numInt[i];    }    int numEach = numInstances / num;    int rest = numInstances - numEach * num;    // initialize with smallest values combination    for (int i = 0; i < m_NumAttributes; i++) {      if (attributes[i]) {        example.setValue(i, (double)minInt[i]);        intValue[i] = minInt[i];      }       else {        example.setMissing(i);      }    }    if (getClassFlag())      example.setClassValue(cName);    int added = 0;    int attr = 0;    // do while not added all    do {      // add all for one value combination      for (int k = 0; k < numEach; k++) {        format.add(example);        example = (Instance) example.copy();        added++;      }      if (rest > 0) {        format.add(example);        example = (Instance) example.copy();        added++;        rest--;      }      if (added >= numInstances) break;      // switch to the next value combination      boolean done = false;      do {        if (attributes[attr] && (intValue[attr] + 1 <= maxInt[attr])) {      
    intValue[attr]++;          done = true;        }         else {          attr++;        }      } while (!done);      example.setValue(attr, (double)intValue[attr]);    } while (added < numInstances);  }  /**   * Generate examples for a uniform cluster dataset.    *    * @param format the dataset format   * @param numInstances the number of instances to generate   * @param random the random number generator   * @param cl the cluster definition   * @param cName the class value   */  private void generateGaussianExamples(      Instances format, int numInstances, Random random,       SubspaceClusterDefinition cl, String cName) {    boolean makeInteger = cl.isInteger();    Instance example = null;    int numAtts = m_NumAttributes;    if (getClassFlag()) numAtts++;    example = new Instance(numAtts);    example.setDataset(format);    boolean[] attributes = cl.getAttributes();    double[] meanValue = cl.getMeanValue();    double[] stddevValue = cl.getStddevValue();    for (int j = 0; j < numInstances; j++) {      int num = -1;      for (int i = 0; i < m_NumAttributes; i++) {        if (attributes[i]) {          num++;          double value = meanValue[num] + (random.nextGaussian() * stddevValue[num]);          if (makeInteger)            value = Math.round(value);          example.setValue(i, value);        }         else {          example.setMissing(i);        }      }      if (getClassFlag())        example.setClassValue(cName);      format.add(example);    }  }  /**   * Compiles documentation about the data generation after   * the generation process   *   * @return string with additional information about generated dataset   * @throws Exception no input structure has been defined   */  public String generateFinished() throws Exception {    return "";  }  /**   * Compiles documentation about the data generation before   * the generation process   *   * @return string with additional information    */  public String generateStart() {    StringBuffer docu = new 
StringBuffer();    int sumInst = 0;    for (int cNum = 0; cNum < getClusters().length; cNum++) {      SubspaceClusterDefinition cl  = (SubspaceClusterDefinition) getClusters()[cNum];      docu.append("%\n");      docu.append("% Cluster: c"+ cNum + "   ");      switch (cl.getClusterType().getSelectedTag().getID()) {        case UNIFORM_RANDOM:           docu.append("Uniform Random");          break;        case TOTAL_UNIFORM:           docu.append("Total Random");          break;        case GAUSSIAN:           docu.append("Gaussian");          break;      }      if (cl.isInteger()) {        docu.append(" / INTEGER");      }      docu.append("\n% ----------------------------------------------\n");      docu.append("%"+cl.attributesToString());      docu.append("\n% Number of Instances:            "  + cl.getInstNums() + "\n");      docu.append(  "% Generated Number of Instances:  "  + cl.getNumInstances() + "\n");      sumInst += cl.getNumInstances();        }    docu.append("%\n% ----------------------------------------------\n");     docu.append("% Total Number of Instances: " + sumInst + "\n");    docu.append("%                            in " + getClusters().length + " Cluster(s)\n%");    return docu.toString();  }  /**   * Main method for testing this class.   *   * @param args should contain arguments for the data producer:    */  public static void main(String[] args) {    runDataGenerator(new SubspaceCluster(), args);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -