⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 addnoise.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    return m_AttIndex.getSingleIndex();
  }

  /**
   * Sets index of the attribute used.
   *
   * @param attIndex the index of the attribute
   */
  public void setAttributeIndex(String attIndex) {
    m_AttIndex.setSingleIndex(attIndex);
  }

  /**
   * Returns the Capabilities of this filter.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input
   * instance structure (any instances contained in the object are
   * ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the input format can't be set
   * successfully, i.e. the chosen attribute is not nominal, or it has
   * fewer than two values while missing values are not used as a value
   */
  public boolean setInputFormat(Instances instanceInfo)
       throws Exception {

    super.setInputFormat(instanceInfo);

    // the attribute index can only be resolved once the number of
    // attributes is known
    m_AttIndex.setUpper(getInputFormat().numAttributes() - 1);

    // test if nominal
    if (!getInputFormat().attribute(m_AttIndex.getIndex()).isNominal()) {
      throw new Exception("Adding noise is not possible:"
                          + "Chosen attribute is numeric.");
    }

    // test if two values are given
    if ((getInputFormat().attribute(m_AttIndex.getIndex()).numValues() < 2)
        && (!m_UseMissing)) {
      throw new Exception("Adding noise is not possible:"
                          + "Chosen attribute has less than two values.");
    }

    setOutputFormat(getInputFormat());
    m_NewBatch = true;
    return false;
  }

  /**
   * Input an instance for filtering.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   * collected with output().
   * @throws Exception if the input format was not set
   */
  public boolean input(Instance instance) throws Exception {

    // check if input format is defined
    if (getInputFormat() == null) {
      throw new Exception("No input instance format defined");
    }

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // after the first batch instances are passed through unchanged;
    // during the first batch they are buffered until batchFinished()
    if (isFirstBatchDone()) {
      push(instance);
      return true;
    } else {
      bufferInput(instance);
      return false;
    }
  }

  /**
   * Signify that this batch of input to the filter is finished.
   * If the filter requires all instances prior to filtering,
   * output() may now be called to retrieve the filtered instances.
   *
   * @return true if there are instances pending output
   * @throws Exception if no input structure has been defined
   */
  public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
      throw new Exception("No input instance format defined");
    }

    // Add the noise to the buffered instances, output copies of them,
    // and clear the input instances.
    addNoise(getInputFormat(), m_RandomSeed, m_Percent, m_AttIndex.getIndex(),
             m_UseMissing);

    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      push((Instance) getInputFormat().instance(i).copy());
    }

    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
  }

  /**
   * add noise to the dataset
   *
   * a given percentage of the instances are changed in the way, that
   * a set of instances are randomly selected using seed. The attribute
   * given by its index is changed from its current value to one of the
   * other possibly ones, also randomly. The percentage is applied to
   * each attribute value separately (and to the missing values, if they
   * are used), so every value loses the same share of its instances.
   * if useMissing is true, missing value is used as a value of its own
   *
   * @param instances is the dataset, its instances are modified in place
   * @param seed used for random function
   * @param percent percentage of instances that are changed
   * @param attIndex index of the attribute changed
   * @param useMissing if true missing values are treated as extra value
   */
  public void addNoise(Instances instances,
                       int seed,
                       int percent,
                       int attIndex,
                       boolean useMissing) {

    final int numInstances = instances.numInstances();
    final double splitPercent = (double) percent; // percentage used for splits

    // fill array with the indexes
    int[] indexList = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
      indexList[i] = i;
    }

    // randomize list of indexes
    // NOTE: the swap partner is drawn from [0, i), i.e. it is never i
    // itself while i > 0. This is kept unchanged on purpose so that a
    // given seed keeps selecting the same instances.
    Random random = new Random(seed);
    for (int i = numInstances - 1; i >= 0; i--) {
      int hValue = indexList[i];
      int hIndex = (int) (random.nextDouble() * (double) i);
      indexList[i] = indexList[hIndex];
      indexList[hIndex] = hValue;
    }

    // arrays that are used to count instances of each value
    // (partitionCount: changed so far) and to keep the amount of
    // instances of that value that has to be changed (partitionMax);
    // the same is done for the missing values in the two variables
    // missingCount and missingMax
    final int numValues = instances.attribute(attIndex).numValues();
    int[] partitionCount = new int[numValues];
    int[] partitionMax = new int[numValues];
    int missingCount = 0;
    int missingMax = 0;

    // go through the dataset and count all occurrences of values
    // and all missing values using temporarily the ..Max array and
    // variable missingMax
    for (int i = 0; i < numInstances; i++) {
      Instance instance = instances.instance(i);
      if (instance.isMissing(attIndex)) {
        missingMax++;
      } else {
        partitionMax[(int) instance.value(attIndex)]++;
      }
    }

    // use given percentage to calculate
    // how many have to be changed per split and
    // how many of the missing values
    if (!useMissing) {
      // missing values are left untouched
      missingMax = 0;
    } else {
      missingMax = (int) (((double) missingMax / 100) * splitPercent + 0.5);
    }
    int sumMax = missingMax;
    for (int i = 0; i < numValues; i++) {
      partitionMax[i] = (int) (((double) partitionMax[i] / 100) * splitPercent
                               + 0.5);
      sumMax = sumMax + partitionMax[i];
    }

    // sumCount is used to see if everything is done already
    int sumCount = 0;

    // add noise
    // using the randomized index-array
    Random randomValue = new Random(seed);
    for (int i = 0; i < numInstances; i++) {
      if (sumCount >= sumMax) {
        break; // finished
      }
      Instance currInstance = instances.instance(indexList[i]);

      if (currInstance.isMissing(attIndex)) {
        // value is missing: change it only while the quota for
        // missing values is not used up
        if (missingCount < missingMax) {
          changeValueRandomly(randomValue, numValues, attIndex,
                              currInstance, useMissing);
          missingCount++;
          sumCount++;
        }
      } else {
        // change it only while the quota of its current value is not
        // used up
        int vIndex = (int) currInstance.value(attIndex);
        if (partitionCount[vIndex] < partitionMax[vIndex]) {
          changeValueRandomly(randomValue, numValues, attIndex,
                              currInstance, useMissing);
          partitionCount[vIndex]++;
          sumCount++;
        }
      }
    }
  }

  /**
   * method to set a new value
   *
   * The new value is drawn with the given random function until it
   * differs from the current one. If there is no value the attribute
   * could be changed to, the instance is left unchanged.
   *
   * @param r random function
   * @param numOfValues number of values of the attribute
   * @param indexOfAtt index of the attribute that is changed
   * @param instance the instance whose attribute value is changed
   * @param useMissing if true, missing is one of the possible new values
   */
  private void changeValueRandomly(Random r, int numOfValues,
                                   int indexOfAtt,
                                   Instance instance,
                                   boolean useMissing) {

    final boolean currentlyMissing = instance.isMissing(indexOfAtt);

    // get current value
    // if value is missing set current value to number of values
    // which is the highest possible value plus one
    final int currValue = currentlyMissing
      ? numOfValues
      : (int) instance.value(indexOfAtt);

    // with only two possible values it is easier
    if ((numOfValues == 2) && !currentlyMissing) {
      instance.setValue(indexOfAtt, (double) ((currValue + 1) % 2));
      return;
    }

    // values are drawn from [0, numCandidates); if missing values are
    // used as values, the extra index numOfValues stands for "missing"
    final int numCandidates = useMissing ? numOfValues + 1 : numOfValues;

    // without at least one candidate that differs from the current
    // value the loop below could never end
    final int numAlternatives = (currValue < numCandidates)
      ? numCandidates - 1
      : numCandidates;
    if (numAlternatives <= 0) {
      return;
    }

    // get randomly a new value not equal to the current value
    while (true) {
      int newValue = (int) (r.nextDouble() * (double) numCandidates);

      // have we found a new value?
      if (newValue != currValue) {
        // the value 1 above the highest possible value (=numOfValues)
        // is used as missing value
        if (newValue == numOfValues) {
          instance.setMissing(indexOfAtt);
        } else {
          instance.setValue(indexOfAtt, (double) newValue);
        }
        break;
      }
    }
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter:
   * use -h for help
   */
  public static void main(String[] argv) {
    runFilter(new AddNoise(), argv);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -