⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 addnoise.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
  /**
   * Sets the index of the attribute whose values are changed when noise
   * is added.
   *
   * @param attIndex the attribute index as a string; it is handed unchanged
   * to m_AttIndex.setSingleIndex, which defines the accepted formats
   */
  public void setAttributeIndex(String attIndex) {
    
    m_AttIndex.setSingleIndex(attIndex);
  }

 /**
   * Registers the structure of the incoming data and checks that the
   * chosen attribute is suitable for adding noise.
   *
   * The upper bound of the attribute index is set from the number of input
   * attributes before the chosen attribute is looked up. On success the
   * output format is set to the input format and the filter is flagged as
   * being at the start of a new batch.
   *
   * @param instanceInfo an Instances object containing the input 
   * instance structure
   * @return always false
   * @exception Exception if the chosen attribute is not nominal, or if it
   * has fewer than two values while missing values are not used as a value
   * of their own
   */
  public boolean setInputFormat(Instances instanceInfo) 
       throws Exception {

    super.setInputFormat(instanceInfo);

    Instances format = getInputFormat();

    // the index can only be resolved once the number of attributes is known
    m_AttIndex.setUpper(format.numAttributes() - 1);
    int chosen = m_AttIndex.getIndex();

    // noise is only defined for nominal attributes
    boolean nominal = format.attribute(chosen).isNominal();
    if (!nominal) {
      throw new Exception("Adding noise is not possible:"
                          + "Chosen attribute is numeric.");
    }

    // there must be something to change a value into: either a second
    // nominal value or the missing value
    int valueCount = format.attribute(chosen).numValues();
    if (!(valueCount >= 2 || m_UseMissing)) {
      throw new Exception("Adding noise is not possible:"
                          + "Chosen attribute has less than two values.");
    }

    setOutputFormat(format);
    m_NewBatch = true;
    return false;
  }

  /**
   * Buffers an instance for filtering. The instance is only stored here;
   * noise is added to the stored instances in batchFinished().
   *
   * @param instance the input instance
   * @return always false
   * @exception Exception if the input format was not set
   */
  public boolean input(Instance instance) throws Exception {

    // check if input format is defined
    if (getInputFormat() == null) {
      throw new Exception("No input instance format defined");
    }

    // first instance of a new batch: reset the queue exactly once
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // store the instance with the input format until the batch is finished
    getInputFormat().add(instance);
    return false;
  }

  /**
   * Signifies that this batch of input to the filter is finished. Noise is
   * added to the instances stored with the input format, and a new Instance
   * built from each of them is pushed onto the queue.
   *
   * @return true if there are instances pending output
   * @exception Exception if no input structure has been defined
   */
  public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
      throw new Exception("No input instance format defined");
    }

    // add noise in place to the instances stored by input()
    addNoise (getInputFormat(), m_RandomSeed, m_Percent, m_AttIndex.getIndex(), 
              m_UseMissing);

    // queue every (now noisy) instance
    // NOTE(review): the stored input instances are not removed here, so they
    // appear to still be present when the next batch starts - confirm this
    // is intended
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      push (new Instance(getInputFormat().instance(i)));
    }

    m_NewBatch = true;
    return (numPendingOutput() != 0);
  }

  /**
   * Adds noise to the dataset.
   *
   * A given percentage of the instances is changed: the instances are
   * visited in a random order derived from seed, and the attribute given
   * by its index is changed from its current value to one of the other
   * possible values, also chosen randomly. The percentage is applied to
   * each attribute value separately, so the same share of every value is
   * changed. If useMissing is true, the missing value is treated as a
   * value of its own: missing entries can be changed and other entries can
   * become missing. If it is false, missing entries are left untouched.
   *
   * The instances are modified in place.
   *
   * @param instances is the dataset
   * @param seed used for random function
   * @param percent percentage of instances that are changed
   * @param attIndex index of the attribute changed
   * @param useMissing if true missing values are treated as extra value
   */
  public void addNoise (Instances instances, 
                         int seed, 
                         int percent,
                         int attIndex,
                         boolean useMissing) {
    int numInstances = instances.numInstances();
    int numValues = instances.attribute(attIndex).numValues();
    double splitPercent = (double) percent; // percentage used for splits

    // fill array with the indexes
    int[] indexList = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
      indexList[i] = i;
    }

    // randomize list of indexes
    // NOTE(review): the swap partner is drawn from [0, i), so for i > 0 an
    // index is never swapped with itself; kept unchanged so that results
    // for a given seed stay the same
    Random random = new Random(seed);
    for (int i = numInstances - 1; i >= 0; i--) {
      int hValue = indexList[i];
      int hIndex = (int) (random.nextDouble() * (double) i);
      indexList[i] = indexList[hIndex];
      indexList[hIndex] = hValue;
    }

    // per attribute value: how many instances have been changed so far
    // (count) and how many have to be changed (max); the missing value is
    // tracked in the two separate variables
    int[] partitionCount = new int[numValues];
    int[] partitionMax = new int[numValues];
    int missingCount = 0;
    int missingMax = 0;

    // count all occurrences of each value and of the missing value,
    // temporarily using the max variables as plain counters
    for (int i = 0; i < numInstances; i++) {
      Instance instance = instances.instance(i);
      if (instance.isMissing(attIndex)) {
        missingMax++;
      } else {
        partitionMax[(int) instance.value(attIndex)]++;
      }
    }

    // turn the occurrence counts into quotas using the given percentage,
    // rounded to the nearest integer; without useMissing no missing entry
    // is ever changed
    if (!useMissing) {
      missingMax = 0;
    } else {
      missingMax = (int) (((double) missingMax / 100) * splitPercent + 0.5);
    }
    int sumMax = missingMax;
    for (int i = 0; i < numValues; i++) {
      partitionMax[i] = (int) (((double) partitionMax[i] / 100) * splitPercent
                               + 0.5);
      sumMax = sumMax + partitionMax[i];
    }

    // total number of changes made so far, used to stop early
    int sumCount = 0;

    // add noise, walking the instances in the randomized order and
    // changing an instance only while the quota of its value is not used up
    Random randomValue = new Random(seed);
    for (int i = 0; i < numInstances; i++) {
      if (sumCount >= sumMax) {
        break; // finished
      }
      Instance currInstance = instances.instance(indexList[i]);
      if (currInstance.isMissing(attIndex)) {
        if (missingCount < missingMax) {
          changeValueRandomly(randomValue, numValues, attIndex,
                              currInstance, useMissing);
          missingCount++;
          sumCount++;
        }
      } else {
        int vIndex = (int) currInstance.value(attIndex);
        if (partitionCount[vIndex] < partitionMax[vIndex]) {
          changeValueRandomly(randomValue, numValues, attIndex,
                              currInstance, useMissing);
          partitionCount[vIndex]++;
          sumCount++;
        }
      }
    }
  }

  /**
   * Replaces the value of one attribute of an instance by a randomly
   * chosen different value.
   *
   * Values are handled as indexes 0..numOfValues-1; the missing value is
   * represented by the index numOfValues. New candidates are drawn from r
   * until one differs from the current value. If no different value
   * exists (for example a single-valued attribute without useMissing), the
   * instance is left unchanged instead of drawing forever.
   *
   * NOTE(review): for a non-missing value of a two-valued attribute the
   * value is simply flipped, so it never becomes missing even when
   * useMissing is true - confirm this is intended.
   *
   * @param r random function
   * @param numOfValues number of values of the attribute
   * @param indexOfAtt index of the attribute to change
   * @param instance the instance that is modified in place
   * @param useMissing if true the missing value is a possible new value
   */
  private void changeValueRandomly(Random r, int numOfValues,
                                   int indexOfAtt, 
                                   Instance instance, 
                                   boolean useMissing) {
    boolean missing = instance.isMissing(indexOfAtt);

    // a missing value is encoded as numOfValues, which is the highest
    // possible value plus one
    int currValue = missing ? numOfValues : (int) instance.value(indexOfAtt);

    // with only two possible values it is easier: take the other one
    if ((numOfValues == 2) && !missing) {
      instance.setValue(indexOfAtt, (double) ((currValue + 1) % 2));
      return;
    }

    // candidates are drawn from [0, range); with useMissing the extra
    // index numOfValues (= missing) is included
    int range = useMissing ? numOfValues + 1 : numOfValues;

    // number of candidates that differ from the current value; without any
    // the loop below could never terminate
    int alternatives = (currValue < range) ? range - 1 : range;
    if (alternatives <= 0) {
      return;
    }

    // draw until a value different from the current one comes up
    while (true) {
      int newValue = (int) (r.nextDouble() * (double) range);
      if (newValue != currValue) {
        if (newValue == numOfValues) {
          instance.setMissing(indexOfAtt);
        } else {
          instance.setValue(indexOfAtt, (double) newValue);
        }
        return;
      }
    }
  }

  /**
   * Command-line entry point for trying out this filter.
   *
   * When the flag 'b' is present in the arguments the filter is run through
   * Filter.batchFilterFile, otherwise through Filter.filterFile. The message
   * of any exception is printed to standard output.
   *
   * @param argv should contain arguments to the filter: 
   * use -h for help
   */
  public static void main(String [] argv) {

    try {
      boolean batchMode = Utils.getFlag('b', argv);
      if (!batchMode) {
        Filter.filterFile(new AddNoise(), argv);
      } else {
        Filter.batchFilterFile(new AddNoise(), argv);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -