📄 kstarnumericattribute.java

📁 为了下东西随便发了个 datamining 的源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
   * sphere = sum(P)^2/sum(P^2) where
   * P(i) = root*exp(-2*i*root).
   * Since there are n different training instances we multiply P(i) by 1/n.
   */
  private void calculateSphereSize(double scale, KStarWrapper params) {
    String debug = "(KStarNumericAttribute.calculateSphereSize)";
    int i;
    double sphereSize, minprob = 1.0;
    double pstar;                // P*(b|a)
    double pstarSum = 0.0;       // sum(P*)
    double pstarSquareSum = 0.0; // sum(P*^2)
    double inc;
    for (i = 0; i < m_NumInstances; i++) {
      if (m_Distances[i] < 0) {
	// instance with missing value
	continue;
      }
      else {
	pstar = PStar( m_Distances[i], scale );
	if (minprob > pstar) {
	  minprob = pstar;
	}
	inc = pstar / m_ActualCount;
	pstarSum += inc;
	pstarSquareSum += inc * inc;
      }
    }
    sphereSize = (pstarSquareSum == 0 ? 0 
		  : pstarSum * pstarSum / pstarSquareSum);
    // return the values
    params.sphere = sphereSize;
    params.avgProb = pstarSum;
    params.minProb = minprob;
  }
  
  /**
   * Calculates the scale factor using entropy.
   *
   * @return the scale factor value
   */
  private double scaleFactorUsingEntropy() {
    String debug = "(KStarNumericAttribute.scaleFactorUsingEntropy)";
    if ( m_ClassType != Attribute.NOMINAL ) {
      System.err.println("Error: "+debug+" attribute class must be nominal!");
      System.exit(1);
    }
    int i,j, lowestcount = 0, count, itcount;
    double lowest = -1.0, nextlowest = -1.0;
    double root, up, bot, stepsize, delta;
    double actentropy = 0.0, randentropy = 0.0, actscale, randscale;
    double minrand = 0.0, minact = 0.0, maxrand = 0.0, maxact = 0.0;
    double bestdiff, bestroot, currentdiff, lastdiff;
    double bestpsum, bestminprob, scale = 1.0;

    KStarWrapper botvals = new KStarWrapper();
    KStarWrapper upvals = new KStarWrapper();
    KStarWrapper vals = new KStarWrapper();

    m_Distances = new double [m_NumInstances];

    for (j=0; j<m_NumInstances; j++) {
      if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) {
	// mark the train instance with a missing value by setting 
	// the distance to -1.0
	m_Distances[j] = -1.0;
      }
      else {
	m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) - 
				  m_Test.value(m_AttrIndex));
	
	if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) {
	  if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) {
	    nextlowest = lowest;
	    lowest = m_Distances[j];
	    lowestcount = 1;
	  }
	  else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) {
	    // record the number training instances (number n0) at
	    // the smallest distance from test instance
	    lowestcount++;
	  }
	  else {
	    nextlowest = m_Distances[j];
	  }
	}
	// records the actual number of instances with no missing value
	m_ActualCount++;
      }
    } // for
    
    if (nextlowest == -1 || lowest == -1) { // Data values are all the same
      scale = 1.0;
      m_SmallestProb = m_AverageProb = 1.0;
      return scale;
    }
    else {
      // starting point for root
      root = 1.0 / (nextlowest - lowest);
      // root is bracketed in interval [bot,up]
      bot = 0.0 + ROOT_FINDER_ACCURACY / 2;  
      up = root * 8; // This is bodgy
      // Find (approx) entropy ranges
      calculateEntropy(up, upvals);
      calculateEntropy(bot, botvals);
      actscale = botvals.actEntropy - upvals.actEntropy;
      randscale = botvals.randEntropy - upvals.randEntropy;
      // Optimise the scale factor
      bestroot = root = bot;
      bestdiff = currentdiff = FLOOR1;
      bestpsum = botvals.avgProb;
      bestminprob = botvals.minProb;
      stepsize = (up - bot) / 20.0;
      itcount = 0;
      // Root finding algorithm starts here!
      while (true)
	{
	  itcount++;
	  lastdiff = currentdiff;
	  root += Math.log(root + 1.0) * stepsize;
	  if (root <= bot) {
	    root = bot;
	    currentdiff = 0.0;
	    delta = -1.0;
	  }
	  else if (root >= up) {
	    root = up;
	    currentdiff = 0.0;
	    delta = -1.0;
	  }
	  else {
	    calculateEntropy(root, vals);
	    // Normalise entropies
	    vals.randEntropy = (vals.randEntropy - upvals.randEntropy) / 
	      randscale;
	    vals.actEntropy = (vals.actEntropy - upvals.actEntropy) / 
	      randscale;
	    currentdiff = vals.randEntropy - vals.actEntropy;

	    if (currentdiff < FLOOR1) {
	      currentdiff = FLOOR1;
	      if (stepsize < 0) { 
		// If we've hit the end and turned around we can't 
		// have found any peaks
		bestdiff = currentdiff;
		bestroot = bot;
		bestpsum = botvals.avgProb;
		bestminprob = botvals.minProb;
		break;
	      }
	    }
	    delta = currentdiff - lastdiff;
	  }
	  if (currentdiff > bestdiff) {
	    bestdiff = currentdiff;
	    bestroot = root;
	    bestminprob = vals.minProb;
	    bestpsum = vals.avgProb;
	  }
	  if (delta < 0) {
	    if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
	      break;
	    }
	    else {
	      stepsize /= -4.0;
	    }
	  }
	  if (itcount > ROOT_FINDER_MAX_ITER) {
	    //  System.err.println("Warning: "+debug+" ROOT_FINDER_MAX_ITER 
	    // exceeded");
	    break;
	  }
	} // while

      m_SmallestProb = bestminprob;
      m_AverageProb = bestpsum;
      // Set the probability of transforming to a missing value
      switch ( m_MissingMode )
	{
	case M_DELETE:
	  m_MissingProb = 0.0;
	  break;
	case M_NORMAL:
	  m_MissingProb = 1.0;
	  break;
	case M_MAXDIFF:
	  m_MissingProb = m_SmallestProb;
	  break;
	case M_AVERAGE:
	  m_MissingProb = m_AverageProb;
	  break;
	}
      // set scale factor
      scale = bestroot;
    } // else
    return scale;
  }

  /**
   * Calculates several parameters aside from the entropy: for a specified
   * scale factor, calculates the actual entropy, a random entropy using a
   * randomized set of class value colomns, and records the average and
   * smallest probabilities (for use in missing value case).
   */
  private void calculateEntropy(double scale, KStarWrapper params) {    
    String debug = "(KStarNumericAttribute.calculateEntropy)";
    int i,j,k;
    double actent = 0.0, randent = 0.0;
    double pstar, tprob, avgprob = 0.0, minprob = 1.0;
    double actClassProb, randClassProb;
    double [][] pseudoClassProbs = new double[NUM_RAND_COLS+1][m_NumClasses];
    // init
    for(j = 0; j <= NUM_RAND_COLS; j++) {
      for(i = 0; i < m_NumClasses; i++) {
	pseudoClassProbs[j][i] = 0.0;
      }
    }
    for (i=0; i < m_NumInstances; i++) {
      if (m_Distances[i] < 0) {
	// train instance has mising value
	continue;
      }
      else {
	pstar = PStar(m_Distances[i], scale);
	tprob = pstar / m_ActualCount;
	avgprob += tprob;
	if (pstar < minprob) {
	  minprob = pstar;
	}
	// filter instances with same class value
	for (k=0; k <= NUM_RAND_COLS; k++) {
	  // instance i is assigned a random class value in colomn k;
	  // colomn k = NUM_RAND_COLS contains the original mapping: 
	  // instance -> class vlaue
	  pseudoClassProbs[k][ m_RandClassCols[k][i] ] += tprob;
	}
      }
    }
    // compute the actual entropy using the class probabilities
    // with the original class value mapping (colomn NUM_RAND_COLS)
    for (j = m_NumClasses-1; j >= 0; j--) {
      actClassProb = pseudoClassProbs[NUM_RAND_COLS][j] / avgprob;
      if (actClassProb > 0) {
    	actent -= actClassProb * Math.log(actClassProb) / LOG2;
      }
    }
    // compute a random entropy using the pseudo class probs
    // excluding the colomn NUM_RAND_COLS
    for (k=0; k < NUM_RAND_COLS; k++) {
      for (i = m_NumClasses-1; i >= 0; i--) {
  	randClassProb = pseudoClassProbs[k][i] / avgprob;
  	if (randClassProb > 0) {
  	  randent -= randClassProb * Math.log(randClassProb) / LOG2;
	}
      }
    }
    randent /= NUM_RAND_COLS;
    // return the values
    params.actEntropy = actent;
    params.randEntropy = randent;
    params.avgProb = avgprob;
    params.minProb = minprob;
  }

  /**
   * Calculates the value of P for a given value x using the expression:
   * P(x) = scale * exp( -2.0 * x * scale )
   *
   * @param x input value
   * @param scale the scale factor
   * @return output of the function P(x)
   */          
  private double PStar(double x, double scale) {
    return scale * Math.exp( -2.0 * x * scale );
  }

  /**
   * Set options.
   * @param missingmode the missing value treatment to use
   * @param blendmethod the blending method to use
   * @param blendfactor the level of blending to use
   */
  public void setOptions(int missingmode, int blendmethod, int blendfactor) {
    m_MissingMode = missingmode;
    m_BlendMethod = blendmethod;
    m_BlendFactor = blendfactor;
  }

  /**
   * Set the missing value mode.
   * @param mode the type of missing value treatment to use
   */
  public void setMissingMode(int mode) {
    m_MissingMode = mode;
  }

  /**
   * Set the blending method
   * @param method the blending method to use
   */
  public void setBlendMethod(int method) {
    m_BlendMethod = method;
  }

  /**
   * Set the blending factor
   * @param factor the level of blending to use
   */
  public void setBlendFactor(int factor) {
    m_BlendFactor = factor;
  }


} // class
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -