📄 kstarnumericattribute.java
字号:
* sphere = sum(P)^2/sum(P^2) where
* P(i) = root*exp(-2*i*root).
* Since there are n different training instances we multiply P(i) by 1/n.
*
* @param scale the scale factor (root) at which P* is evaluated
* @param params wrapper receiving the sphere size, the summed probability
*               (avgProb) and the smallest single-instance probability
*/
private void calculateSphereSize(double scale, KStarWrapper params) {
  String debug = "(KStarNumericAttribute.calculateSphereSize)";
  int i;
  double sphereSize, minprob = 1.0;
  double pstar;                  // P*(b|a)
  double pstarSum = 0.0;         // sum(P*)
  double pstarSquareSum = 0.0;   // sum(P*^2)
  double inc;
  for (i = 0; i < m_NumInstances; i++) {
    if (m_Distances[i] < 0) { // instance with missing value
      continue;
    } else {
      pstar = PStar( m_Distances[i], scale );
      if (minprob > pstar) {
        minprob = pstar;
      }
      // each usable instance contributes 1/m_ActualCount of its mass
      inc = pstar / m_ActualCount;
      pstarSum += inc;
      pstarSquareSum += inc * inc;
    }
  }
  // guard the 0/0 case (every training instance had a missing value)
  sphereSize = (pstarSquareSum == 0 ? 0
                : pstarSum * pstarSum / pstarSquareSum);
  // return the values
  params.sphere = sphereSize;
  params.avgProb = pstarSum;
  params.minProb = minprob;
}

/**
 * Calculates the scale factor using entropy: walks the bracketed interval
 * of candidate roots looking for the root that maximises the difference
 * between randomized-class entropy and actual-class entropy, then records
 * the missing-value transformation probability as a side effect.
 *
 * @return the scale factor value
 */
private double scaleFactorUsingEntropy() {
  String debug = "(KStarNumericAttribute.scaleFactorUsingEntropy)";
  // Entropy-based blending is only defined for nominal class attributes.
  // NOTE(review): System.exit(1) in library code kills the host JVM; an
  // unchecked exception would be friendlier — kept as-is (upstream behavior).
  if ( m_ClassType != Attribute.NOMINAL ) {
    System.err.println("Error: "+debug+" attribute class must be nominal!");
    System.exit(1);
  }
  int i,j, lowestcount = 0, count, itcount;
  double lowest = -1.0, nextlowest = -1.0;
  double root, up, bot, stepsize, delta;
  double actentropy = 0.0, randentropy = 0.0, actscale, randscale;
  double minrand = 0.0, minact = 0.0, maxrand = 0.0, maxact = 0.0;
  double bestdiff, bestroot, currentdiff, lastdiff;
  double bestpsum, bestminprob, scale = 1.0;
  KStarWrapper botvals = new KStarWrapper();
  KStarWrapper upvals = new KStarWrapper();
  KStarWrapper vals = new KStarWrapper();

  // Compute |train - test| distances; missing values are flagged with -1.0.
  m_Distances = new double [m_NumInstances];
  for (j=0; j<m_NumInstances; j++) {
    if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) {
      // mark the train instance with a missing value by setting
      // the distance to -1.0
      m_Distances[j] = -1.0;
    } else {
      m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) -
                                m_Test.value(m_AttrIndex));
      // track the smallest and second-smallest distinct distances;
      // 1e-5 is the tolerance for treating two distances as equal
      if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) {
        if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) {
          nextlowest = lowest;
          lowest = m_Distances[j];
          lowestcount = 1;
        } else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) {
          // record the number of training instances (number n0) at
          // the smallest distance from the test instance
          lowestcount++;
        } else {
          nextlowest = m_Distances[j];
        }
      }
      // records the actual number of instances with no missing value
      m_ActualCount++;
    }
  } // for

  if (nextlowest == -1 || lowest == -1) { // Data values are all the same
    scale = 1.0;
    m_SmallestProb = m_AverageProb = 1.0;
    return scale;
  } else {
    // starting point for root
    root = 1.0 / (nextlowest - lowest);
    // root is bracketed in interval [bot,up]
    bot = 0.0 + ROOT_FINDER_ACCURACY / 2;
    up = root * 8; // This is bodgy
    // Find (approx) entropy ranges at the two ends of the bracket
    calculateEntropy(up, upvals);
    calculateEntropy(bot, botvals);
    actscale = botvals.actEntropy - upvals.actEntropy;
    randscale = botvals.randEntropy - upvals.randEntropy;
    // Optimise the scale factor
    bestroot = root = bot;
    bestdiff = currentdiff = FLOOR1;
    bestpsum = botvals.avgProb;
    bestminprob = botvals.minProb;
    stepsize = (up - bot) / 20.0;
    itcount = 0;
    // Root finding algorithm starts here!
    while (true) {
      itcount++;
      lastdiff = currentdiff;
      // logarithmic step: larger roots take proportionally larger strides
      root += Math.log(root + 1.0) * stepsize;
      if (root <= bot) {
        root = bot;
        currentdiff = 0.0;
        delta = -1.0;
      } else if (root >= up) {
        root = up;
        currentdiff = 0.0;
        delta = -1.0;
      } else {
        calculateEntropy(root, vals);
        // Normalise entropies
        // NOTE(review): actEntropy is normalised by randscale even though
        // actscale is computed above and never used — matches upstream,
        // confirm intent before "fixing".
        vals.randEntropy = (vals.randEntropy - upvals.randEntropy) / randscale;
        vals.actEntropy = (vals.actEntropy - upvals.actEntropy) / randscale;
        currentdiff = vals.randEntropy - vals.actEntropy;
        if (currentdiff < FLOOR1) {
          currentdiff = FLOOR1;
          if (stepsize < 0) {
            // If we've hit the end and turned around we can't
            // have found any peaks
            bestdiff = currentdiff;
            bestroot = bot;
            bestpsum = botvals.avgProb;
            bestminprob = botvals.minProb;
            break;
          }
        }
        delta = currentdiff - lastdiff;
      }
      if (currentdiff > bestdiff) {
        bestdiff = currentdiff;
        bestroot = root;
        bestminprob = vals.minProb;
        bestpsum = vals.avgProb;
      }
      if (delta < 0) {
        if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
          break;
        } else {
          // overshot a peak: reverse direction and shrink the step
          stepsize /= -4.0;
        }
      }
      if (itcount > ROOT_FINDER_MAX_ITER) {
        // System.err.println("Warning: "+debug+" ROOT_FINDER_MAX_ITER
        // exceeded");
        break;
      }
    } // while

    m_SmallestProb = bestminprob;
    m_AverageProb = bestpsum;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode ) {
    case M_DELETE:
      m_MissingProb = 0.0;
      break;
    case M_NORMAL:
      m_MissingProb = 1.0;
      break;
    case M_MAXDIFF:
      m_MissingProb = m_SmallestProb;
      break;
    case M_AVERAGE:
      m_MissingProb = m_AverageProb;
      break;
    }
    // set scale factor
    scale = bestroot;
  } // else
  return scale;
}

/**
 * Calculates several parameters aside from the entropy: for a specified
 * scale factor, calculates the actual entropy, a random entropy using a
 * randomized set of class value columns, and records the average and
 * smallest probabilities (for use in missing value case).
*
* @param scale the scale factor at which P* is evaluated
* @param params wrapper receiving actual entropy, random entropy, average
*               probability and minimum probability
*/
private void calculateEntropy(double scale, KStarWrapper params) {
  String debug = "(KStarNumericAttribute.calculateEntropy)";
  int i,j,k;
  double actent = 0.0, randent = 0.0;
  double pstar, tprob, avgprob = 0.0, minprob = 1.0;
  double actClassProb, randClassProb;
  double [][] pseudoClassProbs = new double[NUM_RAND_COLS+1][m_NumClasses];

  // init
  for(j = 0; j <= NUM_RAND_COLS; j++) {
    for(i = 0; i < m_NumClasses; i++) {
      pseudoClassProbs[j][i] = 0.0;
    }
  }
  // accumulate per-class probability mass for each (random) class column
  for (i=0; i < m_NumInstances; i++) {
    if (m_Distances[i] < 0) {
      // train instance has missing value
      continue;
    } else {
      pstar = PStar(m_Distances[i], scale);
      tprob = pstar / m_ActualCount;
      avgprob += tprob;
      if (pstar < minprob) {
        minprob = pstar;
      }
      // filter instances with same class value
      for (k=0; k <= NUM_RAND_COLS; k++) {
        // instance i is assigned a random class value in column k;
        // column k = NUM_RAND_COLS contains the original mapping:
        // instance -> class value
        pseudoClassProbs[k][ m_RandClassCols[k][i] ] += tprob;
      }
    }
  }
  // NOTE(review): if every distance is missing, avgprob stays 0 and the
  // divisions below yield NaN — presumably callers guarantee at least one
  // usable instance; confirm.
  // compute the actual entropy using the class probabilities
  // with the original class value mapping (column NUM_RAND_COLS)
  for (j = m_NumClasses-1; j >= 0; j--) {
    actClassProb = pseudoClassProbs[NUM_RAND_COLS][j] / avgprob;
    if (actClassProb > 0) {
      actent -= actClassProb * Math.log(actClassProb) / LOG2;
    }
  }
  // compute a random entropy using the pseudo class probs
  // excluding the column NUM_RAND_COLS, averaged over the random columns
  for (k=0; k < NUM_RAND_COLS; k++) {
    for (i = m_NumClasses-1; i >= 0; i--) {
      randClassProb = pseudoClassProbs[k][i] / avgprob;
      if (randClassProb > 0) {
        randent -= randClassProb * Math.log(randClassProb) / LOG2;
      }
    }
  }
  randent /= NUM_RAND_COLS;
  // return the values
  params.actEntropy = actent;
  params.randEntropy = randent;
  params.avgProb = avgprob;
  params.minProb = minprob;
}

/**
 * Calculates the value of P for a given value x using the expression:
 * P(x) = scale * exp( -2.0 * x * scale )
 *
 * @param x input value
 * @param scale the scale factor
 * @return output of the function P(x)
 */
private double
PStar(double x, double scale) {
  // Probability transfer function: an exponential decay in the distance x
  // whose steepness grows with the scale factor.
  final double exponent = -2.0 * x * scale;
  return scale * Math.exp(exponent);
}

/**
 * Sets all three options in one call by delegating to the individual
 * setters.
 *
 * @param missingmode the missing value treatment to use
 * @param blendmethod the blending method to use
 * @param blendfactor the level of blending to use
 */
public void setOptions(int missingmode, int blendmethod, int blendfactor) {
  setMissingMode(missingmode);
  setBlendMethod(blendmethod);
  setBlendFactor(blendfactor);
}

/**
 * Sets the missing value mode.
 *
 * @param mode the type of missing value treatment to use
 */
public void setMissingMode(int mode) {
  m_MissingMode = mode;
}

/**
 * Sets the blending method.
 *
 * @param method the blending method to use
 */
public void setBlendMethod(int method) {
  m_BlendMethod = method;
}

/**
 * Sets the blending factor.
 *
 * @param factor the level of blending to use
 */
public void setBlendFactor(int factor) {
  m_BlendFactor = factor;
}
} // class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -