📄 kstarnominalattribute.java
字号:
case M_NORMAL: m_MissingProb = 1.0; break; case M_MAXDIFF: m_MissingProb = m_SmallestProb; break; case M_AVERAGE: m_MissingProb = m_AverageProb; break; } if ( Math.abs(bestpsum - (double)m_TotalCount) < EPSILON) { // No difference in the values stopProb = 1.0; } else { stopProb = bestpstop; } return stopProb; } /** * Calculates the entropy of the actual class prediction * and the entropy for random class prediction. It also * calculates the smallest and average transformation probabilities. * * @param stop the stop parameter * @param params the object wrapper for the parameters: * actual entropy, random entropy, average probability and smallest * probability. * @return the values are returned in the object "params". * */ private void calculateEntropy( double stop, KStarWrapper params) { String debug = "(KStarNominalAttribute.calculateEntropy)"; int i,j,k; Instance train; double actent = 0.0, randent=0.0; double pstar, tprob, psum=0.0, minprob=1.0; double actClassProb, randClassProb; double [][] pseudoClassProb = new double[NUM_RAND_COLS+1][m_NumClasses]; // init ... 
for(j = 0; j <= NUM_RAND_COLS; j++) { for(i = 0; i < m_NumClasses; i++) { pseudoClassProb[j][i] = 0.0; } } for (i=0; i < m_NumInstances; i++) { train = m_TrainSet.instance(i); if (!train.isMissing(m_AttrIndex)) { pstar = PStar(m_Test, train, m_AttrIndex, stop); tprob = pstar / m_TotalCount; if (pstar < minprob) { minprob = pstar; } psum += tprob; // filter instances with same class value for (k=0 ; k <= NUM_RAND_COLS ; k++) { // instance i is assigned a random class value in colomn k; // colomn k = NUM_RAND_COLS contains the original mapping: // instance -> class vlaue pseudoClassProb[k][ m_RandClassCols[k][i] ] += tprob; } } } // compute the actual entropy using the class probs // with the original class value mapping (colomn NUM_RAND_COLS) for (j=m_NumClasses-1; j>=0; j--) { actClassProb = pseudoClassProb[NUM_RAND_COLS][j] / psum; if (actClassProb > 0) { actent -= actClassProb * Math.log(actClassProb) / LOG2; } } // compute a random entropy using the pseudo class probs // excluding the colomn NUM_RAND_COLS for (k=0; k < NUM_RAND_COLS;k++) { for (i = m_NumClasses-1; i >= 0; i--) { randClassProb = pseudoClassProb[k][i] / psum; if (randClassProb > 0) { randent -= randClassProb * Math.log(randClassProb) / LOG2; } } } randent /= NUM_RAND_COLS; // return the results ... Yuk !!! params.actEntropy = actent; params.randEntropy = randent; params.avgProb = psum; params.minProb = minprob; } /** * Calculates the "stop parameter" for this attribute using * the blend method: the value is computed using a root finder * algorithm. The method takes advantage of this calculation to * compute the smallest and average transformation probabilities * once the stop factor is obtained. It also sets the transformation * probability to an attribute with a missing value. * * @return the value of the stop parameter. 
* */ private double stopProbUsingBlend() { String debug = "(KStarNominalAttribute.stopProbUsingBlend) "; int itcount = 0; double stopProb, aimfor; double lower, upper, tstop; KStarWrapper botvals = new KStarWrapper(); KStarWrapper upvals = new KStarWrapper(); KStarWrapper vals = new KStarWrapper(); int testvalue = (int)m_Test.value(m_AttrIndex); aimfor = (m_TotalCount - m_Distribution[testvalue]) * (double)m_BlendFactor / 100.0 + m_Distribution[testvalue]; // Initial values for root finder tstop = 1.0 - (double)m_BlendFactor / 100.0; lower = 0.0 + ROOT_FINDER_ACCURACY/2.0; upper = 1.0 - ROOT_FINDER_ACCURACY/2.0; // Find out function border values calculateSphereSize(testvalue, lower, botvals); botvals.sphere -= aimfor; calculateSphereSize(testvalue, upper, upvals); upvals.sphere -= aimfor; if (upvals.avgProb == 0) { // When there are no training instances with the test value: // doesn't matter what exact value we use for tstop, just acts as // a constant scale factor in this case. calculateSphereSize(testvalue, tstop, vals); } else if (upvals.sphere > 0) { // Can't include aimfor instances, going for min possible tstop = upper; vals.avgProb = upvals.avgProb; } else { // Enter the root finder for (;;) { itcount++; calculateSphereSize(testvalue, tstop, vals); vals.sphere -= aimfor; if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY || itcount >= ROOT_FINDER_MAX_ITER ) { break; } if (vals.sphere > 0.0) { lower = tstop; tstop = (upper + lower) / 2.0; } else { upper = tstop; tstop = (upper + lower) / 2.0; } } } m_SmallestProb = vals.minProb; m_AverageProb = vals.avgProb; // Set the probability of transforming to a missing value switch ( m_MissingMode ) { case M_DELETE: m_MissingProb = 0.0; break; case M_NORMAL: m_MissingProb = 1.0; break; case M_MAXDIFF: m_MissingProb = m_SmallestProb; break; case M_AVERAGE: m_MissingProb = m_AverageProb; break; } if ( Math.abs(vals.avgProb - m_TotalCount) < EPSILON) { // No difference in the values stopProb = 1.0; } else { stopProb = 
tstop; } return stopProb; } /** * Calculates the size of the "sphere of influence" defined as: * sphere = sum(P^2)/sum(P)^2 * P(i|j) = (1-tstop)*P(i) + ((i==j)?tstop:0). * This method takes advantage of the calculation to compute the values of * the "smallest" and "average" transformation probabilities when using * the specified stop parameter. * * @param testValue the value of the test instance * @param stop the stop parameter * @param params a wrapper of the parameters to be computed: * "sphere" the sphere size * "avgprob" the average transformation probability * "minProb" the smallest transformation probability * @return the values are returned in "params" object. * */ private void calculateSphereSize(int testvalue, double stop, KStarWrapper params) { String debug = "(KStarNominalAttribute.calculateSphereSize) "; int i, thiscount; double tprob, tval = 0.0, t1 = 0.0; double sphere, minprob = 1.0, transprob = 0.0; for(i = 0; i < m_Distribution.length; i++) { thiscount = m_Distribution[i]; if ( thiscount != 0 ) { if ( testvalue == i ) { tprob = (stop + (1 - stop) / m_Distribution.length) / m_TotalCount; tval += tprob * thiscount; t1 += tprob * tprob * thiscount; } else { tprob = ((1 - stop) / m_Distribution.length) / m_TotalCount; tval += tprob * thiscount; t1 += tprob * tprob * thiscount; } if ( minprob > tprob * m_TotalCount ) { minprob = tprob * m_TotalCount; } } } transprob = tval; sphere = (t1 == 0) ? 0 : ((tval * tval) / t1); // return values ... Yck!!! params.sphere = sphere; params.avgProb = transprob; params.minProb = minprob; } /** * Calculates the nominal probability function defined as: * P(i|j) = (1-stop) * P(i) + ((i==j) ? stop : 0) * In this case, it calculates the transformation probability of the * indexed test attribute to the indexed train attribute. * * @param test the test instance * @param train the train instance * @param col the attribute index * @return the value of the tranformation probability. 
* */
  private double PStar(Instance test, Instance train, int col, double stop) {
    int valueCount = 0;
    try {
      valueCount = test.attribute(col).numValues();
    } catch (Exception ex) {
      // Best effort: report the problem and carry on with a count of 0.
      ex.printStackTrace();
    }
    boolean sameValue = (int)test.value(col) == (int)train.value(col);
    // Share of the non-stop probability given to each attribute value.
    double spread = (1 - stop) / valueCount;
    return sameValue ? stop + spread : spread;
  }

  /**
   * Builds the per-value count table of the indexed nominal attribute over
   * the training set. Instances with a missing value for the attribute are
   * ignored; every other instance increments both its value's count and the
   * running total of contributing instances.
   */
  private void generateAttrDistribution() {
    m_Distribution = new int[ m_TrainSet.attribute(m_AttrIndex).numValues() ];
    for (int row = 0; row < m_NumInstances; row++) {
      Instance current = m_TrainSet.instance(row);
      if ( current.isMissing(m_AttrIndex) ) {
        continue;
      }
      m_TotalCount++;
      m_Distribution[(int)current.value(m_AttrIndex)]++;
    }
  }

  /**
   * Sets the options.
   *
   * @param missingmode the missing-value mode to store
   * @param blendmethod the blend method to store
   * @param blendfactor the blend factor to store
   */
  public void setOptions(int missingmode, int blendmethod, int blendfactor) {
    this.m_MissingMode = missingmode;
    this.m_BlendMethod = blendmethod;
    this.m_BlendFactor = blendfactor;
  }
} // class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -