📄 kstarnumericattribute.java
字号:
* sphere = sum(P)^2/sum(P^2) where
* P(i) = root*exp(-2*i*root).
* Since there are n different training instances we multiply P(i) by 1/n.
*/
private void calculateSphereSize(double scale, KStarWrapper params) {
  // Walks every training instance, converts its stored distance into a
  // transformation probability via PStar(distance, scale), and accumulates
  // the sums needed for sphere = sum(P)^2 / sum(P^2).
  double smallestProb = 1.0;     // smallest raw P* encountered
  double weightedSum = 0.0;      // sum of P* / m_ActualCount
  double weightedSquareSum = 0.0; // sum of (P* / m_ActualCount)^2

  for (int idx = 0; idx < m_NumInstances; idx++) {
    final double distance = m_Distances[idx];
    if (distance < 0) {
      // a negative distance flags an instance with a missing value; skip it
      continue;
    }
    final double prob = PStar(distance, scale);
    if (prob < smallestProb) {
      smallestProb = prob;
    }
    final double weighted = prob / m_ActualCount;
    weightedSum += weighted;
    weightedSquareSum += weighted * weighted;
  }

  // Avoid dividing by zero when no instance contributed any probability.
  final double sphere;
  if (weightedSquareSum == 0) {
    sphere = 0;
  } else {
    sphere = weightedSum * weightedSum / weightedSquareSum;
  }

  // Hand the results back through the wrapper object.
  params.sphere = sphere;
  params.avgProb = weightedSum;
  params.minProb = smallestProb;
}
/**
* Calculates the scale factor using entropy.
*
* @return the scale factor value
*/
private double scaleFactorUsingEntropy() {
// Overview:
//  1. Fill m_Distances with |train value - test value| for attribute
//     m_AttrIndex (-1.0 for training instances whose value is missing) while
//     tracking the smallest and second-smallest distances.
//  2. Bracket the scale ("root") in [bot, up] and evaluate the entropies at
//     both ends of the bracket.
//  3. Step through the bracket looking for the root that maximises the
//     normalised difference (random entropy - actual entropy), reversing
//     and shrinking the step whenever the difference drops.
//  4. Store the probabilities found at the best root in m_SmallestProb /
//     m_AverageProb, derive m_MissingProb from m_MissingMode, and return
//     the best root as the scale factor.
String debug = "(KStarNumericAttribute.scaleFactorUsingEntropy)";
// A non-nominal class attribute is treated as fatal: the message is printed
// to stderr and the whole JVM is terminated.
if ( m_ClassType != Attribute.NOMINAL ) {
System.err.println("Error: "+debug+" attribute class must be nominal!");
System.exit(1);
}
// NOTE(review): i, count, actentropy, randentropy, minrand, minact, maxrand
// and maxact are declared below but never used in this method; lowestcount
// and actscale are written but never read here.
int i,j, lowestcount = 0, count, itcount;
// -1.0 is the "not seen yet" sentinel for both tracked distances
double lowest = -1.0, nextlowest = -1.0;
double root, up, bot, stepsize, delta;
double actentropy = 0.0, randentropy = 0.0, actscale, randscale;
double minrand = 0.0, minact = 0.0, maxrand = 0.0, maxact = 0.0;
double bestdiff, bestroot, currentdiff, lastdiff;
double bestpsum, bestminprob, scale = 1.0;
// result holders for the bracket's lower end, upper end and current root
KStarWrapper botvals = new KStarWrapper();
KStarWrapper upvals = new KStarWrapper();
KStarWrapper vals = new KStarWrapper();
m_Distances = new double [m_NumInstances];
for (j=0; j<m_NumInstances; j++) {
if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) {
// mark the train instance with a missing value by setting
// the distance to -1.0
m_Distances[j] = -1.0;
}
else {
m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) -
m_Test.value(m_AttrIndex));
// Track the two smallest distances; values within 1e-5 of each other
// are treated as equal.
if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) {
if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) {
// new minimum: the previous minimum becomes the runner-up
nextlowest = lowest;
lowest = m_Distances[j];
lowestcount = 1;
}
else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) {
// record the number training instances (number n0) at
// the smallest distance from test instance
lowestcount++;
}
else {
// strictly between the current minimum and the runner-up
nextlowest = m_Distances[j];
}
}
// records the actual number of instances with no missing value
// NOTE(review): m_ActualCount is only incremented here, never reset in
// this method - presumably it is zeroed elsewhere; confirm.
m_ActualCount++;
}
} // for
if (nextlowest == -1 || lowest == -1) { // Data values are all the same
// Fewer than two distinct distances were seen, so there is nothing to
// optimise. Note that m_MissingProb is not assigned on this path.
scale = 1.0;
m_SmallestProb = m_AverageProb = 1.0;
return scale;
}
else {
// starting point for root
root = 1.0 / (nextlowest - lowest);
// root is bracketed in interval [bot,up]
bot = 0.0 + ROOT_FINDER_ACCURACY / 2;
up = root * 8; // This is bodgy
// Find (approx) entropy ranges
calculateEntropy(up, upvals);
calculateEntropy(bot, botvals);
actscale = botvals.actEntropy - upvals.actEntropy;
randscale = botvals.randEntropy - upvals.randEntropy;
// Optimise the scale factor
// The search starts at the lower end of the bracket, with the best
// difference initialised to the FLOOR1 constant.
bestroot = root = bot;
bestdiff = currentdiff = FLOOR1;
bestpsum = botvals.avgProb;
bestminprob = botvals.minProb;
// initial step: one twentieth of the bracket width
stepsize = (up - bot) / 20.0;
itcount = 0;
// Root finding algorithm starts here!
while (true)
{
itcount++;
lastdiff = currentdiff;
// the move is the (signed) step scaled by log(root + 1)
root += Math.log(root + 1.0) * stepsize;
if (root <= bot) {
// ran off the lower end: clamp, and use a negative delta so the
// reversal / termination check below fires
root = bot;
currentdiff = 0.0;
delta = -1.0;
}
else if (root >= up) {
// ran off the upper end: same treatment as above
root = up;
currentdiff = 0.0;
delta = -1.0;
}
else {
calculateEntropy(root, vals);
// Normalise entropies
// NOTE(review): the actual entropy is also divided by randscale (not
// actscale, which is computed above but unused) - confirm intent.
vals.randEntropy = (vals.randEntropy - upvals.randEntropy) /
randscale;
vals.actEntropy = (vals.actEntropy - upvals.actEntropy) /
randscale;
currentdiff = vals.randEntropy - vals.actEntropy;
if (currentdiff < FLOOR1) {
// differences below FLOOR1 are raised to FLOOR1
currentdiff = FLOOR1;
if (stepsize < 0) {
// If we've hit the end and turned around we can't
// have found any peaks
bestdiff = currentdiff;
bestroot = bot;
bestpsum = botvals.avgProb;
bestminprob = botvals.minProb;
break;
}
}
delta = currentdiff - lastdiff;
}
// remember the root giving the largest difference so far
// NOTE(review): after a clamp no entropy was computed this iteration, so
// vals would hold values from an earlier root if this branch were taken
// then; whether 0.0 can exceed bestdiff depends on FLOOR1 - confirm.
if (currentdiff > bestdiff) {
bestdiff = currentdiff;
bestroot = root;
bestminprob = vals.minProb;
bestpsum = vals.avgProb;
}
// The difference got smaller: either stop (step already finer than
// ROOT_FINDER_ACCURACY) or reverse direction with a quarter-size step.
if (delta < 0) {
if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
break;
}
else {
stepsize /= -4.0;
}
}
// hard cap on the number of iterations; exits silently
if (itcount > ROOT_FINDER_MAX_ITER) {
// System.err.println("Warning: "+debug+" ROOT_FINDER_MAX_ITER
// exceeded");
break;
}
} // while
m_SmallestProb = bestminprob;
m_AverageProb = bestpsum;
// Set the probability of transforming to a missing value
// (there is no default case: any other mode leaves m_MissingProb unchanged)
switch ( m_MissingMode )
{
case M_DELETE:
m_MissingProb = 0.0;
break;
case M_NORMAL:
m_MissingProb = 1.0;
break;
case M_MAXDIFF:
m_MissingProb = m_SmallestProb;
break;
case M_AVERAGE:
m_MissingProb = m_AverageProb;
break;
}
// set scale factor
scale = bestroot;
} // else
return scale;
}
/**
* Calculates several parameters aside from the entropy: for a specified
* scale factor, calculates the actual entropy, a random entropy using a
* randomized set of class value columns, and records the average and
* smallest probabilities (for use in the missing value case).
*
* @param scale the scale factor to evaluate
* @param params wrapper that receives the computed entropies and probabilities
*/
private void calculateEntropy(double scale, KStarWrapper params) {
  // One row of class-probability mass per class-value column. Rows
  // 0 .. NUM_RAND_COLS-1 use the randomized class assignments, row
  // NUM_RAND_COLS uses the original instance -> class value mapping.
  // A freshly allocated double array is already filled with 0.0.
  final double[][] classMass = new double[NUM_RAND_COLS + 1][m_NumClasses];

  double probTotal = 0.0;    // sum of P* / m_ActualCount over usable instances
  double smallestProb = 1.0; // smallest raw P* encountered

  for (int inst = 0; inst < m_NumInstances; inst++) {
    final double distance = m_Distances[inst];
    if (distance < 0) {
      // training instance has a missing value; it contributes nothing
      continue;
    }
    final double rawProb = PStar(distance, scale);
    final double share = rawProb / m_ActualCount;
    probTotal += share;
    if (rawProb < smallestProb) {
      smallestProb = rawProb;
    }
    // credit this instance's share to the class it has in every column
    for (int col = 0; col <= NUM_RAND_COLS; col++) {
      classMass[col][m_RandClassCols[col][inst]] += share;
    }
  }

  // Actual entropy: class distribution under the original mapping
  // (row NUM_RAND_COLS), normalised by the total probability.
  double actualEntropy = 0.0;
  for (int cls = m_NumClasses - 1; cls >= 0; cls--) {
    final double classProb = classMass[NUM_RAND_COLS][cls] / probTotal;
    if (classProb > 0) {
      actualEntropy -= classProb * Math.log(classProb) / LOG2;
    }
  }

  // Random entropy: the same computation accumulated over every randomized
  // column (row NUM_RAND_COLS excluded), then averaged over those columns.
  double randomEntropy = 0.0;
  for (int col = 0; col < NUM_RAND_COLS; col++) {
    for (int cls = m_NumClasses - 1; cls >= 0; cls--) {
      final double classProb = classMass[col][cls] / probTotal;
      if (classProb > 0) {
        randomEntropy -= classProb * Math.log(classProb) / LOG2;
      }
    }
  }
  randomEntropy /= NUM_RAND_COLS;

  // Hand the results back through the wrapper object.
  params.actEntropy = actualEntropy;
  params.randEntropy = randomEntropy;
  params.avgProb = probTotal;
  params.minProb = smallestProb;
}
/**
* Calculates the value of P for a given value x using the expression:
* P(x) = scale * exp( -2.0 * x * scale )
*
* @param x input value
* @param scale the scale factor
* @return output of the function P(x)
*/
private double PStar(double x, double scale) {
  // exponent of the decay term: -2 * x * scale
  final double exponent = -2.0 * x * scale;
  return scale * Math.exp(exponent);
}
/**
* Set options.
* @param missingmode the missing value treatment to use
* @param blendmethod the blending method to use
* @param blendfactor the level of blending to use
*/
public void setOptions(int missingmode, int blendmethod, int blendfactor) {
  // Stores all three settings as given; no validation is performed here.
  this.m_MissingMode = missingmode;
  this.m_BlendMethod = blendmethod;
  this.m_BlendFactor = blendfactor;
}
/**
* Set the missing value mode.
* @param mode the type of missing value treatment to use
*/
public void setMissingMode(int mode) {
  // Stored as given; no validation is performed here.
  this.m_MissingMode = mode;
}
/**
* Set the blending method
* @param method the blending method to use
*/
public void setBlendMethod(int method) {
  // Stored as given; no validation is performed here.
  this.m_BlendMethod = method;
}
/**
* Set the blending factor
* @param factor the level of blending to use
*/
public void setBlendFactor(int factor) {
  // Stored as given; no validation is performed here.
  this.m_BlendFactor = factor;
}
} // class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -