📄 kstarnominalattribute.java
字号:
m_MissingProb = 1.0;
break;
case M_MAXDIFF:
m_MissingProb = m_SmallestProb;
break;
case M_AVERAGE:
m_MissingProb = m_AverageProb;
break;
}
if ( Math.abs(bestpsum - (double)m_TotalCount) < EPSILON) {
// No difference in the values
stopProb = 1.0;
}
else {
stopProb = bestpstop;
}
return stopProb;
}
/**
 * Evaluates, for one candidate stop parameter, how well the transformation
 * probabilities of this attribute separate the classes.
 * <p>
 * Every training instance whose value for this attribute is not missing
 * contributes {@code PStar(...) / m_TotalCount} to the class it belongs to,
 * once for the real class labels (column {@code NUM_RAND_COLS}) and once for
 * each of the {@code NUM_RAND_COLS} randomised labelings held in
 * {@code m_RandClassCols}. The entropy (base 2) of the real-label
 * distribution and the mean entropy over the randomised labelings are then
 * derived from those per-class sums.
 *
 * @param stop the stop parameter to evaluate
 * @param params output wrapper; on return it holds
 *   {@code actEntropy} (entropy under the real labels),
 *   {@code randEntropy} (mean entropy under the randomised labels),
 *   {@code avgProb} (sum of {@code PStar / m_TotalCount} over the
 *   contributing instances) and
 *   {@code minProb} (smallest {@code PStar} value seen, capped at 1.0).
 */
private void calculateEntropy( double stop, KStarWrapper params) {
  // One row per labeling; a freshly allocated double array is already
  // filled with zeros, so no explicit clearing pass is required.
  double[][] classMass = new double[NUM_RAND_COLS + 1][m_NumClasses];
  double totalMass = 0.0;
  double smallest = 1.0;

  for (int inst = 0; inst < m_NumInstances; inst++) {
    Instance trainInst = m_TrainSet.instance(inst);
    if (trainInst.isMissing(m_AttrIndex)) {
      continue;
    }
    double p = PStar(m_Test, trainInst, m_AttrIndex, stop);
    double scaled = p / m_TotalCount;
    smallest = (p < smallest) ? p : smallest;
    totalMass += scaled;
    // Row "col" assigns instance "inst" the class m_RandClassCols[col][inst];
    // the last row (col == NUM_RAND_COLS) is the original instance -> class
    // mapping.
    for (int col = 0; col <= NUM_RAND_COLS; col++) {
      classMass[col][ m_RandClassCols[col][inst] ] += scaled;
    }
  }

  // Entropy under the original class labels.
  double actualEntropy =
    subtractEntropyTerms(0.0, classMass[NUM_RAND_COLS], totalMass);

  // Entropy under each random labeling, accumulated in a single running
  // total and averaged afterwards.
  double randomEntropy = 0.0;
  for (int col = 0; col < NUM_RAND_COLS; col++) {
    randomEntropy = subtractEntropyTerms(randomEntropy, classMass[col], totalMass);
  }
  randomEntropy /= NUM_RAND_COLS;

  params.actEntropy = actualEntropy;
  params.randEntropy = randomEntropy;
  params.avgProb = totalMass;
  params.minProb = smallest;
}

/**
 * Subtracts {@code p * log2(p)} from a running total for every class whose
 * normalised probability {@code p = mass[c] / total} is strictly positive.
 * Classes are visited from the highest index down to zero.
 *
 * @param running the accumulator to continue from
 * @param mass per-class probability mass
 * @param total the normaliser applied to every entry of {@code mass}
 * @return the updated accumulator
 */
private double subtractEntropyTerms(double running, double[] mass, double total) {
  for (int c = mass.length - 1; c >= 0; c--) {
    double p = mass[c] / total;
    if (p > 0) {
      running -= p * Math.log(p) / LOG2;
    }
  }
  return running;
}
/**
 * Calculates the "stop parameter" for this attribute using the blend
 * method.
 * <p>
 * The target sphere size is
 * {@code aimfor = (m_TotalCount - n) * m_BlendFactor / 100 + n}, where
 * {@code n} is the number of training instances sharing the test instance's
 * value. The stop parameter is then located by bisection on the interval
 * {@code [ROOT_FINDER_ACCURACY/2, 1 - ROOT_FINDER_ACCURACY/2]} so that the
 * sphere size reported by {@code calculateSphereSize} matches
 * {@code aimfor} to within {@code ROOT_FINDER_ACCURACY}, or until
 * {@code ROOT_FINDER_MAX_ITER} iterations have been spent.
 * <p>
 * As a side effect this method stores the smallest and average
 * transformation probabilities for the chosen stop value in
 * {@code m_SmallestProb} and {@code m_AverageProb}, and sets
 * {@code m_MissingProb} according to {@code m_MissingMode}.
 *
 * @return the value of the stop parameter; 1.0 when the average
 *   probability is within {@code EPSILON} of {@code m_TotalCount}.
 */
private double stopProbUsingBlend() {
  int itcount = 0;
  double stopProb, aimfor;
  double lower, upper, tstop;

  KStarWrapper upvals = new KStarWrapper();
  KStarWrapper vals = new KStarWrapper();

  int testvalue = (int)m_Test.value(m_AttrIndex);
  aimfor = (m_TotalCount - m_Distribution[testvalue]) *
    (double)m_BlendFactor / 100.0 + m_Distribution[testvalue];

  // Initial values for root finder
  tstop = 1.0 - (double)m_BlendFactor / 100.0;
  lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
  upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;

  // Function value at the upper border. The value at the lower border is
  // never consulted, so it is not evaluated.
  calculateSphereSize(testvalue, upper, upvals);
  upvals.sphere -= aimfor;

  if (upvals.avgProb == 0) {
    // When there are no training instances with the test value:
    // doesn't matter what exact value we use for tstop, just acts as
    // a constant scale factor in this case.
    calculateSphereSize(testvalue, tstop, vals);
  }
  else if (upvals.sphere > 0) {
    // Can't include aimfor instances, going for min possible
    tstop = upper;
    vals.avgProb = upvals.avgProb;
    // Carry over the smallest probability as well, so that m_SmallestProb
    // (and m_MissingProb under M_MAXDIFF) describes tstop == upper instead
    // of whatever a freshly constructed wrapper happens to contain.
    vals.minProb = upvals.minProb;
  }
  else {
    // Enter the root finder: plain bisection on tstop.
    for (;;) {
      itcount++;
      calculateSphereSize(testvalue, tstop, vals);
      vals.sphere -= aimfor;
      if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ||
	   itcount >= ROOT_FINDER_MAX_ITER )
      {
	break;
      }
      if (vals.sphere > 0.0) {
	// Sphere still too large: move the lower bound up.
	lower = tstop;
      }
      else {
	// Sphere too small: move the upper bound down.
	upper = tstop;
      }
      tstop = (upper + lower) / 2.0;
    }
  }

  m_SmallestProb = vals.minProb;
  m_AverageProb = vals.avgProb;

  // Set the probability of transforming to a missing value
  switch ( m_MissingMode )
  {
  case M_DELETE:
    m_MissingProb = 0.0;
    break;
  case M_NORMAL:
    m_MissingProb = 1.0;
    break;
  case M_MAXDIFF:
    m_MissingProb = m_SmallestProb;
    break;
  case M_AVERAGE:
    m_MissingProb = m_AverageProb;
    break;
  }

  if ( Math.abs(vals.avgProb - m_TotalCount) < EPSILON) {
    // No difference in the values
    stopProb = 1.0;
  }
  else {
    stopProb = tstop;
  }
  return stopProb;
}
/**
 * Computes the size of the "sphere of influence" for a given stop
 * parameter.
 * <p>
 * For every attribute value {@code i} that occurs {@code n_i > 0} times in
 * the training data, the per-instance transformation probability is
 * <pre>
 *   p_i = ((i == testvalue ? stop : 0) + (1 - stop) / V) / m_TotalCount
 * </pre>
 * where {@code V} is the number of distinct attribute values
 * ({@code m_Distribution.length}). The sphere size is then
 * <pre>
 *   sphere = (sum_i n_i * p_i)^2 / (sum_i n_i * p_i^2)
 * </pre>
 * or 0 when the denominator is 0.
 *
 * @param testvalue index of the test instance's attribute value
 * @param stop the stop parameter
 * @param params output wrapper; on return it holds
 *   {@code sphere} (the sphere size),
 *   {@code avgProb} ({@code sum_i n_i * p_i}) and
 *   {@code minProb} (the smallest {@code p_i * m_TotalCount} over the
 *   occurring values, capped at 1.0).
 */
private void calculateSphereSize(int testvalue, double stop,
				 KStarWrapper params) {
  final int numValues = m_Distribution.length;
  double weightedSum = 0.0;      // sum of n_i * p_i
  double weightedSquares = 0.0;  // sum of n_i * p_i^2
  double smallest = 1.0;

  for (int value = 0; value < numValues; value++) {
    int count = m_Distribution[value];
    if (count == 0) {
      // Values that never occur in the training data contribute nothing.
      continue;
    }

    double p;
    if (value == testvalue) {
      p = (stop + (1 - stop) / numValues) / m_TotalCount;
    }
    else {
      p = ((1 - stop) / numValues) / m_TotalCount;
    }

    weightedSum += p * count;
    weightedSquares += p * p * count;

    // Undo the division by m_TotalCount before tracking the minimum.
    double unscaled = p * m_TotalCount;
    if (smallest > unscaled) {
      smallest = unscaled;
    }
  }

  double sphereSize;
  if (weightedSquares == 0) {
    sphereSize = 0;
  }
  else {
    sphereSize = (weightedSum * weightedSum) / weightedSquares;
  }

  params.sphere = sphereSize;
  params.avgProb = weightedSum;
  params.minProb = smallest;
}
/**
 * Calculates the nominal transformation probability of the test instance's
 * attribute value into the training instance's attribute value:
 * <pre>
 *   P = (1 - stop) / V + (same value ? stop : 0)
 * </pre>
 * where {@code V} is the number of values of the attribute as reported by
 * the test instance.
 *
 * @param test the test instance
 * @param train the train instance
 * @param col the attribute index
 * @param stop the stop parameter
 * @return the value of the transformation probability.
 * @throws IllegalStateException if the attribute description cannot be
 *   obtained from the test instance; continuing would divide by zero and
 *   hand an infinite or NaN probability to the caller.
 */
private double PStar(Instance test, Instance train, int col, double stop) {
  final int numvalues;
  try {
    numvalues = test.attribute(col).numValues();
  } catch (Exception ex) {
    // Fail loudly, keeping the cause, instead of printing a stack trace
    // and carrying on with a value count of zero.
    throw new IllegalStateException(
      "Cannot determine the number of values of attribute " + col, ex);
  }

  // Share every value receives regardless of whether it matches.
  double uniformShare = (1 - stop) / numvalues;
  if ( (int)test.value(col) == (int)train.value(col) ) {
    return stop + uniformShare;
  }
  return uniformShare;
}
/**
 * Builds the histogram of values of the indexed nominal attribute over the
 * training set and stores it in {@code m_Distribution} (one slot per
 * attribute value). Training instances whose value for the attribute is
 * missing are ignored; every other instance also increments
 * {@code m_TotalCount}.
 */
private void generateAttrDistribution() {
  // NOTE(review): m_TotalCount is only incremented here, never reset, so
  // this presumably relies on being invoked once per object - confirm.
  final int numValues = m_TrainSet.attribute(m_AttrIndex).numValues();
  m_Distribution = new int[numValues];

  for (int idx = 0; idx < m_NumInstances; idx++) {
    Instance current = m_TrainSet.instance(idx);
    if ( current.isMissing(m_AttrIndex) ) {
      continue;
    }
    m_TotalCount++;
    m_Distribution[(int)current.value(m_AttrIndex)] += 1;
  }
}
/**
 * Stores the three configuration values used by this attribute handler.
 *
 * @param missingmode how missing values are treated; elsewhere in this
 *   class the stored value is matched against M_DELETE, M_NORMAL,
 *   M_MAXDIFF and M_AVERAGE
 * @param blendmethod the blend method selector, stored as given
 * @param blendfactor the blend factor; elsewhere in this class it is
 *   divided by 100.0, i.e. treated as a percentage
 */
public void setOptions(int missingmode, int blendmethod, int blendfactor) {
  this.m_MissingMode = missingmode;
  this.m_BlendMethod = blendmethod;
  this.m_BlendFactor = blendfactor;
}
} // class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -