// KStarNominalAttribute.java
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* KStarNominalAttribute.java
* Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
* Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
*
*/
package weka.classifiers.lazy.kstar;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
/**
* A custom class which provides the environment for computing the
* transformation probability of a specified test instance nominal
* attribute to a specified train instance nominal attribute.
*
* @author Len Trigg (len@reeltwo.com)
* @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
* @version $Revision 1.0 $
*/
public class KStarNominalAttribute implements KStarConstants {
/** The training instances used for classification. */
protected Instances m_TrainSet;
/** The test instance. */
protected Instance m_Test;
/** The train instance. */
protected Instance m_Train;
/** The index of the nominal attribute in the test and train instances. */
protected int m_AttrIndex;
/** The stop parameter (probability of halting the transformation sequence). */
protected double m_Stop = 1.0;
/** Probability of the test attribute transforming into a train attribute
    with a missing value. */
protected double m_MissingProb = 1.0;
/** Average probability of the test attribute transforming into the train
    attribute. */
protected double m_AverageProb = 1.0;
/** Smallest probability of the test attribute transforming into the
    train attribute. */
protected double m_SmallestProb = 1.0;
/** Number of train instances with no missing attribute values. */
protected int m_TotalCount;
/** Distribution of the attribute value in the train dataset. */
protected int [] m_Distribution;
/** Set of columns: each column representing a randomised version
    of the train dataset class column. */
protected int [][] m_RandClassCols;
/** A cache for storing attribute values and their corresponding
    stop parameters. */
protected KStarCache m_Cache;
// KStar global settings (copied from the training set in init())
/** The number of instances in the dataset. */
protected int m_NumInstances;
/** The number of class values. */
protected int m_NumClasses;
/** The number of attributes. */
protected int m_NumAttributes;
/** The class attribute type (see weka.core.Attribute type constants). */
protected int m_ClassType;
/** Missing value treatment: one of the M_* constants from KStarConstants. */
protected int m_MissingMode = M_AVERAGE;
/** B_SPHERE = use specified blend, B_ENTROPY = entropic blend setting. */
protected int m_BlendMethod = B_SPHERE ;
/** Default sphere-of-influence blend setting. */
protected int m_BlendFactor = 20;
/**
* Constructor
*/
/**
 * Builds the environment for computing the transformation probability of
 * one nominal attribute between a test and a train instance.
 *
 * @param test the test instance
 * @param train the train instance
 * @param attrIndex index of the nominal attribute in both instances
 * @param trainSet the training instances used for classification
 * @param randClassCol randomised versions of the train set class column
 * @param cache cache of previously computed stop parameters
 */
public KStarNominalAttribute(Instance test, Instance train, int attrIndex,
                             Instances trainSet, int [][] randClassCol,
                             KStarCache cache)
{
  // Record the configuration for this attribute's probability computation.
  m_Cache = cache;
  m_RandClassCols = randClassCol;
  m_TrainSet = trainSet;
  m_AttrIndex = attrIndex;
  m_Train = train;
  m_Test = test;
  // Pull global dataset statistics out of the training set.
  init();
}
/**
* Initializes the m_Attributes of the class.
*/
/**
 * Initializes the m_Attributes of the class: caches the training set's
 * instance count, class count, attribute count, and class attribute type
 * for use by the probability computations.
 */
private void init() {
try {
m_NumInstances = m_TrainSet.numInstances();
m_NumClasses = m_TrainSet.numClasses();
m_NumAttributes = m_TrainSet.numAttributes();
m_ClassType = m_TrainSet.classAttribute().type();
// NOTE(review): this broad catch swallows any failure and leaves the
// fields at their defaults (0); consider letting the exception
// propagate instead of only printing the stack trace.
} catch(Exception e) {
e.printStackTrace();
}
}
/**
* Calculates the probability of the indexed nominal attribute of the test
* instance transforming into the indexed nominal attribute of the training
* instance.
*
* @return the value of the transformation probability.
*/
/**
 * Calculates the probability of the indexed nominal attribute of the test
 * instance transforming into the indexed nominal attribute of the training
 * instance.
 *
 * @return the value of the transformation probability.
 */
public double transProb() {
  double transProb = 0.0;
  // Check whether this attribute value has been encountered before, in
  // which case its stop parameter and missing-value probability are cached.
  if (m_Cache.containsKey(m_Test.value(m_AttrIndex))) {
    KStarCache.TableEntry te =
      m_Cache.getCacheValues(m_Test.value(m_AttrIndex));
    m_Stop = te.value;
    m_MissingProb = te.pmiss;
  }
  else {
    generateAttrDistribution();
    // Compute the stop parameter with the configured blend method.
    if (m_BlendMethod == B_ENTROPY) {
      m_Stop = stopProbUsingEntropy();
    }
    else { // default is B_SPHERE
      m_Stop = stopProbUsingBlend();
    }
    // Store the computed values in the cache for reuse.
    m_Cache.store( m_Test.value(m_AttrIndex), m_Stop, m_MissingProb );
  }
  // With m_Stop determined, compute the transformation probability.
  if (m_Train.isMissing(m_AttrIndex)) {
    transProb = m_MissingProb;
  }
  else {
    try {
      // Uniform share of the non-stop probability over all attribute
      // values; matching values additionally receive the stop probability.
      transProb = (1.0 - m_Stop) / m_Test.attribute(m_AttrIndex).numValues();
      if ( (int)m_Test.value(m_AttrIndex) ==
           (int)m_Train.value(m_AttrIndex) )
      {
        transProb += m_Stop;
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  return transProb;
}
/**
* Calculates the "stop parameter" for this attribute using
* the entropy method: the value is computed using a root finder
* algorithm. The method takes advantage of the calculation to
* compute the smallest and average transformation probabilities
* once the stop factor is obtained. It also sets the transformation
* probability to an attribute with a missing value.
*
* @return the value of the stop parameter.
*
*/
private double stopProbUsingEntropy() {
String debug = "(KStarNominalAttribute.stopProbUsingEntropy)";
if ( m_ClassType != Attribute.NOMINAL ) {
System.err.println("Error: "+debug+" attribute class must be nominal!");
System.exit(1);
}
int itcount = 0;
double stopProb;
double lower, upper, pstop;
double bestminprob = 0.0, bestpsum = 0.0;
double bestdiff = 0.0, bestpstop = 0.0;
double currentdiff, lastdiff, stepsize, delta;
KStarWrapper botvals = new KStarWrapper();
KStarWrapper upvals = new KStarWrapper();
KStarWrapper vals = new KStarWrapper();
// Initial values for root finder
lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;
// Find (approx) entropy ranges
calculateEntropy(upper, upvals);
calculateEntropy(lower, botvals);
if (upvals.avgProb == 0) {
// When there are no training instances with the test value:
// doesn't matter what exact value we use for pstop, just acts as
// a constant scale factor in this case.
calculateEntropy(lower, vals);
}
else
{
// Optimise the scale factor
if ( (upvals.randEntropy - upvals.actEntropy <
botvals.randEntropy - botvals.actEntropy) &&
(botvals.randEntropy - botvals.actEntropy > FLOOR) )
{
bestpstop = pstop = lower;
stepsize = INITIAL_STEP;
bestminprob = botvals.minProb;
bestpsum = botvals.avgProb;
}
else {
bestpstop = pstop = upper;
stepsize = -INITIAL_STEP;
bestminprob = upvals.minProb;
bestpsum = upvals.avgProb;
}
bestdiff = currentdiff = FLOOR;
itcount = 0;
/* Enter the root finder */
while (true)
{
itcount++;
lastdiff = currentdiff;
pstop += stepsize;
if (pstop <= lower) {
pstop = lower;
currentdiff = 0.0;
delta = -1.0;
}
else if (pstop >= upper) {
pstop = upper;
currentdiff = 0.0;
delta = -1.0;
}
else {
calculateEntropy(pstop, vals);
currentdiff = vals.randEntropy - vals.actEntropy;
if (currentdiff < FLOOR) {
currentdiff = FLOOR;
if ((Math.abs(stepsize) < INITIAL_STEP) &&
(bestdiff == FLOOR)) {
bestpstop = lower;
bestminprob = botvals.minProb;
bestpsum = botvals.avgProb;
break;
}
}
delta = currentdiff - lastdiff;
}
if (currentdiff > bestdiff) {
bestdiff = currentdiff;
bestpstop = pstop;
bestminprob = vals.minProb;
bestpsum = vals.avgProb;
}
if (delta < 0) {
if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
break;
}
else {
stepsize /= -2.0;
}
}
if (itcount > ROOT_FINDER_MAX_ITER) {
break;
}
}
}
m_SmallestProb = bestminprob;
m_AverageProb = bestpsum;
// Set the probability of transforming to a missing value
switch ( m_MissingMode )
{
case M_DELETE:
m_MissingProb = 0.0;
break;
case M_NORMAL:
      // NOTE(review): the source is truncated here — the remainder of the
      // M_NORMAL case, the rest of stopProbUsingEntropy(), and the rest of
      // this class were replaced by web-viewer artifacts in this copy.
      // Restore the missing code from the original Weka distribution
      // (weka.classifiers.lazy.kstar.KStarNominalAttribute).