📄 kstarnumericattribute.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* KStarNumericAttribute.java
* Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
* Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
*
*/
package weka.classifiers.lazy.kstar;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
/**
* A custom class which provides the environment for computing the
* transformation probability of a specified test instance numeric
* attribute to a specified train instance numeric attribute.
*
* @author Len Trigg (len@reeltwo.com)
* @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
* @version $Revision 1.0 $
*/
public class KStarNumericAttribute implements KStarConstants {
/** The training instances used for classification. */
protected Instances m_TrainSet;
/** The test instance. */
protected Instance m_Test;
/** The train instance. */
protected Instance m_Train;
/** The index of the attribute in the test and train instances. */
protected int m_AttrIndex;
/** The scale parameter passed to PStar when computing the transformation
probability. */
protected double m_Scale = 1.0;
/** Probability of the test attribute transforming into a train attribute
with a missing value. */
protected double m_MissingProb = 1.0;
/** Average probability of the test attribute transforming into a train
attribute. */
protected double m_AverageProb = 1.0;
/** Smallest probability of the test attribute transforming into a train
attribute. */
protected double m_SmallestProb = 1.0;
/** The set of distances from the test attribute to the set of train
attributes; a train instance whose value is missing is marked with -1.0. */
protected double [] m_Distances;
/** Set of columns: each column representing a randomised version of
the train dataset class column. */
protected int [][] m_RandClassCols;
/** The number of train instances whose value for the attribute indexed
by m_AttrIndex is not missing. */
protected int m_ActualCount = 0;
/** A cache for storing attribute values and their corresponding scale
parameters and missing-value probabilities. */
protected KStarCache m_Cache;
/** The number of instances in the training dataset. */
protected int m_NumInstances;
/** The number of class values. */
protected int m_NumClasses;
/** The number of attributes. */
protected int m_NumAttributes;
/** The class attribute type. */
protected int m_ClassType;
/** Missing value treatment: one of M_DELETE, M_NORMAL, M_MAXDIFF or
M_AVERAGE. */
protected int m_MissingMode = M_AVERAGE;
/** Blend method: B_ENTROPY selects the entropy-based scale factor, any
other value (default B_SPHERE) selects the blend-factor based one.
Originally documented as: 0 = use specified blend, 1 = entropic blend
setting. */
protected int m_BlendMethod = B_SPHERE ;
/** Default sphere of influence blend setting, expressed as a percentage
(it is divided by 100 when used). */
protected int m_BlendFactor = 20;
/**
 * Builds the environment needed to compute the transformation probability
 * of one numeric attribute between a test instance and a train instance.
 * All arguments are stored by reference (nothing is copied), after which
 * the training-set dimensions are read via {@code init()}.
 *
 * @param test the test instance
 * @param train the train instance
 * @param attrIndex the index of the attribute in both instances
 * @param trainSet the training instances used for classification
 * @param randClassCols the randomised versions of the class column
 * @param cache the cache of scale parameters keyed by attribute value
 */
public KStarNumericAttribute(Instance test, Instance train, int attrIndex,
                             Instances trainSet,
                             int [][] randClassCols,
                             KStarCache cache) {
  // The training set and its companions first: init() reads m_TrainSet.
  this.m_TrainSet = trainSet;
  this.m_RandClassCols = randClassCols;
  this.m_Cache = cache;

  // The pair of instances being compared and the attribute of interest.
  this.m_AttrIndex = attrIndex;
  this.m_Test = test;
  this.m_Train = train;

  init();
}
/**
 * Reads the dimensions of the training set (number of instances, classes
 * and attributes) and the type of its class attribute into the
 * corresponding fields.
 *
 * Any exception raised while querying the training set is caught and its
 * stack trace is printed; fields that had not been assigned by then keep
 * their previous values.
 */
private void init() {
  try {
    final Instances data = this.m_TrainSet;
    this.m_NumInstances = data.numInstances();
    this.m_NumClasses = data.numClasses();
    this.m_NumAttributes = data.numAttributes();
    this.m_ClassType = data.classAttribute().type();
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Calculates the transformation probability of the attribute indexed
 * "m_AttrIndex" in test instance "m_Test" to the same attribute in
 * the train instance "m_Train".
 *
 * The scale parameter and the missing-value probability for the test
 * value are taken from the cache when present; otherwise they are
 * computed (entropy-based when the blend method is B_ENTROPY, blend-factor
 * based otherwise) and stored in the cache. As a side effect m_Scale and
 * m_MissingProb are updated.
 *
 * @return the missing-value probability if the train attribute value is
 *         missing, otherwise PStar of the absolute difference between the
 *         test and train values at the current scale
 */
public double transProb() {
  // Looked up once; the value is used as the cache key and for the distance.
  final double testValue = m_Test.value(m_AttrIndex);

  // If this attribute value has been encountered before, its scale and
  // missing-value probability are already in the numeric cache.
  if (m_Cache.containsKey(testValue)) {
    KStarCache.TableEntry entry = m_Cache.getCacheValues(testValue);
    m_Scale = entry.value;
    m_MissingProb = entry.pmiss;
  }
  else {
    if (m_BlendMethod == B_ENTROPY) {
      m_Scale = scaleFactorUsingEntropy();
    }
    else { // default is B_SPHERE
      m_Scale = scaleFactorUsingBlend();
    }
    // m_MissingProb is stored as left by the scale-factor computation.
    m_Cache.store(testValue, m_Scale, m_MissingProb);
  }

  // A missing train value has no distance to measure, so the precomputed
  // missing-value probability is used instead.
  if (m_Train.isMissing(m_AttrIndex)) {
    return m_MissingProb;
  }
  final double distance = Math.abs(testValue - m_Train.value(m_AttrIndex));
  return PStar(distance, m_Scale);
}
/**
 * Calculates the scale factor for the attribute indexed
 * "m_AttrIndex" in test instance "m_Test" using a global
 * blending factor (default value is 20%).
 *
 * Side effects: rebuilds m_Distances (train instances with a missing
 * value are marked with -1.0), recounts m_ActualCount, and sets
 * m_SmallestProb and m_AverageProb. m_MissingProb is set according to
 * m_MissingMode, except in the degenerate case below, where it is left
 * unchanged.
 *
 * Degenerate case: when no second distinct distance is found among the
 * train instances (for example all values are the same, or all are
 * missing), the scale is 1.0 and both probabilities are set to 1.0.
 *
 * @return the scale factor value
 */
private double scaleFactorUsingBlend() {
  int i, j, lowestcount = 0;
  double lowest = -1.0, nextlowest = -1.0;
  double root, broot, up, bot;
  double aimfor, min_val = 9e300, scale = 1.0;
  double avgprob = 0.0, minprob = 0.0, min_pos = 0.0;
  KStarWrapper botvals = new KStarWrapper();
  KStarWrapper upvals = new KStarWrapper();
  KStarWrapper vals = new KStarWrapper();
  // Loop-invariant: the test value does not change while scanning.
  final double testValue = m_Test.value(m_AttrIndex);
  // Start the count afresh so that a repeated invocation on the same
  // object does not accumulate counts from an earlier pass.
  m_ActualCount = 0;
  m_Distances = new double [m_NumInstances];
  for (j=0; j<m_NumInstances; j++) {
    final Instance trainInst = m_TrainSet.instance(j);
    if ( trainInst.isMissing(m_AttrIndex) ) {
      // mark the train instance with a missing value by setting
      // the distance to -1.0
      m_Distances[j] = -1.0;
    }
    else {
      m_Distances[j] = Math.abs(trainInst.value(m_AttrIndex) - testValue);
      // Track the smallest and second smallest distances (with a 1e-5
      // tolerance); -1.0 means "not seen yet".
      if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) {
        if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) {
          nextlowest = lowest;
          lowest = m_Distances[j];
          lowestcount = 1;
        }
        else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) {
          // record the number training instances (number n0) at
          // the smallest distance from test instance
          lowestcount++;
        }
        else {
          nextlowest = m_Distances[j];
        }
      }
      // records the actual number of instances with no missing value
      m_ActualCount++;
    }
  }
  if (nextlowest == -1 || lowest == -1) { // Data values are all the same
    scale = 1.0;
    m_SmallestProb = m_AverageProb = 1.0;
    return scale;
  }
  else {
    // starting point for root
    root = 1.0 / (nextlowest - lowest);
    i = 0;
    // given the expression: n0 <= E(scale) <= N
    // E(scale) = (N - n0) * b + n0 with blending factor: 0 <= b <= 1
    // aimfor = (N - n0) * b + n0
    aimfor = (m_ActualCount - lowestcount) *
      (double)m_BlendFactor / 100.0 + lowestcount;
    if (m_BlendFactor == 0) {
      aimfor += 1.0;
    }
    // root is bracketed in interval [bot,up]
    bot = 0.0 + ROOT_FINDER_ACCURACY / 2.0;
    up = root * 16; // This is bodgy
    // E(bot)
    calculateSphereSize(bot, botvals);
    botvals.sphere -= aimfor;
    // E(up)
    calculateSphereSize(up, upvals);
    upvals.sphere -= aimfor;
    if (botvals.sphere < 0) { // Couldn't include that many
      // instances - going for max possible
      min_pos = bot;
      avgprob = botvals.avgProb;
      minprob = botvals.minProb;
    }
    else if (upvals.sphere > 0) { // Couldn't include that few,
      // going for min possible
      min_pos = up;
      avgprob = upvals.avgProb;
      minprob = upvals.minProb;
    }
    else {
      // Root finding by interval halving; the candidate closest to the
      // target seen so far is remembered in min_pos.
      for (;;) {
        calculateSphereSize(root, vals);
        vals.sphere -= aimfor;
        if ( Math.abs(vals.sphere) < min_val ) {
          min_val = Math.abs(vals.sphere);
          min_pos = root;
          avgprob = vals.avgProb;
          minprob = vals.minProb;
        }
        if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ) {
          break; // converged to a solution, done!
        }
        if (vals.sphere > 0.0) {
          broot = (root + up) / 2.0;
          bot = root;
          root = broot;
        }
        else {
          broot = (root + bot) / 2.0;
          up = root;
          root = broot;
        }
        i++;
        if (i > ROOT_FINDER_MAX_ITER) {
          // Iteration budget exhausted without converging: settle for
          // the best candidate recorded so far.
          root = min_pos;
          break;
        }
      }
    }
    m_SmallestProb = minprob;
    m_AverageProb = avgprob;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode )
      {
      case M_DELETE:
        m_MissingProb = 0.0;
        break;
      case M_NORMAL:
        m_MissingProb = 1.0;
        break;
      case M_MAXDIFF:
        m_MissingProb = m_SmallestProb;
        break;
      case M_AVERAGE:
        m_MissingProb = m_AverageProb;
        break;
      }
    // set the scale factor value
    scale = min_pos;
    return scale;
  }
}
/**
* Calculates the size of the "sphere of influence" defined as:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -