// kstar.java
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* KS.java
* Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
* Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
*
*/
package weka.classifiers.lazy;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.lazy.kstar.KStarCache;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.classifiers.lazy.kstar.KStarNominalAttribute;
import weka.classifiers.lazy.kstar.KStarNumericAttribute;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
//import java.text.NumberFormat;
/**
* K* is an instance-based classifier, that is the class of a test
* instance is based upon the class of those training instances
* similar to it, as determined by some similarity function. The
* underlying assumption of instance-based classifiers such as K*,
* IB1, PEBLS, etc, is that similar instances will have similar
* classes.
*
* For more information on K*, see <p>
*
* John, G. Cleary and Leonard, E. Trigg (1995) "K*: An Instance-
* based Learner Using an Entropic Distance Measure",
* <i>Proceedings of the 12th International Conference on Machine
* learning</i>, pp. 108-114.<p>
*
* @author Len Trigg (len@reeltwo.com)
* @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
* @version $Revision 1.0 $
*/
public class KStar extends Classifier
implements KStarConstants, UpdateableClassifier {
/** The training instances used for classification. */
protected Instances m_Train;
/** The number of instances in the training dataset */
protected int m_NumInstances;
/** The number of class values */
protected int m_NumClasses;
/** The number of attributes (including the class attribute) */
protected int m_NumAttributes;
/** The class attribute type (Attribute.NOMINAL or Attribute.NUMERIC) */
protected int m_ClassType;
/** Table of random class value columns, built when the entropic blend method is used */
protected int [][] m_RandClassCols;
/** Flag turning on and off the computation of random class columns */
protected int m_ComputeRandomCols = ON;
/** Flag turning on and off the initialisation of config variables */
protected int m_InitFlag = ON;
/**
 * A custom data structure for caching distinct attribute values
 * and their scale factor or stop parameter. One cache entry per attribute.
 */
protected KStarCache [] m_Cache;
/** Missing value treatment; one of the M_* constants (see TAGS_MISSING below) */
protected int m_MissingMode = M_AVERAGE;
/** Blend method: the specified global blend (B_SPHERE) or the entropic blend setting (B_ENTROPY) */
protected int m_BlendMethod = B_SPHERE;
/** Default sphere-of-influence blend setting */
protected int m_GlobalBlend = 20;
/** Define possible missing value handling methods */
public static final Tag [] TAGS_MISSING = {
new Tag(M_DELETE, "Ignore the instances with missing values"),
new Tag(M_MAXDIFF, "Treat missing values as maximally different"),
new Tag(M_NORMAL, "Normalize over the attributes"),
new Tag(M_AVERAGE, "Average column entropy curves")
};
/**
 * Returns a string describing this classifier, suitable for display
 * in the explorer/experimenter GUI.
 *
 * @return a description of the classifier
 */
public String globalInfo() {
    String text = "K* is an instance-based classifier, that is the class of a test ";
    text += "instance is based upon the class of those training instances ";
    text += "similar to it, as determined by some similarity function. It differs ";
    text += "from other instance-based learners in that it uses an entropy-based ";
    text += "distance function. For more information on K*, see\n\n";
    text += "John, G. Cleary and Leonard, E. Trigg (1995) \"K*: An Instance- ";
    text += "based Learner Using an Entropic Distance Measure\", ";
    text += "Proceedings of the 12th International Conference on Machine ";
    text += "learning, pp. 108-114.";
    return text;
}
/**
 * Generates the classifier from the supplied training data.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if no class attribute is assigned or the data
 * contains string attributes
 */
public void buildClassifier(Instances instances) throws Exception {
    if (instances.classIndex() < 0) {
        throw new Exception("No class attribute assigned to instances");
    }
    if (instances.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    // Work on a copy so the caller's dataset is left untouched.
    m_Train = new Instances(instances, 0, instances.numInstances());
    // Throw away training instances with a missing class value; they
    // cannot contribute to classification.
    m_Train.deleteWithMissingClass();
    // Initialize the dataset-dependent member variables.
    init_m_Attributes();
}
/**
 * Adds the supplied instance to the training set.
 *
 * @param instance the instance to add
 * @exception Exception if the instance's header is incompatible with the
 * training data
 */
public void updateClassifier(Instance instance) throws Exception {
    if (!m_Train.equalHeaders(instance.dataset())) {
        throw new Exception("Incompatible instance types");
    }
    if (instance.classIsMissing()) {
        // An instance without a class value contributes nothing; skip it.
        return;
    }
    m_Train.add(instance);
    // Refresh the dataset-dependent member variables.
    update_m_Attributes();
}
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return the predicted class probability distribution (nominal class), or a
 * one-element array holding the predicted value (numeric class)
 * @exception Exception if an error occurred during the prediction
 */
public double [] distributionForInstance(Instance instance) throws Exception {
    double transProb = 0.0, temp = 0.0;
    double [] classProbability = new double[m_NumClasses];
    double [] predictedValue = new double[1];
    // initialization ...
    for (int i = 0; i < classProbability.length; i++) {
        classProbability[i] = 0.0;
    }
    predictedValue[0] = 0.0;
    if (m_InitFlag == ON) {
        // These need to be computed only once and are reused for all test
        // instances (the evaluation module controls the calls).
        if (m_BlendMethod == B_ENTROPY) {
            generateRandomClassColomns();
        }
        m_Cache = new KStarCache[m_NumAttributes];
        for (int i = 0; i < m_NumAttributes; i++) {
            m_Cache[i] = new KStarCache();
        }
        m_InitFlag = OFF;
    }
    // Sum the transformation probabilities over all training instances.
    Instance trainInstance;
    // Fixed: was m_Train.emerateInstances(), which is not an Instances method.
    Enumeration em = m_Train.enumerateInstances();
    while (em.hasMoreElements()) {
        trainInstance = (Instance) em.nextElement();
        transProb = instanceTransformationProbability(instance, trainInstance);
        switch (m_ClassType) {
        case Attribute.NOMINAL:
            classProbability[(int) trainInstance.classValue()] += transProb;
            break;
        case Attribute.NUMERIC:
            predictedValue[0] += transProb * trainInstance.classValue();
            temp += transProb;
            break;
        }
    }
    if (m_ClassType == Attribute.NOMINAL) {
        double sum = Utils.sum(classProbability);
        if (sum <= 0.0) {
            // No training instance contributed: fall back to a uniform
            // distribution. Fixed: "1/m_NumClasses" was integer division
            // between two ints and always produced 0 for m_NumClasses > 1.
            for (int i = 0; i < classProbability.length; i++) {
                classProbability[i] = 1.0 / m_NumClasses;
            }
        } else {
            Utils.normalize(classProbability, sum);
        }
        return classProbability;
    } else {
        // Probability-weighted mean of the training class values; guard
        // against a zero total weight to avoid dividing by zero.
        predictedValue[0] = (temp != 0) ? predictedValue[0] / temp : 0.0;
        return predictedValue;
    }
}
/**
 * Calculate the probability of the first instance transforming into the
 * second instance:
 * the probability is the product of the transformation probabilities of
 * the attributes, normalized over the number of instances used.
 *
 * @param first the test instance
 * @param second the train instance
 * @return transformation probability value
 */
private double instanceTransformationProbability(Instance first,
Instance second) {
String debug = "(KStar.instanceTransformationProbability) ";
double transProb = 1.0;
int numMissAttr = 0;
for (int i = 0; i < m_NumAttributes; i++) {
if (i == m_Train.classIndex()) {
continue; // ignore class attribute
}
if (first.isMissing(i)) { // test instance attribute value is missing
numMissAttr++;
continue;
}
transProb *= attrTransProb(first, second, i);
// Re-scale the running product so instances with missing test-attribute
// values stay comparable to fully-observed ones.
// NOTE(review): this renormalization runs on EVERY loop iteration, so
// Math.pow is applied repeatedly to the accumulated product rather than
// once after the loop. This matches the historical implementation, but
// confirm it is the intended K* weighting before restructuring.
if (numMissAttr != m_NumAttributes) {
transProb = Math.pow(transProb, (double)m_NumAttributes /
(m_NumAttributes - numMissAttr));
}
else { // weird case: every (reached) attribute was missing
transProb = 0.0;
}
}
// normalize over the size of the training set
return transProb / m_NumInstances;
}
/**
 * Computes the transformation probability of the indexed test attribute
 * value into the corresponding train attribute value.
 *
 * @param first the test instance
 * @param second the train instance
 * @param col the index of the attribute in the instance
 * @return the transformation probability, or 0.0 for attribute types that
 * are neither nominal nor numeric
 */
private double attrTransProb(Instance first, Instance second, int col) {
    double probability = 0.0;
    int attributeType = m_Train.attribute(col).type();
    if (attributeType == Attribute.NOMINAL) {
        KStarNominalAttribute nominal =
            new KStarNominalAttribute(first, second, col, m_Train,
                                      m_RandClassCols, m_Cache[col]);
        nominal.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
        probability = nominal.transProb();
    } else if (attributeType == Attribute.NUMERIC) {
        KStarNumericAttribute numeric =
            new KStarNumericAttribute(first, second, col, m_Train,
                                      m_RandClassCols, m_Cache[col]);
        numeric.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
        probability = numeric.transProb();
    }
    return probability;
}
// (remainder of the KStar class is truncated in this view)