📄 KStar.java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 *    KStar.java
 *    Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
 *    Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
 *
 */

package weka.classifiers.kstar;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.classifiers.*;
//import java.text.NumberFormat;

/**
 * K* is an instance-based classifier; that is, the class of a test
 * instance is based upon the class of those training instances
 * similar to it, as determined by some similarity function. The
 * underlying assumption of instance-based classifiers such as K*,
 * IB1, PEBLS, etc., is that similar instances will have similar
 * classes.
 *
 * For more information on K*, see <p>
 *
 * John G. Cleary and Leonard E. Trigg (1995) "K*: An Instance-based
 * Learner Using an Entropic Distance Measure",
 * <i>Proceedings of the 12th International Conference on Machine
 * Learning</i>, pp. 108-114.<p>
 *
 * @author Len Trigg (len@intelligenesis.net)
 * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
 * @version $Revision: 1.0 $
 */
public class KStar extends DistributionClassifier
  implements KStarConstants, OptionHandler, UpdateableClassifier,
             WeightedInstancesHandler {

  /** The training instances used for classification. */
  protected Instances m_Train;

  /** The number of instances in the dataset */
  protected int m_NumInstances;

  /** The number of class values */
  protected int m_NumClasses;

  /** The number of attributes */
  protected int m_NumAttributes;

  /** The class attribute type */
  protected int m_ClassType;

  /** Table of random class value columns */
  protected int [][] m_RandClassCols;

  /** Flag turning on and off the computation of random class columns */
  protected int m_ComputeRandomCols = ON;

  /** Flag turning on and off the initialisation of config variables */
  protected int m_InitFlag = ON;

  /**
   * A custom data structure for caching distinct attribute values
   * and their scale factor or stop parameter.
   */
  protected KStarCache [] m_Cache;

  /** Missing value treatment */
  protected int m_MissingMode = M_AVERAGE;

  /** 0 = use specified blend, 1 = entropic blend setting */
  protected int m_BlendMethod = B_SPHERE;

  /** Default sphere of influence blend setting */
  protected int m_GlobalBlend = 20;

  /** Define possible missing value handling methods */
  public static final Tag [] TAGS_MISSING = {
    new Tag(M_DELETE, "Ignore the instance with the missing value"),
    new Tag(M_MAXDIFF, "Treat missing values as maximally different"),
    new Tag(M_NORMAL, "Normalize over the attributes"),
    new Tag(M_AVERAGE, "Average column entropy curves")
  };

  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data
   * @exception Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    String debug = "(KStar.buildClassifier) ";
    if (instances.classIndex() < 0)
      throw new Exception("No class attribute assigned to instances");
    if (instances.checkForStringAttributes())
      throw new Exception("Can't handle string attributes!");
    m_Train = new Instances(instances, 0, instances.numInstances());
    // Throw away training instances with missing class
    m_Train.deleteWithMissingClass();
    // initializes class attributes ** java-speaking! :-) **
    init_m_Attributes();
  }

  /**
   * Adds the supplied instance to the training set.
   *
   * @param instance the instance to add
   * @exception Exception if instance could not be incorporated successfully
   */
  public void updateClassifier(Instance instance) throws Exception {
    String debug = "(KStar.updateClassifier) ";
    if (m_Train.equalHeaders(instance.dataset()) == false)
      throw new Exception("Incompatible instance types");
    if (instance.classIsMissing())
      return;
    m_Train.add(instance);
    // update relevant attributes ...
    update_m_Attributes();
  }

  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if an error occurred during the prediction
   */
  public double [] distributionForInstance(Instance instance) throws Exception {
    String debug = "(KStar.distributionForInstance) ";
    double transProb = 0.0, temp = 0.0;
    double [] classProbability = new double[m_NumClasses];
    double [] predictedValue = new double[1];

    // initialization ...
    for (int i = 0; i < classProbability.length; i++) {
      classProbability[i] = 0.0;
    }
    predictedValue[0] = 0.0;
    if (m_InitFlag == ON) {
      // need to compute them only once; they will be used for all instances.
      // We are doing this because the evaluation module controls the calls.
      if (m_BlendMethod == B_ENTROPY) {
        generateRandomClassColomns();
      }
      m_Cache = new KStarCache[m_NumAttributes];
      for (int i = 0; i < m_NumAttributes; i++) {
        m_Cache[i] = new KStarCache();
      }
      m_InitFlag = OFF;
      // System.out.println("Computing...");
    }
    // init done.
    Instance trainInstance;
    Enumeration enu = m_Train.enumerateInstances();
    while (enu.hasMoreElements()) {
      trainInstance = (Instance) enu.nextElement();
      transProb = instanceTransformationProbability(instance, trainInstance);
      switch (m_ClassType) {
      case Attribute.NOMINAL:
        classProbability[(int) trainInstance.classValue()] += transProb;
        break;
      case Attribute.NUMERIC:
        predictedValue[0] += transProb * trainInstance.classValue();
        temp += transProb;
        break;
      }
    }
    if (m_ClassType == Attribute.NOMINAL) {
      double sum = Utils.sum(classProbability);
      if (sum <= 0.0)
        for (int i = 0; i < classProbability.length; i++)
          classProbability[i] = 1.0 / m_NumClasses; // uniform fallback (1.0 avoids integer division)
      else
        Utils.normalize(classProbability, sum);
      return classProbability;
    }
    else {
      predictedValue[0] = (temp != 0) ? predictedValue[0] / temp : 0.0;
      return predictedValue;
    }
  }

  /**
   * Calculates the probability of the first instance transforming into the
   * second instance:
   * the probability is the product of the transformation probabilities of
   * the attributes, normalized over the number of instances used.
   *
   * @param first the test instance
   * @param second the train instance
   * @return transformation probability value
   */
  private double instanceTransformationProbability(Instance first,
                                                   Instance second) {
    String debug = "(KStar.instanceTransformationProbability) ";
    double transProb = 1.0;
    int numMissAttr = 0;
    for (int i = 0; i < m_NumAttributes; i++) {
      if (i == m_Train.classIndex()) {
        continue; // ignore class attribute
      }
      if (first.isMissing(i)) { // test instance attribute value is missing
        numMissAttr++;
        continue;
      }
      transProb *= attrTransProb(first, second, i);
      // normalize for missing values
      if (numMissAttr != m_NumAttributes) {
        transProb = Math.pow(transProb,
                             (double) m_NumAttributes / (m_NumAttributes - numMissAttr));
      }
      else { // weird case!
        transProb = 0.0;
      }
    }
    // normalize for the train dataset
    return transProb / m_NumInstances;
  }

  /**
   * Calculates the transformation probability of the indexed test attribute
   * to the indexed train attribute.
   *
   * @param first the test instance.
   * @param second the train instance.
   * @param col the index of the attribute in the instance.
   * @return the value of the transformation probability.
   */
  private double attrTransProb(Instance first, Instance second, int col) {
    String debug = "(KStar.attrTransProb)";
    double transProb = 0.0;
    KStarNominalAttribute ksNominalAttr;
    KStarNumericAttribute ksNumericAttr;
    switch (m_Train.attribute(col).type()) {
    case Attribute.NOMINAL:
      ksNominalAttr = new KStarNominalAttribute(first, second, col, m_Train,
                                                m_RandClassCols, m_Cache[col]);
      ksNominalAttr.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
      transProb = ksNominalAttr.transProb();
      ksNominalAttr = null;
      break;
    case Attribute.NUMERIC:
      ksNumericAttr = new KStarNumericAttribute(first, second, col, m_Train,
                                                m_RandClassCols, m_Cache[col]);
      ksNumericAttr.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
      transProb = ksNumericAttr.transProb();
      ksNumericAttr = null;
      break;
    }
    return transProb;
  }

  /**
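
The following is a minimal usage sketch showing how the classifier above might be trained and queried. It assumes the old Weka 3.x API reflected in this listing (weka.classifiers.kstar.KStar extending DistributionClassifier); in later Weka releases the class moved to weka.classifiers.lazy.KStar. The file name KStarDemo and the dataset path "iris.arff" are hypothetical placeholders, not part of the original source.

// KStarDemo.java -- illustrative sketch, not part of the original listing
import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;
import weka.classifiers.kstar.KStar;

public class KStarDemo {
  public static void main(String[] args) throws Exception {
    // Load an ARFF dataset (path is a placeholder) and use the last attribute as the class
    Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
    data.setClassIndex(data.numAttributes() - 1);

    // Train K* on the full dataset
    KStar kstar = new KStar();
    kstar.buildClassifier(data);

    // Query the class probability distribution for the first instance
    Instance test = data.instance(0);
    double[] dist = kstar.distributionForInstance(test);
    for (int i = 0; i < dist.length; i++) {
      System.out.println(data.classAttribute().value(i) + ": " + dist[i]);
    }
  }
}

Because KStar implements UpdateableClassifier, additional labelled instances could also be folded in incrementally via updateClassifier(instance) after the initial buildClassifier call.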