kstar.java
Java source code for the complete workflow of the ALPHAMINER data mining package
Language: Java
Page 1 of 2
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 *    KS.java
 *    Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
 *    Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
 *
 */


package weka.classifiers.lazy;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.UpdateableClassifier;
import weka.classifiers.lazy.kstar.KStarCache;
import weka.classifiers.lazy.kstar.KStarConstants;
import weka.classifiers.lazy.kstar.KStarNominalAttribute;
import weka.classifiers.lazy.kstar.KStarNumericAttribute;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;

//import java.text.NumberFormat;

/**
 * K* is an instance-based classifier; that is, the class of a test
 * instance is based upon the class of those training instances
 * similar to it, as determined by some similarity function.  The
 * underlying assumption of instance-based classifiers such as K*,
 * IB1, PEBLS, etc., is that similar instances will have similar
 * classes.
 *
 * For more information on K*, see <p>
 * 
 * John G. Cleary and Leonard E. Trigg (1995) "K*: An Instance-based
 * Learner Using an Entropic Distance Measure",
 * <i>Proceedings of the 12th International Conference on Machine
 * Learning</i>, pp. 108-114.<p>
 *
 * @author Len Trigg (len@reeltwo.com)
 * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
 * @version $Revision: 1.0 $
 */

public class KStar extends Classifier
  implements KStarConstants, UpdateableClassifier {

  /** The training instances used for classification. */
  protected Instances m_Train; 

  /** The number of instances in the dataset */
  protected int m_NumInstances;

  /** The number of class values */
  protected int m_NumClasses;

  /** The number of attributes */
  protected int m_NumAttributes;

  /** The class attribute type */
  protected int m_ClassType;

  /** Table of random class value columns */
  protected int [][] m_RandClassCols;

  /** Flag turning on and off the computation of random class columns */
  protected int m_ComputeRandomCols = ON;

  /** Flag turning on and off the initialisation of config variables */
  protected int m_InitFlag = ON;

  /**
   * A custom data structure for caching distinct attribute values
   * and their scale factor or stop parameter.
   */
  protected KStarCache [] m_Cache;

  /** missing value treatment */
  protected int m_MissingMode = M_AVERAGE;

  /** 0 = use specified blend, 1 = entropic blend setting */
  protected int m_BlendMethod = B_SPHERE;

  /** default sphere of influence blend setting */
  protected int m_GlobalBlend = 20;

  /** Define possible missing value handling methods */
  public static final Tag [] TAGS_MISSING = {
    new Tag(M_DELETE, "Ignore the instances with missing values"),
    new Tag(M_MAXDIFF, "Treat missing values as maximally different"),
    new Tag(M_NORMAL, "Normalize over the attributes"),
    new Tag(M_AVERAGE, "Average column entropy curves")
      };
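
  /*
   * Usage sketch for choosing a missing-value strategy: the mode is
   * normally selected through a SelectedTag built from the constants
   * above.  This assumes the setMissingMode setter that Weka's KStar
   * defines later in the class (not shown on this page):
   *
   *   KStar ks = new KStar();
   *   ks.setMissingMode(new SelectedTag(KStar.M_AVERAGE, KStar.TAGS_MISSING));
   */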
    
  /**
   * Returns a string describing classifier
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "K* is an instance-based classifier, that is the class of a test "
      + "instance is based upon the class of those training instances "
      + "similar to it, as determined by some similarity function.  It differs "
      + "from other instance-based learners in that it uses an entropy-based "
      + "distance function. For more information on K*, see\n\n"
      + "John, G. Cleary and Leonard, E. Trigg (1995) \"K*: An Instance- "
      + "based Learner Using an Entropic Distance Measure\", "
      + "Proceedings of the 12th International Conference on Machine "
      + "learning, pp. 108-114.";
  }

  /**
   * Generates the classifier.
   *
   * @param instances set of instances serving as training data 
   * @exception Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    String debug = "(KStar.buildClassifier) ";

    if (instances.classIndex() < 0)
      throw new Exception ("No class attribute assigned to instances");
    if (instances.checkForStringAttributes())
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    m_Train = new Instances(instances, 0, instances.numInstances());
    // Throw away training instances with missing class
    m_Train.deleteWithMissingClass();
    // initializes class attributes ** java-speaking! :-) **
    init_m_Attributes();
  }
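
  /*
   * A minimal training sketch, assuming the standard weka.core loaders
   * (the ARFF file name is only an example; exception handling omitted):
   *
   *   Instances data = new Instances(
   *       new java.io.BufferedReader(new java.io.FileReader("iris.arff")));
   *   data.setClassIndex(data.numAttributes() - 1); // class must be assigned
   *   KStar ks = new KStar();
   *   ks.buildClassifier(data);                     // copies the training set
   */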
  
  /**
   * Adds the supplied instance to the training set
   *
   * @param instance the instance to add
   * @exception Exception if instance could not be incorporated successfully
   */
  public void updateClassifier(Instance instance) throws Exception {
    String debug = "(KStar.updateClassifier) ";
    if (m_Train.equalHeaders(instance.dataset()) == false)
      throw new Exception("Incompatible instance types");
    if ( instance.classIsMissing() )
      return;
    m_Train.add(instance);
    // update relevant attributes ...
    update_m_Attributes();
  }
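
  /*
   * Because KStar implements UpdateableClassifier it can also be trained
   * incrementally: build on the dataset structure, then feed instances one
   * at a time.  A sketch, assuming `data` is an Instances object whose
   * class index is already set:
   *
   *   KStar ks = new KStar();
   *   ks.buildClassifier(new Instances(data, 0));   // header only, no rows
   *   for (int i = 0; i < data.numInstances(); i++) {
   *     ks.updateClassifier(data.instance(i));      // skips missing-class rows
   *   }
   */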

  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @exception Exception if an error occurred during the prediction
   */
  public double [] distributionForInstance(Instance instance) throws Exception {

    String debug = "(KStar.distributionForInstance) ";
    double transProb = 0.0, temp = 0.0;
    double [] classProbability = new double[m_NumClasses];
    double [] predictedValue = new double[1];

    // initialization ...
    for (int i=0; i<classProbability.length; i++) {
      classProbability[i] = 0.0;
    }
    predictedValue[0] = 0.0;
    if (m_InitFlag == ON) {
	// need to compute them only once and will be used for all instances.
	// We are doing this because the evaluation module controls the calls. 
      if (m_BlendMethod == B_ENTROPY) {
	generateRandomClassColomns();
      }
      m_Cache = new KStarCache[m_NumAttributes];
      for (int i=0; i<m_NumAttributes;i++) {
	m_Cache[i] = new KStarCache();
      }
      m_InitFlag = OFF;
      //      System.out.println("Computing...");
    }
    // init done.
    Instance trainInstance;
    Enumeration em = m_Train.enumerateInstances();
    while ( em.hasMoreElements() ) {
      trainInstance = (Instance)em.nextElement();
      transProb = instanceTransformationProbability(instance, trainInstance);      
      switch ( m_ClassType )
	{
	case Attribute.NOMINAL:
	  classProbability[(int)trainInstance.classValue()] += transProb;
	  break;
	case Attribute.NUMERIC:
	  predictedValue[0] += transProb * trainInstance.classValue();
	  temp += transProb;
	  break;
	}
    }
    if (m_ClassType == Attribute.NOMINAL) {
      double sum = Utils.sum(classProbability);
      if (sum <= 0.0)
	for (int i=0; i<classProbability.length; i++)
	  classProbability[i] = 1.0 / m_NumClasses;
      else Utils.normalize(classProbability, sum);
      return classProbability;
    }
    else {
      predictedValue[0] = (temp != 0) ? predictedValue[0] / temp : 0.0;
      return predictedValue;
    }
  }
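
  /*
   * Prediction sketch: for a nominal class the returned array is a
   * normalized probability distribution; for a numeric class it holds a
   * single weighted-average prediction.  Assuming a trained classifier
   * `ks` and a test instance `inst` built from a compatible header:
   *
   *   double[] dist  = ks.distributionForInstance(inst);
   *   double   label = ks.classifyInstance(inst);  // argmax of dist (nominal)
   */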

  /**
   * Calculate the probability of the first instance transforming into the 
   * second instance:
   * the probability is the product of the transformation probabilities of 
   * the attributes normalized over the number of instances used.
   * 
   * @param first the test instance
   * @param second the train instance
   * @return transformation probability value
   */
  private double instanceTransformationProbability(Instance first, 
						   Instance second) {
    String debug = "(KStar.instanceTransformationProbability) ";
    double transProb = 1.0;
    int numMissAttr = 0;
    for (int i = 0; i < m_NumAttributes; i++) {
      if (i == m_Train.classIndex()) {
	continue; // ignore class attribute
      }
      if (first.isMissing(i)) { // test instance attribute value is missing
	numMissAttr++;
	continue;
      }
      transProb *= attrTransProb(first, second, i);
      // normalize for missing values
      if (numMissAttr != m_NumAttributes) {
	transProb = Math.pow(transProb, (double)m_NumAttributes / 
			     (m_NumAttributes - numMissAttr));
      }
      else { // weird case!
	transProb = 0.0;
      }
    }
    // normalize for the train dataset
    return transProb / m_NumInstances;
  }
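
  /*
   * Worked example of the normalization above: with m_NumAttributes = 4
   * (three predictors plus the class) and no missing test values,
   * numMissAttr stays 0, the exponent m_NumAttributes /
   * (m_NumAttributes - numMissAttr) is always 1, and the result is simply
   * p0 * p1 * p2 / m_NumInstances, the product of the per-attribute
   * transformation probabilities divided by the number of training
   * instances.  When a test value is missing, the running product is
   * re-raised to that exponent on each later attribute, compensating for
   * the skipped factor.
   */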

  /**
   * Calculates the transformation probability of the indexed test attribute 
   * to the indexed train attribute.
   *
   * @param first the test instance.
   * @param second the train instance.
   * @param col the index of the attribute in the instance.
   * @return the value of the transformation probability.
   */
  private double attrTransProb(Instance first, Instance second, int col) {
    String debug = "(KStar.attrTransProb)";
    double transProb = 0.0;
    KStarNominalAttribute ksNominalAttr;
    KStarNumericAttribute ksNumericAttr;
    switch ( m_Train.attribute(col).type() )
      {
      case Attribute.NOMINAL:
	ksNominalAttr = new KStarNominalAttribute(first, second, col, m_Train, 
						  m_RandClassCols, 
						  m_Cache[col]);
	ksNominalAttr.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
	transProb = ksNominalAttr.transProb();
	ksNominalAttr = null;
	break;

      case Attribute.NUMERIC:
	ksNumericAttr = new KStarNumericAttribute(first, second, col, 
						  m_Train, m_RandClassCols, 
						  m_Cache[col]);
	ksNumericAttr.setOptions(m_MissingMode, m_BlendMethod, m_GlobalBlend);
	transProb = ksNumericAttr.transProb();
	ksNumericAttr = null;
	break;
      }
    return transProb;
  }
