📄 kstarnominalattribute.java
字号:
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * KStarNominalAttribute.java
 * Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz).
 * Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz).
 */

package weka.classifiers.kstar;

import java.io.*;
import java.util.*;
import weka.core.*;
import weka.classifiers.*;

/**
 * A custom class which provides the environment for computing the
 * transformation probability of a specified test instance nominal
 * attribute to a specified train instance nominal attribute.
 *
 * @author Len Trigg (len@intelligenesis.net)
 * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz)
 * @version $Revision 1.0 $
 */
public class KStarNominalAttribute implements KStarConstants {

  /** The training instances used for classification. */
  protected Instances m_TrainSet;

  /** The test instance. */
  protected Instance m_Test;

  /** The train instance. */
  protected Instance m_Train;

  /** The index of the nominal attribute in the test and train instances. */
  protected int m_AttrIndex;

  /** The stop parameter (probability mass assigned to an exact value match). */
  protected double m_Stop = 1.0;

  /**
   * Probability of the test attribute transforming into a train attribute
   * with a missing value.
   */
  protected double m_MissingProb = 1.0;

  /** Average probability of the test attribute transforming into a train attribute. */
  protected double m_AverageProb = 1.0;

  /** Smallest probability of the test attribute transforming into a train attribute. */
  protected double m_SmallestProb = 1.0;

  /** Number of training instances with no missing attribute values. */
  protected int m_TotalCount;

  /** Distribution of the attribute value in the train dataset. */
  protected int [] m_Distribution;

  /**
   * Set of columns: each column representing a randomised version of
   * the train dataset class column.
   */
  protected int [][] m_RandClassCols;

  /** A cache for storing attribute values and their corresponding stop parameters. */
  protected KStarCache m_Cache;

  // KStar Global settings

  /** The number of instances in the dataset. */
  protected int m_NumInstances;

  /** The number of class values. */
  protected int m_NumClasses;

  /** The number of attributes. */
  protected int m_NumAttributes;

  /** The class attribute type. */
  protected int m_ClassType;

  /** Missing value treatment (one of the M_* constants from KStarConstants). */
  protected int m_MissingMode = M_AVERAGE;

  /** B_SPHERE = use specified blend, B_ENTROPY = entropic blend setting. */
  protected int m_BlendMethod = B_SPHERE ;

  /** Default sphere of influence blend setting. */
  protected int m_BlendFactor = 20;

  /**
   * Constructor. Stores the test/train instance pair, the attribute index
   * being compared, and shared helper structures, then caches the dataset
   * dimensions via init().
   *
   * @param test the test instance
   * @param train the train instance
   * @param attrIndex the index of the nominal attribute in both instances
   * @param trainSet the training instances
   * @param randClassCol randomised versions of the train-set class column
   * @param cache cache of previously computed stop parameters per attribute value
   */
  public KStarNominalAttribute(Instance test, Instance train, int attrIndex,
                               Instances trainSet, int [][] randClassCol,
                               KStarCache cache) {
    m_Test = test;
    m_Train = train;
    m_AttrIndex = attrIndex;
    m_TrainSet = trainSet;
    m_RandClassCols = randClassCol;
    m_Cache = cache;
    init();
  }

  /**
   * Initializes the m_Attributes of the class: caches the number of
   * instances, classes and attributes of the training set, and the class
   * attribute type. Any exception is printed and swallowed, leaving the
   * fields at their defaults.
   */
  private void init() {
    try {
      m_NumInstances = m_TrainSet.numInstances();
      m_NumClasses = m_TrainSet.numClasses();
      m_NumAttributes = m_TrainSet.numAttributes();
      m_ClassType = m_TrainSet.classAttribute().type();
    } catch(Exception e) {
      // best-effort: on failure the counts stay at their default (0) values
      e.printStackTrace();
    }
  }

  /**
   * Calculates the probability of the indexed nominal attribute of the test
   * instance transforming into the indexed nominal attribute of the training
   * instance.
   *
   * @return the value of the transformation probability.
   */
  public double transProb() {
    String debug = "(KStarNominalAttribute.transProb) "; // unused; appears intended as a debug/log prefix
    double transProb = 0.0;
    // check if the attribute value has been encountered before,
    // in which case it should be in the nominal cache
    if (m_Cache.containsKey(m_Test.value(m_AttrIndex))) {
      KStarCache.TableEntry te = m_Cache.getCacheValues(m_Test.value(m_AttrIndex));
      m_Stop = te.value;
      m_MissingProb = te.pmiss;
    } else {
      // not cached: we have to compute the parameters
      generateAttrDistribution();
      if (m_BlendMethod == B_ENTROPY) {
        m_Stop = stopProbUsingEntropy();
      } else { // default is B_SPHERE
        m_Stop = stopProbUsingBlend();
      }
      // store the values in cache for subsequent instances with this value
      m_Cache.store( m_Test.value(m_AttrIndex), m_Stop, m_MissingProb );
    }
    // we've got our m_Stop, then what?
    if (m_Train.isMissing(m_AttrIndex)) {
      transProb = m_MissingProb;
    } else {
      try {
        // base probability: remaining mass spread uniformly over all values,
        // plus the stop parameter when the values match exactly
        transProb = (1.0 - m_Stop) / m_Test.attribute(m_AttrIndex).numValues();
        if ( (int)m_Test.value(m_AttrIndex) == (int)m_Train.value(m_AttrIndex) ) {
          transProb += m_Stop;
        }
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return transProb;
  }

  /**
   * Calculates the "stop parameter" for this attribute using
   * the entropy method: the value is computed using a root finder
   * algorithm. The method takes advantage of the calculation to
   * compute the smallest and average transformation probabilities
   * once the stop factor is obtained. It also sets the transformation
   * probability to an attribute with a missing value.
   *
   * @return the value of the stop parameter.
   */
  private double stopProbUsingEntropy() {
    String debug = "(KStarNominalAttribute.stopProbUsingEntropy)";
    if ( m_ClassType != Attribute.NOMINAL ) {
      // hard exit: the entropic blend is only defined for nominal classes
      System.err.println("Error: "+debug+" attribute class must be nominal!");
      System.exit(1);
    }
    int itcount = 0;
    double stopProb;
    double lower, upper, pstop;
    double bestminprob = 0.0, bestpsum = 0.0;
    double bestdiff = 0.0, bestpstop = 0.0;
    double currentdiff, lastdiff, stepsize, delta;
    KStarWrapper botvals = new KStarWrapper();
    KStarWrapper upvals = new KStarWrapper();
    KStarWrapper vals = new KStarWrapper();

    // Initial values for root finder
    lower = 0.0 + ROOT_FINDER_ACCURACY/2.0;
    upper = 1.0 - ROOT_FINDER_ACCURACY/2.0;

    // Find (approx) entropy ranges
    calculateEntropy(upper, upvals);
    calculateEntropy(lower, botvals);

    if (upvals.avgProb == 0) {
      // When there are no training instances with the test value:
      // doesn't matter what exact value we use for pstop, just acts as
      // a constant scale factor in this case.
      calculateEntropy(lower, vals);
    } else {
      // Optimise the scale factor: start from whichever end of the range
      // has the larger (random - actual) entropy gap and step toward the other
      if ( (upvals.randEntropy - upvals.actEntropy < botvals.randEntropy - botvals.actEntropy) &&
           (botvals.randEntropy - botvals.actEntropy > FLOOR) ) {
        bestpstop = pstop = lower;
        stepsize = INITIAL_STEP;
        bestminprob = botvals.minProb;
        bestpsum = botvals.avgProb;
      } else {
        bestpstop = pstop = upper;
        stepsize = -INITIAL_STEP;
        bestminprob = upvals.minProb;
        bestpsum = upvals.avgProb;
      }
      bestdiff = currentdiff = FLOOR;
      itcount = 0;
      /* Enter the root finder */
      while (true) {
        itcount++;
        lastdiff = currentdiff;
        pstop += stepsize;
        if (pstop <= lower) {
          // clamped at the lower bound; force a direction reversal below
          pstop = lower;
          currentdiff = 0.0;
          delta = -1.0;
        } else if (pstop >= upper) {
          // clamped at the upper bound; force a direction reversal below
          pstop = upper;
          currentdiff = 0.0;
          delta = -1.0;
        } else {
          calculateEntropy(pstop, vals);
          currentdiff = vals.randEntropy - vals.actEntropy;
          if (currentdiff < FLOOR) {
            currentdiff = FLOOR;
            if ((Math.abs(stepsize) < INITIAL_STEP) && (bestdiff == FLOOR)) {
              // no improvement ever found at fine step size: give up and
              // fall back to the lower-bound values
              bestpstop = lower;
              bestminprob = botvals.minProb;
              bestpsum = botvals.avgProb;
              break;
            }
          }
          delta = currentdiff - lastdiff;
        }
        if (currentdiff > bestdiff) {
          // track the best entropy gap seen so far
          bestdiff = currentdiff;
          bestpstop = pstop;
          bestminprob = vals.minProb;
          bestpsum = vals.avgProb;
        }
        if (delta < 0) {
          // overshot the optimum: halve the step and reverse direction,
          // stopping once the step is below the required accuracy
          if (Math.abs(stepsize) < ROOT_FINDER_ACCURACY) {
            break;
          } else {
            stepsize /= -2.0;
          }
        }
        if (itcount > ROOT_FINDER_MAX_ITER) {
          break;
        }
      }
    }
    m_SmallestProb = bestminprob;
    m_AverageProb = bestpsum;
    // Set the probability of transforming to a missing value
    switch ( m_MissingMode ) {
    case M_DELETE:
      m_MissingProb = 0.0;
      break;
// NOTE(review): the source chunk ends here — the remaining cases of this
// switch, the end of stopProbUsingEntropy(), and the rest of the class are
// not visible in this chunk.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -