📄 kstarnumericattribute.java

📁 weka 源代码很好的对于学习数据挖掘算法很有帮助
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//** *    KStarNumericAttribute.java *    Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz). *    Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz). * */package weka.classifiers.kstar;import java.io.*;import java.util.*;import weka.core.*;import weka.classifiers.*;/** * A custom class which provides the environment for computing the * transformation probability of a specified test instance numeric * attribute to a specified train instance numeric attribute. * * @author Len Trigg (len@intelligenesis.net) * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz) * @version $Revision 1.0 $ */public class KStarNumericAttribute implements KStarConstants {  /** The training instances used for classification. */  protected Instances m_TrainSet;  /** The test instance */  protected Instance m_Test;  /** The train instance */  protected Instance m_Train;  /** The index of the attribute in the test and train instances */  protected int m_AttrIndex;  /** The scale parameter */  protected double m_Scale = 1.0;  /** Probability of test attribute transforming into train attribute       with missing value */  protected double m_MissingProb = 1.0;  /** Average probability of test attribute transforming into train       attribute */  protected double m_AverageProb = 1.0;  /** Smallest probability of test attribute transforming into train       attribute */  protected double m_SmallestProb = 1.0;  /** The set of disctances from the test attribute to the set of train       attributes */  protected double [] m_Distances;  /** Set of colomns: each colomn representing a randomised version of       the train dataset class colomn */  protected int [][] m_RandClassCols;  /** The number of train instances with no missing attribute values */  protected int m_ActualCount = 0;  /** A cache for storing attribute values and their corresponding scale       parameters */  protected KStarCache m_Cache;  /** The number of instances in the dataset */  protected int m_NumInstances;  /** The number of class values */  protected int m_NumClasses;  /** The number of attributes */  protected int m_NumAttributes;  /** The class attribute type */  protected int m_ClassType;  /** missing value treatment */  protected int m_MissingMode = M_AVERAGE;  /** 0 = use specified blend, 1 = entropic blend setting */  protected int m_BlendMethod = B_SPHERE ;  /** default sphere of influence blend setting */  protected int m_BlendFactor = 20;    /**   * Constructor   */  public KStarNumericAttribute(Instance test, Instance train, int attrIndex,			       Instances trainSet, 			       int [][] randClassCols, 			       KStarCache cache)  {    m_Test      = test;    m_Train     = train;    m_AttrIndex = attrIndex;    m_TrainSet  = trainSet;    m_RandClassCols = randClassCols;    m_Cache = cache;    init();  }  /**   * Initializes the m_Attributes of the class.   */  private void init() {    try {      m_NumInstances  = m_TrainSet.numInstances();      m_NumClasses    = m_TrainSet.numClasses();      m_NumAttributes = m_TrainSet.numAttributes();      m_ClassType     = m_TrainSet.classAttribute().type();    } catch(Exception e) {      e.printStackTrace();    }  }    /**   * Calculates the transformation probability of the attribute indexed   * "m_AttrIndex" in test instance "m_Test" to the same attribute in   * the train instance "m_Train".   *   * @return the probability value   */  public double transProb() {    String debug = "(KStarNumericAttribute.transProb) ";    double transProb, distance, scale;    // check if the attribute value has been encountred before    // in which case it should be in the numeric cache    if ( m_Cache.containsKey(m_Test.value(m_AttrIndex))) {      KStarCache.TableEntry te = 	m_Cache.getCacheValues( m_Test.value(m_AttrIndex) );      m_Scale = te.value;      m_MissingProb = te.pmiss;    }    else {      if (m_BlendMethod == B_ENTROPY) {	m_Scale = scaleFactorUsingEntropy();      }      else { // default is B_SPHERE	m_Scale = scaleFactorUsingBlend();      }      m_Cache.store( m_Test.value(m_AttrIndex), m_Scale, m_MissingProb );    }    // now what???    if (m_Train.isMissing(m_AttrIndex)) {      transProb = m_MissingProb;    }    else {      distance = 	Math.abs( m_Test.value(m_AttrIndex) - m_Train.value(m_AttrIndex) );      transProb = PStar( distance, m_Scale );    }    return transProb;  }    /**   * Calculates the scale factor for the attribute indexed   * "m_AttrIndex" in test instance "m_Test" using a global   * blending factor (default value is 20%).   *   * @return the scale factor value   */  private double scaleFactorUsingBlend() {    String debug = "(KStarNumericAttribute.scaleFactorUsingBlend)";    int i, j, lowestcount = 0, count = 0;    double lowest = -1.0, nextlowest = -1.0;    double root, broot, up, bot;    double aimfor, min_val = 9e300, scale = 1.0;    double avgprob = 0.0, minprob = 0.0, min_pos = 0.0;    KStarWrapper botvals = new KStarWrapper();    KStarWrapper upvals = new KStarWrapper();    KStarWrapper vals = new KStarWrapper();    m_Distances = new double [m_NumInstances];    for (j=0; j<m_NumInstances; j++) {      if ( m_TrainSet.instance(j).isMissing(m_AttrIndex) ) {	// mark the train instance with a missing value by setting 	// the distance to -1.0	m_Distances[j] = -1.0;      }      else {	m_Distances[j] = Math.abs(m_TrainSet.instance(j).value(m_AttrIndex) - 				  m_Test.value(m_AttrIndex));	if ( (m_Distances[j]+1e-5) < nextlowest || nextlowest == -1.0 ) {	  if ( (m_Distances[j]+1e-5) < lowest || lowest == -1.0 ) {	    nextlowest = lowest;	    lowest = m_Distances[j];	    lowestcount = 1;	  }	  else if ( Math.abs(m_Distances[j]-lowest) < 1e-5 ) {	    // record the number training instances (number n0) at	    // the smallest distance from test instance	    lowestcount++;	  }	  else {	    nextlowest = m_Distances[j];	  }	}	// records the actual number of instances with no missing value	m_ActualCount++;      }    }        if (nextlowest == -1 || lowest == -1) { // Data values are all the same      scale = 1.0;      m_SmallestProb = m_AverageProb = 1.0;      return scale;    }    else {      // starting point for root      root = 1.0 / (nextlowest - lowest);      i = 0;      // given the expression: n0 <= E(scale) <= N      // E(scale) =  (N - n0) * b + n0  with blending factor: 0 <= b <= 1      // aimfor = (N - n0) * b + n0      aimfor = (m_ActualCount - lowestcount) * 	(double)m_BlendFactor / 100.0 + lowestcount;      if (m_BlendFactor == 0) {	aimfor += 1.0;      }      // root is bracketed in interval [bot,up]      bot = 0.0 + ROOT_FINDER_ACCURACY / 2.0;      up = root * 16;     // This is bodgy      // E(bot)      calculateSphereSize(bot, botvals);      botvals.sphere -= aimfor;      // E(up)      calculateSphereSize(up, upvals);      upvals.sphere -= aimfor;            if (botvals.sphere < 0) {    // Couldn't include that many 	                           // instances - going for max possible	min_pos = bot;	avgprob = botvals.avgProb;	minprob = botvals.minProb;      }      else if (upvals.sphere > 0) { // Couldn't include that few, 	                            // going for min possible	min_pos = up;	avgprob = upvals.avgProb;	minprob = upvals.minProb;      }      else {	// Root finding Algorithm starts here !	for (;;) {	  calculateSphereSize(root, vals);	  vals.sphere -= aimfor;	  if ( Math.abs(vals.sphere) < min_val ) {	    min_val = Math.abs(vals.sphere);	    min_pos = root;	    avgprob = vals.avgProb;	    minprob = vals.minProb;	  }	  if ( Math.abs(vals.sphere) <= ROOT_FINDER_ACCURACY ) {	    break;        // converged to a solution, done!	  }	  if (vals.sphere > 0.0) {	    broot = (root + up) / 2.0;	    bot = root;	    root = broot;	  }	  else {	    broot = (root + bot) / 2.0;	    up = root;	    root = broot;	  }	  i++;	  if (i > ROOT_FINDER_MAX_ITER) {	    //	    System.err.println("Warning: "+debug+" 	    // ROOT_FINDER_MAX_ITER exceeded");	    root = min_pos;	    break;	  }	}      }      m_SmallestProb = minprob;      m_AverageProb = avgprob;      // Set the probability of transforming to a missing value      switch ( m_MissingMode )	{	case M_DELETE:	  m_MissingProb = 0.0;	  break;	case M_NORMAL:	  m_MissingProb = 1.0;	  break;	case M_MAXDIFF:	  m_MissingProb = m_SmallestProb;	  break;	case M_AVERAGE:	  m_MissingProb = m_AverageProb;	  break;	}      // set the scale factor value      scale = min_pos;      return scale;    }  }    /**   * Calculates the size of the "sphere of influence" defined as:
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -