kstar.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 709 行 · 第 1/2 页
JAVA
709 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    KStar.java *    Copyright (c) 1995-97 by Len Trigg (trigg@cs.waikato.ac.nz). *    Java port to Weka by Abdelaziz Mahoui (am14@cs.waikato.ac.nz). * */package weka.classifiers.lazy;import weka.classifiers.Classifier;import weka.classifiers.UpdateableClassifier;import weka.classifiers.lazy.kstar.KStarCache;import weka.classifiers.lazy.kstar.KStarConstants;import weka.classifiers.lazy.kstar.KStarNominalAttribute;import weka.classifiers.lazy.kstar.KStarNumericAttribute;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * K* is an instance-based classifier, that is the class of a test instance is based upon the class of those training instances similar to it, as determined by some similarity function.  
It differs from other instance-based learners in that it uses an entropy-based distance function.<br/> * <br/> * For more information on K*, see<br/> * <br/> * John G. Cleary, Leonard E. Trigg: K*: An Instance-based Learner Using an Entropic Distance Measure. In: 12th International Conference on Machine Learning, 108-114, 1995. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Cleary1995, *    author = {John G. Cleary and Leonard E. Trigg}, *    booktitle = {12th International Conference on Machine Learning}, *    pages = {108-114}, *    title = {K*: An Instance-based Learner Using an Entropic Distance Measure}, *    year = {1995} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -B &lt;num&gt; *  Manual blend setting (default 20%) * </pre> *  * <pre> -E *  Enable entropic auto-blend setting (symbolic class only) * </pre> *  * <pre> -M &lt;char&gt; *  Specify the missing value treatment mode (default a) *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal) * </pre> *  <!-- options-end --> * * @author Len Trigg (len@reeltwo.com) * @author Abdelaziz Mahoui (am14@cs.waikato.ac.nz) * @version $Revision: 1.7 $ */public class KStar   extends Classifier  implements KStarConstants, UpdateableClassifier, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 332458330800479083L;    /** The training instances used for classification. 
*/  protected Instances m_Train;   /** The number of instances in the dataset */  protected int m_NumInstances;  /** The number of class values */  protected int m_NumClasses;  /** The number of attributes */  protected int m_NumAttributes;  /** The class attribute type */  protected int m_ClassType;  /** Table of random class value colomns */  protected int [][] m_RandClassCols;  /** Flag turning on and off the computation of random class colomns */  protected int m_ComputeRandomCols = ON;  /** Flag turning on and off the initialisation of config variables */  protected int m_InitFlag = ON;  /**   * A custom data structure for caching distinct attribute values   * and their scale factor or stop parameter.   */  protected KStarCache [] m_Cache;  /** missing value treatment */  protected int m_MissingMode = M_AVERAGE;  /** 0 = use specified blend, 1 = entropic blend setting */  protected int m_BlendMethod = B_SPHERE;  /** default sphere of influence blend setting */  protected int m_GlobalBlend = 20;  /** Define possible missing value handling methods */  public static final Tag [] TAGS_MISSING = {    new Tag(M_DELETE, "Ignore the instances with missing values"),    new Tag(M_MAXDIFF, "Treat missing values as maximally different"),    new Tag(M_NORMAL, "Normalize over the attributes"),    new Tag(M_AVERAGE, "Average column entropy curves")      };      /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "K* is an instance-based classifier, that is the class of a test "      + "instance is based upon the class of those training instances "      + "similar to it, as determined by some similarity function.  
It differs "      + "from other instance-based learners in that it uses an entropy-based "      + "distance function.\n\n"      + "For more information on K*, see\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "John G. Cleary and Leonard E. Trigg");    result.setValue(Field.TITLE, "K*: An Instance-based Learner Using an Entropic Distance Measure");    result.setValue(Field.BOOKTITLE, "12th International Conference on Machine Learning");    result.setValue(Field.YEAR, "1995");    result.setValue(Field.PAGES, "108-114");        return result;  }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.DATE_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.NUMERIC_CLASS);    result.enable(Capability.DATE_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);        return result;  }  /**   * Generates the classifier.   
*   * @param instances set of instances serving as training data    * @throws Exception if the classifier has not been generated successfully   */  public void buildClassifier(Instances instances) throws Exception {    String debug = "(KStar.buildClassifier) ";    // can classifier handle the data?    getCapabilities().testWithFail(instances);    // remove instances with missing class    instances = new Instances(instances);    instances.deleteWithMissingClass();        m_Train = new Instances(instances, 0, instances.numInstances());    // initializes class attributes ** java-speaking! :-) **    init_m_Attributes();  }    /**   * Adds the supplied instance to the training set   *   * @param instance the instance to add   * @throws Exception if instance could not be incorporated successfully   */  public void updateClassifier(Instance instance) throws Exception {    String debug = "(KStar.updateClassifier) ";    if (m_Train.equalHeaders(instance.dataset()) == false)      throw new Exception("Incompatible instance types");    if ( instance.classIsMissing() )      return;    m_Train.add(instance);    // update relevant attributes ...    update_m_Attributes();  }  /**   * Calculates the class membership probabilities for the given test instance.   *   * @param instance the instance to be classified   * @return predicted class probability distribution   * @throws Exception if an error occurred during the prediction   */  public double [] distributionForInstance(Instance instance) throws Exception {    String debug = "(KStar.distributionForInstance) ";    double transProb = 0.0, temp = 0.0;    double [] classProbability = new double[m_NumClasses];    double [] predictedValue = new double[1];    // initialization ...    for (int i=0; i<classProbability.length; i++) {      classProbability[i] = 0.0;    }    predictedValue[0] = 0.0;    if (m_InitFlag == ON) {	// need to compute them only once and will be used for all instances.	
// We are doing this because the evaluation module controls the calls.       if (m_BlendMethod == B_ENTROPY) {	generateRandomClassColomns();      }      m_Cache = new KStarCache[m_NumAttributes];      for (int i=0; i<m_NumAttributes;i++) {	m_Cache[i] = new KStarCache();      }      m_InitFlag = OFF;      //      System.out.println("Computing...");    }    // init done.    Instance trainInstance;    Enumeration enu = m_Train.enumerateInstances();    while ( enu.hasMoreElements() ) {      trainInstance = (Instance)enu.nextElement();      transProb = instanceTransformationProbability(instance, trainInstance);            switch ( m_ClassType )	{	case Attribute.NOMINAL:	  classProbability[(int)trainInstance.classValue()] += transProb;	  break;	case Attribute.NUMERIC:	  predictedValue[0] += transProb * trainInstance.classValue();	  temp += transProb;	  break;	}    }    if (m_ClassType == Attribute.NOMINAL) {      double sum = Utils.sum(classProbability);      if (sum <= 0.0)	for (int i=0; i<classProbability.length; i++)	  classProbability[i] = (double) 1/ (double) m_NumClasses;      else Utils.normalize(classProbability, sum);      return classProbability;    }    else {      predictedValue[0] = (temp != 0) ? predictedValue[0] / temp : 0.0;      return predictedValue;    }  }  /**   * Calculate the probability of the first instance transforming into the    * second instance:   * the probability is the product of the transformation probabilities of    * the attributes normilized over the number of instances used.   
*    * @param first the test instance   * @param second the train instance   * @return transformation probability value   */  private double instanceTransformationProbability(Instance first, 						   Instance second) {    String debug = "(KStar.instanceTransformationProbability) ";    double transProb = 1.0;    int numMissAttr = 0;    for (int i = 0; i < m_NumAttributes; i++) {      if (i == m_Train.classIndex()) {	continue; // ignore class attribute      }      if (first.isMissing(i)) { // test instance attribute value is missing	numMissAttr++;	continue;      }      transProb *= attrTransProb(first, second, i);      // normilize for missing values      if (numMissAttr != m_NumAttributes) {	transProb = Math.pow(transProb, (double)m_NumAttributes / 			     (m_NumAttributes - numMissAttr));      }      else { // weird case!	transProb = 0.0;      }    }    // normilize for the train dataset     return transProb / m_NumInstances;  }  /**   * Calculates the transformation probability of the indexed test attribute    * to the indexed train attribute.   *
kstar.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 kstar.java 源码文件，采用 Java 编程语言编写，共 709 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?