naivebayes.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 562 行 · 第 1/2 页
JAVA
562 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    NaiveBayes.java *    Copyright (C) 1999 Eibe Frank,Len Trigg * */package weka.classifiers.bayes;import weka.classifiers.Classifier;import weka.core.Attribute;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.estimators.DiscreteEstimator;import weka.estimators.Estimator;import weka.estimators.KernelEstimator;import weka.estimators.NormalEstimator;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Class for a Naive Bayes classifier using estimator classes. Numeric estimator precision values are chosen based on analysis of the  training data. For this reason, the classifier is not an UpdateableClassifier (which in typical usage are initialized with zero training instances) -- if you need the UpdateableClassifier functionality, use the NaiveBayesUpdateable classifier. 
The NaiveBayesUpdateable classifier will  use a default precision of 0.1 for numeric attributes when buildClassifier is called with zero training instances.<br/> * <br/> * For more information on Naive Bayes classifiers, see<br/> * <br/> * George H. John, Pat Langley: Estimating Continuous Distributions in Bayesian Classifiers. In: Eleventh Conference on Uncertainty in Artificial Intelligence, San Mateo, 338-345, 1995. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{John1995, *    address = {San Mateo}, *    author = {George H. John and Pat Langley}, *    booktitle = {Eleventh Conference on Uncertainty in Artificial Intelligence}, *    pages = {338-345}, *    publisher = {Morgan Kaufmann}, *    title = {Estimating Continuous Distributions in Bayesian Classifiers}, *    year = {1995} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -K *  Use kernel density estimator rather than normal *  distribution for numeric attributes</pre> *  * <pre> -D *  Use supervised discretization to process numeric attributes * </pre> *  <!-- options-end --> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @version $Revision: 1.20 $ */public class NaiveBayes extends Classifier   implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 5995231201785697655L;    /** The attribute estimators. */  protected Estimator [][] m_Distributions;    /** The class estimator. 
*/  protected Estimator m_ClassDistribution;  /**   * Whether to use kernel density estimator rather than normal distribution   * for numeric attributes   */  protected boolean m_UseKernelEstimator = false;  /**   * Whether to use discretization than normal distribution   * for numeric attributes   */  protected boolean m_UseDiscretization = false;  /** The number of classes (or 1 for numeric class) */  protected int m_NumClasses;  /**   * The dataset header for the purposes of printing out a semi-intelligible    * model    */  protected Instances m_Instances;  /*** The precision parameter used for numeric attributes */  protected static final double DEFAULT_NUM_PRECISION = 0.01;  /**   * The discretization filter.   */  protected weka.filters.supervised.attribute.Discretize m_Disc = null;  /**   * Returns a string describing this classifier   * @return a description of the classifier suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Class for a Naive Bayes classifier using estimator classes. Numeric"      +" estimator precision values are chosen based on analysis of the "      +" training data. For this reason, the classifier is not an"      +" UpdateableClassifier (which in typical usage are initialized with zero"      +" training instances) -- if you need the UpdateableClassifier functionality,"      +" use the NaiveBayesUpdateable classifier. The NaiveBayesUpdateable"      +" classifier will  use a default precision of 0.1 for numeric attributes"      +" when buildClassifier is called with zero training instances.\n\n"      +"For more information on Naive Bayes classifiers, see\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "George H. John and Pat Langley");    result.setValue(Field.TITLE, "Estimating Continuous Distributions in Bayesian Classifiers");    result.setValue(Field.BOOKTITLE, "Eleventh Conference on Uncertainty in Artificial Intelligence");    result.setValue(Field.YEAR, "1995");    result.setValue(Field.PAGES, "338-345");    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");    result.setValue(Field.ADDRESS, "San Mateo");        return result;  }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.NUMERIC_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);        return result;  }  /**   * Generates the classifier.   *   * @param instances set of instances serving as training data    * @exception Exception if the classifier has not been generated    * successfully   */  public void buildClassifier(Instances instances) throws Exception {    // can classifier handle the data?    
getCapabilities().testWithFail(instances);    // remove instances with missing class    instances = new Instances(instances);    instances.deleteWithMissingClass();        m_NumClasses = instances.numClasses();        // Copy the instances    m_Instances = new Instances(instances);    // Discretize instances if required    if (m_UseDiscretization) {      m_Disc = new weka.filters.supervised.attribute.Discretize();      m_Disc.setInputFormat(m_Instances);      m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);    } else {      m_Disc = null;    }    // Reserve space for the distributions    m_Distributions = new Estimator[m_Instances.numAttributes() - 1]    [m_Instances.numClasses()];    m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), 						true);    int attIndex = 0;    Enumeration enu = m_Instances.enumerateAttributes();    while (enu.hasMoreElements()) {      Attribute attribute = (Attribute) enu.nextElement();      // If the attribute is numeric, determine the estimator       // numeric precision from differences between adjacent values      double numPrecision = DEFAULT_NUM_PRECISION;      if (attribute.type() == Attribute.NUMERIC) {	m_Instances.sort(attribute);	if ((m_Instances.numInstances() > 0)	    && !m_Instances.instance(0).isMissing(attribute)) {	  double lastVal = m_Instances.instance(0).value(attribute);	  double currentVal, deltaSum = 0;	  int distinct = 0;	  for (int i = 1; i < m_Instances.numInstances(); i++) {	    Instance currentInst = m_Instances.instance(i);	    if (currentInst.isMissing(attribute)) {	      break;	    }	    currentVal = currentInst.value(attribute);	    if (currentVal != lastVal) {	      deltaSum += currentVal - lastVal;	      lastVal = currentVal;	      distinct++;	    }	  }	  if (distinct > 0) {	    numPrecision = deltaSum / distinct;	  }	}      }      for (int j = 0; j < m_Instances.numClasses(); j++) {	switch (attribute.type()) {	case Attribute.NUMERIC: 	  if (m_UseKernelEstimator) {	    
m_Distributions[attIndex][j] = 	    new KernelEstimator(numPrecision);	  } else {	    m_Distributions[attIndex][j] = 	    new NormalEstimator(numPrecision);	  }	  break;	case Attribute.NOMINAL:	  m_Distributions[attIndex][j] = 	  new DiscreteEstimator(attribute.numValues(), true);	  break;	default:	  throw new Exception("Attribute type unknown to NaiveBayes");	}      }
naivebayes.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 naivebayes.java 源码文件，采用 Java 编程语言编写，共 562 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?