📄 naivebayes.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* NaiveBayes.java
* Copyright (C) 1999 Eibe Frank,Len Trigg
*
*/
package weka.classifiers.bayes;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.estimators.DiscreteEstimator;
import weka.estimators.Estimator;
import weka.estimators.KernelEstimator;
import weka.estimators.NormalEstimator;
/**
* Class for a Naive Bayes classifier using estimator classes. Numeric
* estimator precision values are chosen based on analysis of the
* training data. For this reason, the classifier is not an
* UpdateableClassifier (which in typical usage are initialized with zero
* training instances) -- if you need the UpdateableClassifier functionality,
* use the NaiveBayesUpdateable classifier. The NaiveBayesUpdateable
* classifier will use a default precision of 0.1 for numeric attributes
* when buildClassifier is called with zero training instances.
* <p>
* For more information on Naive Bayes classifiers, see<p>
*
* George H. John and Pat Langley (1995). <i>Estimating
* Continuous Distributions in Bayesian Classifiers</i>. Proceedings
* of the Eleventh Conference on Uncertainty in Artificial
* Intelligence. pp. 338-345. Morgan Kaufmann, San Mateo.<p>
*
* Valid options are:<p>
*
* -K <br>
* Use kernel estimation for modelling numeric attributes rather than
* a single normal distribution.<p>
*
* -D <br>
* Use supervised discretization to process numeric attributes.<p>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision$
*/
public class NaiveBayes extends Classifier
implements OptionHandler, WeightedInstancesHandler {
/** Version identifier used by Java serialization for this class. */
private static final long serialVersionUID = 5995231201785697655L;
/**
* The attribute estimators, indexed as [attribute][class value]. The array
* is allocated in buildClassifier with one row per non-class attribute and
* one column per class.
*/
protected Estimator [][] m_Distributions;
/** The class estimator. */
protected Estimator m_ClassDistribution;
/**
* Whether to use a kernel density estimator rather than a normal
* distribution for numeric attributes.
*/
protected boolean m_UseKernelEstimator = false;
/**
* Whether to use supervised discretization rather than a normal
* distribution for numeric attributes.
*/
protected boolean m_UseDiscretization = false;
/** The number of classes, read from the training data in buildClassifier. */
protected int m_NumClasses;
/**
* The dataset header for the purposes of printing out a semi-intelligible
* model. After buildClassifier finishes this holds the structure of the
* (possibly discretized) training data with no instances.
*/
protected Instances m_Instances;
/**
* The default precision used for numeric attributes; buildClassifier
* falls back to it when no precision can be derived from the training data.
*/
protected static final double DEFAULT_NUM_PRECISION = 0.01;
/**
* The discretization filter. Set by buildClassifier: a new filter when
* discretization is enabled, null otherwise.
*/
protected weka.filters.supervised.attribute.Discretize m_Disc = null;
/**
 * Supplies the human-readable description of this classifier.
 *
 * @return a description of the classifier suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  // The text is assembled from three paragraphs separated by blank lines.
  // All pieces are compile-time constants, so the result is one constant.
  final String paragraphBreak = "\n\n";

  // NOTE: the doubled space before "training data" is part of the
  // established text and is kept as is.
  final String overview =
    "Class for a Naive Bayes classifier using estimator classes."
    + " Numeric estimator precision values are chosen based on analysis of the "
    + " training data."
    + " For this reason, the classifier is not an UpdateableClassifier"
    + " (which in typical usage are initialized with zero training instances)"
    + " -- if you need the UpdateableClassifier functionality,"
    + " use the NaiveBayesUpdateable classifier."
    + " The NaiveBayesUpdateable classifier will use a default precision of 0.1"
    + " for numeric attributes when buildClassifier is called with zero"
    + " training instances.";

  final String pointer = "For more information on Naive Bayes classifiers, see";

  final String citation =
    "George H. John and Pat Langley (1995)."
    + " Estimating Continuous Distributions in Bayesian Classifiers."
    + " Proceedings of the Eleventh Conference on Uncertainty in Artificial Intelligence."
    + " pp. 338-345. Morgan Kaufmann, San Mateo.";

  return overview + paragraphBreak + pointer + paragraphBreak + citation;
}
/**
 * Generates the classifier from the given training data.
 * <p>
 * The data is copied and, if discretization is enabled, passed through the
 * supervised discretization filter. One estimator is then created for every
 * (attribute, class) pair and each training instance is handed to
 * updateClassifier. Afterwards only an empty header of the (possibly
 * discretized) data is retained.
 *
 * @param instances set of instances serving as training data
 * @exception UnsupportedAttributeTypeException if the data contains string
 * attributes
 * @exception UnsupportedClassTypeException if the class attribute is numeric
 * @exception Exception if the classifier has not been generated
 * successfully
 */
public void buildClassifier(Instances instances) throws Exception {

  if (instances.checkForStringAttributes()) {
    throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
  }
  if (instances.classAttribute().isNumeric()) {
    throw new UnsupportedClassTypeException("Naive Bayes: Class is numeric!");
  }
  m_NumClasses = instances.numClasses();
  if (m_NumClasses < 0) {
    throw new Exception ("Dataset has no class attribute");
  }

  // Work on a copy so that sorting below does not reorder the caller's data
  m_Instances = new Instances(instances);

  // Discretize instances if required
  if (m_UseDiscretization) {
    m_Disc = new weka.filters.supervised.attribute.Discretize();
    m_Disc.setInputFormat(m_Instances);
    m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
  } else {
    m_Disc = null;
  }

  // Reserve space for the distributions: one row per non-class attribute,
  // one column per class
  m_Distributions = new Estimator[m_Instances.numAttributes() - 1]
    [m_Instances.numClasses()];
  m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(),
                                              true);

  int attIndex = 0;
  Enumeration attEnum = m_Instances.enumerateAttributes();
  while (attEnum.hasMoreElements()) {
    Attribute attribute = (Attribute) attEnum.nextElement();

    // If the attribute is numeric, determine the estimator
    // numeric precision from differences between adjacent values
    double numPrecision = DEFAULT_NUM_PRECISION;
    if (attribute.type() == Attribute.NUMERIC) {
      m_Instances.sort(attribute);
      if ((m_Instances.numInstances() > 0)
          && !m_Instances.instance(0).isMissing(attribute)) {
        double lastVal = m_Instances.instance(0).value(attribute);
        double currentVal, deltaSum = 0;
        int distinct = 0;
        for (int i = 1; i < m_Instances.numInstances(); i++) {
          Instance currentInst = m_Instances.instance(i);
          // Stop at the first missing value in the sorted data
          if (currentInst.isMissing(attribute)) {
            break;
          }
          currentVal = currentInst.value(attribute);
          if (currentVal != lastVal) {
            deltaSum += currentVal - lastVal;
            lastVal = currentVal;
            distinct++;
          }
        }
        // Precision is the mean gap between consecutive distinct values;
        // with fewer than two distinct values the default is kept
        if (distinct > 0) {
          numPrecision = deltaSum / distinct;
        }
      }
    }

    // Create one estimator per class for this attribute
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      switch (attribute.type()) {
      case Attribute.NUMERIC:
        if (m_UseKernelEstimator) {
          m_Distributions[attIndex][j] =
            new KernelEstimator(numPrecision);
        } else {
          m_Distributions[attIndex][j] =
            new NormalEstimator(numPrecision);
        }
        break;
      case Attribute.NOMINAL:
        m_Distributions[attIndex][j] =
          new DiscreteEstimator(attribute.numValues(), true);
        break;
      default:
        throw new Exception("Attribute type unknown to NaiveBayes");
      }
    }
    attIndex++;
  }

  // Compute counts
  Enumeration instEnum = m_Instances.enumerateInstances();
  while (instEnum.hasMoreElements()) {
    Instance instance =
      (Instance) instEnum.nextElement();
    updateClassifier(instance);
  }

  // Save space: keep only the dataset structure, not the instances
  m_Instances = new Instances(m_Instances, 0);
}
/**
* Updates the classifier with the given instance.
*
* @param instance the new training instance to include in the model
* @exception Exception if the instance could not be incorporated in
* the model.
*/
public void updateClassifier(Instance instance) throws Exception {
if (!instance.classIsMissing()) {
Enumeration emAtts = m_Instances.emerateAttributes();
int attIndex = 0;
while (emAtts.hasMoreElements()) {
Attribute attribute = (Attribute) emAtts.nextElement();
if (!instance.isMissing(attribute)) {
m_Distributions[attIndex][(int)instance.classValue()].
addValue(instance.value(attribute), instance.weight());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -