⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 complementnaivebayes.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    ComplementNaiveBayes.java *    Copyright (C) 2003 Ashraf M. Kibriya */package weka.classifiers.bayes;import weka.classifiers.Classifier;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;/** <!-- globalinfo-start --> * Class for building and using a Complement class Naive Bayes classifier.<br/> * <br/> * For more information see, <br/> * <br/> * Jason D. Rennie, Lawrence Shih, Jaime Teevan, David R. Karger: Tackling the Poor Assumptions of Naive Bayes Text Classifiers. In: ICML, 616-623, 2003.<br/> * <br/> * P.S.: TF, IDF and length normalization transforms, as described in the paper, can be performed through weka.filters.unsupervised.StringToWordVector. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Rennie2003, *    author = {Jason D. Rennie and Lawrence Shih and Jaime Teevan and David R. 
 * Karger},
 *    booktitle = {ICML},
 *    pages = {616-623},
 *    publisher = {AAAI Press},
 *    title = {Tackling the Poor Assumptions of Naive Bayes Text Classifiers},
 *    year = {2003}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -N
 *  Normalize the word weights for each class
 * </pre>
 *
 * <pre> -S
 *  Smoothing value to avoid zero WordGivenClass probabilities (default=1.0).
 * </pre>
 *
 <!-- options-end -->
 *
 * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
 * @version $Revision: 1.7 $
 */
public class ComplementNaiveBayes extends Classifier
    implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {

    /** for serialization */
    static final long serialVersionUID = 7246302925903086397L;

    /**
       Weight of words for each class. The weight is actually the
       log of the probability of a word (w) given a class (c)
       (i.e. log(Pr[w|c])). The format of the matrix is:
       wordWeights[class][wordAttribute]
    */
    private double[][] wordWeights;

    /** Holds the smoothing value to avoid word probabilities of zero.<br>
        P.S.: According to the paper this is the Alpha i parameter
     */
    private double smoothingParameter = 1.0;

    /** True if the words weights are to be normalized */
    private boolean m_normalizeWordWeights = false;

    /** Holds the number of Class values present in the set of specified
        instances */
    private int numClasses;

    /** The instances header that'll be used in toString */
    private Instances header;

    /**
     * Returns an enumeration describing the available options.
     *
     * @return an enumeration of all the available options.
*/    public java.util.Enumeration listOptions() {        FastVector newVector = new FastVector(2);        newVector.addElement(        new Option("\tNormalize the word weights for each class\n",                   "N", 0,"-N"));        newVector.addElement(        new Option("\tSmoothing value to avoid zero WordGivenClass"+                   " probabilities (default=1.0).\n",                   "S", 1,"-S"));                return newVector.elements();    }        /**     * Gets the current settings of the classifier.     *     * @return an array of strings suitable for passing to setOptions     */    public String[] getOptions() {        String options[] = new String[4];        int current=0;                if(getNormalizeWordWeights())            options[current++] = "-N";                options[current++] = "-S";        options[current++] = Double.toString(smoothingParameter);                while (current < options.length) {            options[current++] = "";        }                return options;    }            /**     * Parses a given list of options. <p/>     *     <!-- options-start -->     * Valid options are: <p/>     *      * <pre> -N     *  Normalize the word weights for each class     * </pre>     *      * <pre> -S     *  Smoothing value to avoid zero WordGivenClass probabilities (default=1.0).     
* </pre>     *      <!-- options-end -->     *     * @param options the list of options as an array of strings     * @throws Exception if an option is not supported     */    public void setOptions(String[] options) throws Exception {                setNormalizeWordWeights(Utils.getFlag('N', options));                String val = Utils.getOption('S', options);        if(val.length()!=0)          setSmoothingParameter(Double.parseDouble(val));        else          setSmoothingParameter(1.0);    }        /**     * Returns true if the word weights for each class are to be normalized     *      * @return true if the word weights are normalized     */    public boolean getNormalizeWordWeights() {        return m_normalizeWordWeights;    }        /**     * Sets whether if the word weights for each class should be normalized     *      * @param doNormalize whether the word weights are to be normalized     */    public void setNormalizeWordWeights(boolean doNormalize) {        m_normalizeWordWeights = doNormalize;    }        /**     * Returns the tip text for this property     * @return tip text for this property suitable for     * displaying in the explorer/experimenter gui     */    public String normalizeWordWeightsTipText() {        return "Normalizes the word weights for each class.";    }        /**     * Gets the smoothing value to be used to avoid zero WordGivenClass     * probabilities.     
     *
     * @return the smoothing value
     */
    public double getSmoothingParameter() {
        return smoothingParameter;
    }

    /**
     * Sets the smoothing value used to avoid zero WordGivenClass probabilities.
     *
     * @param val the new smoothing value
     */
    public void setSmoothingParameter(double val) {
        smoothingParameter = val;
    }

    /**
     * Returns the tip text for this property.
     *
     * @return tip text for this property suitable for
     * displaying in the explorer/experimenter gui
     */
    public String smoothingParameterTipText() {
        return "Sets the smoothing parameter to avoid zero WordGivenClass "+
               "probabilities (default=1.0).";
    }

    /**
     * Returns a string describing this classifier.
     *
     * @return a description of the classifier suitable for
     * displaying in the explorer/experimenter gui
     */
    public String globalInfo() {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -