📄 complementnaivebayes.java
字号:
return "Class for building and using a Complement class Naive Bayes "
    + "classifier.\n\nFor more information see, \n\n"
    + getTechnicalInformation().toString() + "\n\n"
    + "P.S.: TF, IDF and length normalization transforms, as "
    + "described in the paper, can be performed through "
    + "weka.filters.unsupervised.StringToWordVector.";
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR,
        "Jason D. Rennie and Lawrence Shih and Jaime Teevan and David R. Karger");
    result.setValue(Field.TITLE,
        "Tackling the Poor Assumptions of Naive Bayes Text Classifiers");
    result.setValue(Field.BOOKTITLE, "ICML");
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.PAGES, "616-623");
    result.setValue(Field.PUBLISHER, "AAAI Press");
    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Generates the classifier.
   *
   * For every class c and every word attribute w, computes the smoothed
   * log-likelihood of w occurring in all classes OTHER than c (the
   * "complement" weight of Rennie et al., 2003), optionally normalized so
   * that the absolute weights of each class sum to one.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been built successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    numClasses = instances.numClasses();
    int numAttributes = instances.numAttributes();

    header = new Instances(instances, 0);
    double[][] ocrnceOfWordInClass = new double[numClasses][numAttributes];
    wordWeights = new double[numClasses][numAttributes];
    double[] wordsPerClass = new double[numClasses];
    double totalWordOccurrences = 0;
    // Total smoothing mass: one smoothingParameter per word attribute
    // (numAttributes - 1 excludes the class attribute).
    double sumOfSmoothingParams = (numAttributes - 1) * smoothingParameter;
    int classIndex = instances.instance(0).classIndex();

    // Accumulate per-class and global word occurrence counts, weighted by
    // each instance's weight.
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance instance = instances.instance(i);
      int docClass = (int) instance.value(classIndex);

      for (int a = 0; a < instance.numValues(); a++) {
        // NOTE(review): 'a' is a sparse position index; confirm that
        // isMissing(int) expects a position (not an attribute index) in
        // this Weka version -- preserved as in the original.
        if (instance.index(a) == classIndex || instance.isMissing(a)) {
          continue;
        }
        double numOccurrences = instance.valueSparse(a) * instance.weight();
        if (numOccurrences < 0) {
          throw new Exception("Numeric attribute values must all be greater"
              + " or equal to zero.");
        }
        totalWordOccurrences += numOccurrences;
        wordsPerClass[docClass] += numOccurrences;
        ocrnceOfWordInClass[docClass][instance.index(a)] += numOccurrences;
        // For the time being wordWeights[0][i] holds the total occurrence
        // of word i over ALL classes; row 0 is overwritten with class 0's
        // weights last, after the other rows have been derived from it.
        wordWeights[0][instance.index(a)] += numOccurrences;
      }
    }

    // Complement class weights for every class except 0 (row 0 still
    // carries the global totals the other rows need).
    for (int c = 1; c < numClasses; c++) {
      // total occurrence of words in classes other than c
      double totalWordOcrnces = totalWordOccurrences - wordsPerClass[c];
      for (int w = 0; w < numAttributes; w++) {
        if (w != classIndex) {
          // occurrence of w in classes other than c
          double ocrncesOfWord = wordWeights[0][w] - ocrnceOfWordInClass[c][w];
          wordWeights[c][w] = Math.log((ocrncesOfWord + smoothingParameter)
              / (totalWordOcrnces + sumOfSmoothingParams));
        }
      }
    }

    // Now the complement weights for class 0, overwriting the totals row.
    // (Loop-invariant denominator hoisted out of the inner loop.)
    double totalWordOcrncesExcl0 = totalWordOccurrences - wordsPerClass[0];
    for (int w = 0; w < numAttributes; w++) {
      if (w != classIndex) {
        double ocrncesOfWord = wordWeights[0][w] - ocrnceOfWordInClass[0][w];
        wordWeights[0][w] = Math.log((ocrncesOfWord + smoothingParameter)
            / (totalWordOcrncesExcl0 + sumOfSmoothingParams));
      }
    }

    // Optionally normalize each class's weight vector by its L1 norm.
    if (m_normalizeWordWeights) {
      for (int c = 0; c < numClasses; c++) {
        double sum = 0;
        for (int w = 0; w < numAttributes; w++) {
          if (w != classIndex) {
            sum += Math.abs(wordWeights[c][w]);
          }
        }
        for (int w = 0; w < numAttributes; w++) {
          if (w != classIndex) {
            wordWeights[c][w] = wordWeights[c][w] / sum;
          }
        }
      }
    }
  }

  /**
   * Classifies a given instance. <p>
   *
   * The classification rule is: <br>
   * MinC(forAllWords(ti*Wci)) <br>
   * where <br>
   * ti is the frequency of word i in the given instance <br>
   * Wci is the weight of word i in Class c. <p>
   *
   * For more information see section 4.4 of the paper mentioned above
   * in the classifiers description.
   *
   * @param instance the instance to classify
   * @return the index of the class the instance is most likely to belong.
   * @throws Exception if the classifier has not been built yet.
   */
  public double classifyInstance(Instance instance) throws Exception {
    if (wordWeights == null) {
      throw new Exception("Error. The classifier has not been built properly.");
    }

    double[] valueForClass = new double[numClasses];

    for (int c = 0; c < numClasses; c++) {
      double sumOfWordValues = 0;
      for (int w = 0; w < instance.numValues(); w++) {
        if (instance.index(w) != instance.classIndex()) {
          double freqOfWordInDoc = instance.valueSparse(w);
          sumOfWordValues += freqOfWordInDoc
              * wordWeights[c][instance.index(w)];
        }
      }
      valueForClass[c] = sumOfWordValues;
    }

    // The complement rule picks the class with the MINIMUM score.
    int minidx = 0;
    for (int i = 0; i < numClasses; i++) {
      if (valueForClass[i] < valueForClass[minidx]) {
        minidx = i;
      }
    }
    return minidx;
  }

  /**
   * Prints out the internal model built by the classifier. In this case
   * it prints out the word weights calculated when building the classifier.
   */
  public String toString() {
    if (wordWeights == null) {
      return "The classifier hasn't been built yet.";
    }

    int numAttributes = header.numAttributes();
    StringBuilder result = new StringBuilder(
        "The word weights for each class are: \n"
        + "------------------------------------\n\t");
    for (int c = 0; c < numClasses; c++) {
      result.append(header.classAttribute().value(c)).append("\t");
    }
    result.append("\n");
    for (int w = 0; w < numAttributes; w++) {
      result.append(header.attribute(w).name()).append("\t");
      for (int c = 0; c < numClasses; c++) {
        result.append(Double.toString(wordWeights[c][w])).append("\t");
      }
      result.append("\n");
    }
    return result.toString();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the options
   */
  public static void main(String[] argv) {
    runClassifier(new ComplementNaiveBayes(), argv);
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -