📄 complementnaivebayes.java
字号:
/**
 * Generates the classifier by computing the complement-class word weights
 * from the training data (Rennie et al.'s Complement Naive Bayes).
 *
 * @param instances set of instances serving as training data; the class
 *        attribute must be nominal and all other attributes numeric
 * @exception Exception if the classifier has not been built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    numClasses = instances.numClasses();
    int numAttributes = instances.numAttributes();

    // First check that all attributes (except the class attribute) are
    // numeric and that the class attribute is nominal. (The original code
    // repeated this check once per class; the check is independent of the
    // class, so a single pass is sufficient and behaviorally identical.)
    for (int idx = 0; idx < numAttributes; idx++) {
        if (instances.classIndex() == idx) {
            if (!instances.attribute(idx).isNominal())
                throw new Exception("ComplementNaiveBayes cannot "+
                                    "handle non-nominal class attribute");
        } else if (!instances.attribute(idx).isNumeric()) {
            throw new Exception("Attribute "+
                                instances.attribute(idx).name()+
                                " is not numeric! "+
                                "ComplementNaiveBayes requires "+
                                "that all attributes (except the "+
                                "class attribute) are numeric.");
        }
    }

    // Keep an empty copy of the header for use by toString().
    header = new Instances(instances, 0);

    double [][] ocrnceOfWordInClass = new double[numClasses][numAttributes];
    wordWeights = new double[numClasses][numAttributes];
    double[] wordsPerClass = new double[numClasses];
    double totalWordOccurrences = 0;
    double sumOfSmoothingParams = (numAttributes-1)*smoothingParameter;
    // Class index of the dataset. (Original code read it from instance(0),
    // which fails on an empty training set; Instances.classIndex() yields
    // the same value without requiring any instances.)
    int classIndex = instances.classIndex();
    Instance instance;
    int docClass;
    double numOccurrences;

    // Accumulate per-class and total word occurrence counts.
    // FIX: original code called instances.emerateInstances(), a garbled
    // form of enumerateInstances() that does not compile.
    java.util.Enumeration enumInsts = instances.enumerateInstances();
    while (enumInsts.hasMoreElements()) {
        instance = (Instance) enumInsts.nextElement();
        docClass = (int)instance.value(classIndex);
        for(int a = 0; a<instance.numValues(); a++) {
            if(instance.index(a) != instance.classIndex()) {
                if(!instance.isMissing(a)) {
                    numOccurrences = instance.valueSparse(a) * instance.weight();
                    if(numOccurrences < 0)
                        throw new Exception("Numeric attribute"+
                                            " values must all be greater"+
                                            " or equal to zero.");
                    totalWordOccurrences += numOccurrences;
                    wordsPerClass[docClass] += numOccurrences;
                    ocrnceOfWordInClass[docClass]
                                       [instance.index(a)] += numOccurrences;
                    // For the time being wordWeights[0][i] holds the total
                    // occurrence of word i over all classes; it is replaced
                    // by class 0's weights in the final loop below.
                    wordWeights[0]
                               [instance.index(a)] += numOccurrences;
                }
            }
        }
    }

    // Calculate the complement class probability for all classes except 0.
    for(int c=1; c<numClasses; c++) {
        // Total occurrence of words in classes other than c.
        double totalWordOcrnces = totalWordOccurrences - wordsPerClass[c];
        for(int w=0; w<numAttributes; w++) {
            if(w != classIndex) {
                // Occurrence of w in classes other than c.
                double ocrncesOfWord =
                    wordWeights[0][w] - ocrnceOfWordInClass[c][w];
                wordWeights[c][w] =
                    Math.log((ocrncesOfWord+smoothingParameter) /
                             (totalWordOcrnces+sumOfSmoothingParams));
            }
        }
    }

    // Now calculate the complement class probability for class 0. This must
    // come last because wordWeights[0] doubles as the total-count buffer.
    for(int w=0; w<numAttributes; w++) {
        if(w != classIndex) {
            double ocrncesOfWord = wordWeights[0][w] - ocrnceOfWordInClass[0][w];
            double totalWordOcrnces = totalWordOccurrences - wordsPerClass[0];
            wordWeights[0][w] =
                Math.log((ocrncesOfWord+smoothingParameter) /
                         (totalWordOcrnces+sumOfSmoothingParams));
        }
    }

    // Optionally normalize each class's weight vector by its L1 norm.
    if(m_normalizeWordWeights) {
        for(int c=0; c<numClasses; c++) {
            double sum=0;
            for(int w=0; w<numAttributes; w++) {
                if(w!=classIndex)
                    sum += Math.abs(wordWeights[c][w]);
            }
            for(int w=0; w<numAttributes; w++) {
                if(w!=classIndex) {
                    wordWeights[c][w] = wordWeights[c][w]/sum;
                }
            }
        }
    }
}
/**
 * Classifies a given instance. <p>
 *
 * The classification rule is: <br>
 * MinC(forAllWords(ti*Wci)) <br>
 * where <br>
 * ti is the frequency of word i in the given instance <br>
 * Wci is the weight of word i in Class c. <p>
 *
 * For more information see section 4.4 of the paper mentioned above
 * in the classifiers description.
 *
 * @param instance the instance to classify
 * @return the index of the class the instance is most likely to belong.
 * @exception Exception if the classifier has not been built yet.
 */
public double classifyInstance(Instance instance) throws Exception {
    if(wordWeights==null)
        throw new Exception("Error. The classifier has not been built "+
                            "properly.");

    // Score each class as the weighted sum of the instance's word frequencies.
    double [] valueForClass = new double[numClasses];
    double sumOfClassValues=0;
    for(int c=0; c<numClasses; c++) {
        double sumOfWordValues=0;
        // Sparse iteration: only the instance's non-zero attributes are visited.
        for(int w=0; w<instance.numValues(); w++) {
            if(instance.index(w)!=instance.classIndex()) {
                double freqOfWordInDoc = instance.valueSparse(w);
                sumOfWordValues += freqOfWordInDoc *
                                   wordWeights[c][instance.index(w)];
            }
        }
        valueForClass[c] = sumOfWordValues;
        sumOfClassValues += valueForClass[c];
    }

    // Complement NB picks the class with the MINIMUM complement score.
    int minidx=0;
    for(int i=0; i<numClasses; i++)
        if(valueForClass[i]<valueForClass[minidx])
            minidx = i;
    return minidx;
}
/**
 * Prints out the internal model built by the classifier. In this case
 * it prints out the word weights calculated when building the classifier,
 * as a tab-separated table with one row per attribute and one column per
 * class.
 *
 * @return a string describing the model, or a notice if it is unbuilt
 */
public String toString() {
    if(wordWeights==null) {
        return "The classifier hasn't been built yet.";
    }

    int numAttributes = header.numAttributes();
    // StringBuilder instead of StringBuffer: no synchronization is needed
    // for this purely local accumulation.
    StringBuilder result = new StringBuilder("The word weights for each class are: \n"+
                "------------------------------------\n\t");
    // Header row: one column per class label.
    for(int c = 0; c<numClasses; c++)
        result.append(header.classAttribute().value(c)).append("\t");
    result.append("\n");
    // One row per attribute with its per-class weight.
    for(int w = 0; w<numAttributes; w++) {
        result.append(header.attribute(w).name()).append("\t");
        for(int c = 0; c<numClasses; c++)
            result.append(Double.toString(wordWeights[c][w])).append("\t");
        result.append("\n");
    }
    return result.toString();
}
/**
 * Main method for testing this class.
 *
 * @param argv the options
 */
public static void main(String [] argv) {
    try {
        // Run the standard Weka evaluation harness on this classifier.
        String evaluation =
            weka.classifiers.Evaluation.evaluateModel(new ComplementNaiveBayes(), argv);
        System.out.println(evaluation);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println(e.getMessage());
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -