📄 bayesianspamchecker.java
字号:
import java.util.*;public class BayesianSpamChecker { public BayesianSpamChecker(Hashtable wordProbabilities) { this.wordProbabilities = wordProbabilities; } public float probabilityOfSpam(String text) throws Exception { Vector words = Tokenizer.getTokens(text); double probs = 1, probs_minus_one = 1; HashSet set = new HashSet(); for (int i = 0; i < 15; i++) { interesting[i] = -1; wrds[i] = ""; } // get unique words: for (int i = 0, size = words.size(); i < size; i++) { set.add(words.get(i)); } Iterator iter = set.iterator(); while (iter.hasNext()) { String w = (String) iter.next(); float f = 0.5f; Float ff = (Float) wordProbabilities.get(w); if (ff != null) { f = ff.floatValue(); } float interest = Math.abs(f - 0.5f); // see if this word is more interesting than on of the 15: // first, find the smallest value to replace (if it is smaller than the // value in 'interest': float minVal = 9999; int minIndex = 0; for (int i = 0; i < 15; i++) { if (minVal > interesting[i]) { minVal = interesting[i]; minIndex = i; } } //System.out.println("w="+w+", minVal="+minVal+", minIndex="+minIndex); if (interest > minVal) { interesting[minIndex] = interest; wrds[minIndex] = w; } } for (int i = 0; i < 15; i++) { if (interesting[i] < 0) continue; Float ff = (Float) wordProbabilities.get(wrds[i]); if (ff != null) { float f = ff.floatValue(); //System.out.println("word: " + wrds[i] + " interest: " + interesting[i] + " spam prob: " + f); probs *= f; probs_minus_one *= (1 - f); } } //if (Math.abs(f - 0.5f) > 0.05f) { // probs *= f; // probs_minus_one *= (1 - f); //} return (float) (probs / (probs + probs_minus_one)); } private Hashtable wordProbabilities; float[] interesting = new float[15]; String[] wrds = new String[15];}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -