📄 standbayemail.java
字号:
/** * <p>Title: StandBayeMail </p> * <p>Description: A bayesian spam filter</p> * <p>Copyright: Copyright (c) 2004 by Luca M. Viola</p> * <p>Company: 3AM.it</p> * @author Luca M. Viola <luca@3am.it> * @version 1.0 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */package StandBayeMail;import java.io.*;import oeimport.*;public class StandBayeMail implements Version{ WordCounterFile goodWordsCount=null; WordCounterFile spamWordsCount=null; WordCounterFile messageWordsCount=null; private static final char FILE_ACCESS_MODE=RandomAccessReader.MODE_UNBUFFERED; public StandBayeMail() {} public StandBayeMail( String action,String filename ) { if( goodWordsCount==null ) goodWordsCount=new WordCounterFile("."+File.separator+"goodlist.sps",false); if( spamWordsCount==null ) spamWordsCount=new WordCounterFile("."+File.separator+"spamlist.sps",false); if( action.equalsIgnoreCase("test") ) { messageWordsCount=new WordCounterFile(filename,false); Lexer lex=new Lexer(filename,Lexer.MAIL_TYPE_MAILBOX); while( lex.hasMoreElements() ) { String token=(String)lex.nextElement(); messageWordsCount.increment(token); } BayesianFilter bf=new BayesianFilter(); Statistics stat=new Statistics(); stat=bf.bmf(spamWordsCount,goodWordsCount,messageWordsCount,stat); for( int i=0; i<BayesianFilter.KEEPERS; i++ ) System.out.println("#"+i+": "+stat.extrema[i].key+" ("+stat.extrema[i].prob+")"); Double db=new Double(stat.spamicity); System.out.println("Spamicity: "+db.toString()); java.text.DecimalFormat df=new java.text.DecimalFormat("0.################################"); String d=df.format(stat.spamicity); System.out.println("Spamicity: "+d); return; } Lexer lex=new Lexer(filename,Lexer.MAIL_TYPE_MAILBOX); while( lex.hasMoreElements() ) { String token=(String)lex.nextElement(); if( action.equalsIgnoreCase("mail") ) goodWordsCount.increment(token); if( action.equalsIgnoreCase("spam") ) spamWordsCount.increment(token); } if( action.equalsIgnoreCase("mail") ) goodWordsCount.commit(lex.getMessageNum()); if( action.equalsIgnoreCase("spam") ) spamWordsCount.commit(lex.getMessageNum()); } public static char getFileMode() { return FILE_ACCESS_MODE; } public Statistics calculateSpamicity( String goodPath,String badPath,String email ) { if( goodWordsCount==null ) goodWordsCount=new WordCounterFile(goodPath,true); if( spamWordsCount==null ) spamWordsCount=new WordCounterFile(badPath,true); messageWordsCount=new WordCounterFile(); Lexer lex=new Lexer(email); while( lex.hasMoreElements() ) { String token=(String)lex.nextElement(); messageWordsCount.increment(token); } BayesianFilter bf=new BayesianFilter(); Statistics stat=new Statistics(); stat=bf.bmf(spamWordsCount,goodWordsCount,messageWordsCount,stat); // for( int i=0; i<BayesianFilter.KEEPERS; i++ ) // System.out.println("#"+i+": "+stat.extrema[i].key+" ("+stat.extrema[i].prob+")"); Double db=new Double(stat.spamicity); // System.out.println("Spamicity: "+db.toString()); java.text.DecimalFormat df=new java.text.DecimalFormat("0.################################"); String d=df.format(stat.spamicity); // System.out.println("Spamicity: "+d); return stat; } private static byte [] getFileHeader(String filename) { byte [] buf=new byte[4]; try { RandomAccessFile ras = new RandomAccessFile(filename, "r"); int r=ras.read(buf); ras.close(); } catch( IOException ioe ) { ioe.printStackTrace(); } return buf; } private static boolean checkMboxFormat(String filename) { String header=new String(getFileHeader(filename)); if( header.equals("From") ) return true; return false; } private static boolean checkDbxFormat(String filename) { byte [] header=getFileHeader(filename); int n0=(header[0] & 0xff); int n1=(header[1] & 0xff); int n2=(header[2] & 0xff); int n3=(header[3] & 0xff); if( n0!=0xcf ) return false; if( n1!=0xad ) return false; if( n2!=0x12 ) return false; if( n3!=0xfe ) return false; return true; } private static void usageAndExit() { System.err.println("StandBayeMail v"+version+" [build "+buildno+","+buildtime+"]"); System.err.println(""); System.err.println("Usage: StandBayeMail <mailbox|outlookexpress> <spam|mail|test> <mailboxfile>"); System.err.println(""); System.err.println("<mailbox|outlookexpress>"); System.err.println(" : specify if the mailbox file is a regular unix mbox or"); System.err.println(" : an outlook express dbx file."); System.err.println("<spam|mail|test>"); System.err.println(" : The switch \"spam\" or \"mail\" will add the <mailboxfile>'s words"); System.err.println(" : to either the good words' database or the spam words'."); System.err.println(" : The switch \"test\" will check the <mailboxfile> applying"); System.err.println(" : the bayesian filter."); System.err.println("<mailboxfile>"); System.err.println(" : specify the path to the mailbox file."); System.err.println(""); System.err.println("Examples:"); System.err.println(" java -jar StandBayeMail.jar mailbox mail c:\\mail\\in.mbx"); System.err.println(" java -jar StandBayeMail.jar mailbox test c:\\mail\\new.mbx"); System.err.println(" java -jar StandBayeMail.jar outlookexpress spam spam.dbx"); System.err.println(""); System.err.println("All parameters are mandatory."); System.err.println(""); System.exit(0); } public static void main( String [] args ) { int argc=args.length; if( argc!=3 ) usageAndExit(); String imptype=args[0]; if( !imptype.equalsIgnoreCase("mailbox") && !imptype.equalsIgnoreCase("outlookexpress") ) usageAndExit(); String action=args[1]; if( !action.equalsIgnoreCase("mail") && !action.equalsIgnoreCase("spam") && !action.equalsIgnoreCase("test") ) usageAndExit(); String filename=args[2]; File f=new File(filename); if( !f.exists() ) { System.err.println("The file "+filename+" does not exist."); System.exit(0); } if( imptype.equalsIgnoreCase("mailbox") ) { if( !checkMboxFormat(filename) ) { System.err.println("The file "+filename+" is not in mailbox format."); System.exit(0); } } if( imptype.equalsIgnoreCase("outlookexpress") ) { if( !checkDbxFormat(filename) ) { System.err.println("The file " + filename +" is not in the outlook express format."); System.exit(0); } else { dbxImport dbi = new dbxImport(filename, "tmp.$$$"); filename = "tmp.$$$"; } } new StandBayeMail(action,filename); f=new File("tmp.$$$"); if( f.exists() ) f.delete(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -