⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 standbayemail.java

📁 StandBayeMail
💻 JAVA
字号:
/**
 * <p>Title: StandBayeMail </p>
 * <p>Description: A bayesian spam filter</p>
 * <p>Copyright: Copyright (c) 2004 by Luca M. Viola</p>
 * <p>Company: 3AM.it</p>
 * @author Luca M. Viola <luca@3am.it>
 * @version 1.0
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 *  02111-1307, USA.
 */
package StandBayeMail;

import java.io.*;
import oeimport.*;

/**
 * Command-line front end and programmatic entry point of the filter.
 *
 * <p>From the command line ({@link #main(String[])}) a mailbox file is either
 * added to the "good" or "spam" word database, or tested against both.
 * {@link #calculateSpamicity(String, String, String)} offers the test step to
 * other code without printing anything.</p>
 */
public class StandBayeMail implements Version
{
  /** Word counts collected from legitimate mail. */
  WordCounterFile goodWordsCount=null;
  /** Word counts collected from spam. */
  WordCounterFile spamWordsCount=null;
  /** Word counts of the message currently being tested. */
  WordCounterFile messageWordsCount=null;

  private static final char FILE_ACCESS_MODE=RandomAccessReader.MODE_UNBUFFERED;

  /** Name of the scratch file an Outlook Express import is written to. */
  private static final String TEMP_FILE="tmp.$$$";

  /** Creates an instance without loading any word list. */
  public StandBayeMail() {}

  /**
   * Runs one command-line action against a mailbox file.
   *
   * <p>The word lists are opened as <code>./goodlist.sps</code> and
   * <code>./spamlist.sps</code>. With action "test" the file is tokenised,
   * passed to the bayesian filter, and the result is printed to standard
   * output. With "mail" or "spam" every token is added to the matching word
   * list, which is then committed together with the lexer's message number.
   * Any other action still reads the file but records nothing.</p>
   *
   * @param action   "test", "mail" or "spam" (case-insensitive)
   * @param filename path of the file to read; always lexed as a mailbox
   */
  public StandBayeMail( String action,String filename )
  {
    goodWordsCount=new WordCounterFile("."+File.separator+"goodlist.sps",false);
    spamWordsCount=new WordCounterFile("."+File.separator+"spamlist.sps",false);

    if( action.equalsIgnoreCase("test") )
    {
      // NOTE(review): the message counter is opened on the mailbox path itself,
      // unlike calculateSpamicity() which uses the no-arg constructor - confirm
      // this is intended.
      messageWordsCount=new WordCounterFile(filename,false);
      countTokens(new Lexer(filename,Lexer.MAIL_TYPE_MAILBOX),messageWordsCount);

      BayesianFilter bf=new BayesianFilter();
      Statistics stat=bf.bmf(spamWordsCount,goodWordsCount,messageWordsCount,new Statistics());

      for( int i=0; i<BayesianFilter.KEEPERS; i++ )
        System.out.println("#"+i+": "+stat.extrema[i].key+" ("+stat.extrema[i].prob+")");

      // The value is printed twice on purpose: default formatting first, then
      // plain decimal notation without an exponent.
      System.out.println("Spamicity: "+Double.toString(stat.spamicity));
      java.text.DecimalFormat df=new java.text.DecimalFormat("0.################################");
      System.out.println("Spamicity: "+df.format(stat.spamicity));
      return;
    }

    // Training: pick the destination list once instead of once per token.
    WordCounterFile target=null;
    if( action.equalsIgnoreCase("mail") )
      target=goodWordsCount;
    else if( action.equalsIgnoreCase("spam") )
      target=spamWordsCount;

    Lexer lex=new Lexer(filename,Lexer.MAIL_TYPE_MAILBOX);
    countTokens(lex,target);
    if( target!=null )
      target.commit(lex.getMessageNum());
  }

  /**
   * Drains a lexer, incrementing the counter once for every token it yields.
   *
   * @param lex    token source; fully consumed on return
   * @param target counter to update, or <code>null</code> to discard the tokens
   */
  private static void countTokens( Lexer lex,WordCounterFile target )
  {
    while( lex.hasMoreElements() )
    {
      String token=(String)lex.nextElement();
      if( target!=null )
        target.increment(token);
    }
  }

  /**
   * @return the file access mode constant this class is configured with
   */
  public static char getFileMode()
  {
    return FILE_ACCESS_MODE;
  }

  /**
   * Scores one e-mail against the good and spam word lists.
   *
   * <p>The word lists are opened on first use and kept in this instance, so
   * the paths given on later calls are ignored.</p>
   *
   * @param goodPath path of the good word list
   * @param badPath  path of the spam word list
   * @param email    argument handed to the single-argument Lexer constructor
   * @return the statistics produced by the bayesian filter
   */
  public Statistics calculateSpamicity( String goodPath,String badPath,String email )
  {
    if( goodWordsCount==null )
      goodWordsCount=new WordCounterFile(goodPath,true);
    if( spamWordsCount==null )
      spamWordsCount=new WordCounterFile(badPath,true);

    messageWordsCount=new WordCounterFile();
    countTokens(new Lexer(email),messageWordsCount);

    BayesianFilter bf=new BayesianFilter();
    return bf.bmf(spamWordsCount,goodWordsCount,messageWordsCount,new Statistics());
  }

  /**
   * Reads up to the first four bytes of a file.
   *
   * <p>An I/O failure is reported on standard error and is not propagated;
   * bytes that could not be read are left as zero.</p>
   *
   * @param filename path of the file to inspect
   * @return a four byte array holding the start of the file
   */
  private static byte [] getFileHeader(String filename)
  {
    byte [] buf=new byte[4];
    RandomAccessFile ras=null;
    try
    {
      ras=new RandomAccessFile(filename, "r");
      ras.read(buf);
    }
    catch( IOException ioe )
    {
      ioe.printStackTrace();
    }
    finally
    {
      // Close here so the handle is released even when read() fails.
      if( ras!=null )
      {
        try
        {
          ras.close();
        }
        catch( IOException ioe )
        {
          ioe.printStackTrace();
        }
      }
    }
    return buf;
  }

  /**
   * Tells whether a file starts with the four ASCII bytes "From".
   *
   * <p>The bytes are compared directly so the result does not depend on the
   * platform default charset.</p>
   *
   * @param filename path of the file to inspect
   * @return <code>true</code> if the file starts with "From"
   */
  private static boolean checkMboxFormat(String filename)
  {
    byte [] header=getFileHeader(filename);
    return header[0]=='F'
        && header[1]=='r'
        && header[2]=='o'
        && header[3]=='m';
  }

  /**
   * Tells whether a file starts with the bytes CF AD 12 FE.
   *
   * @param filename path of the file to inspect
   * @return <code>true</code> if all four leading bytes match
   */
  private static boolean checkDbxFormat(String filename)
  {
    byte [] header=getFileHeader(filename);
    // Mask to 0..255 because Java bytes are signed.
    return (header[0] & 0xff)==0xcf
        && (header[1] & 0xff)==0xad
        && (header[2] & 0xff)==0x12
        && (header[3] & 0xff)==0xfe;
  }

  /**
   * Prints the version banner and usage text to standard error, then
   * terminates the JVM with status 0.
   */
  private static void usageAndExit()
  {
    System.err.println("StandBayeMail v"+version+" [build "+buildno+","+buildtime+"]");
    System.err.println("");
    System.err.println("Usage: StandBayeMail <mailbox|outlookexpress> <spam|mail|test> <mailboxfile>");
    System.err.println("");
    System.err.println("<mailbox|outlookexpress>");
    System.err.println("    : specify if the mailbox file is a regular unix mbox or");
    System.err.println("    : an outlook express dbx file.");
    System.err.println("<spam|mail|test>");
    System.err.println("    : The switch \"spam\" or \"mail\" will add the <mailboxfile>'s words");
    System.err.println("    : to either the good words' database or the spam words'.");
    System.err.println("    : The switch \"test\" will check the <mailboxfile> applying");
    System.err.println("    : the bayesian filter.");
    System.err.println("<mailboxfile>");
    System.err.println("    : specify the path to the mailbox file.");
    System.err.println("");
    System.err.println("Examples:");
    System.err.println("  java -jar StandBayeMail.jar mailbox mail c:\\mail\\in.mbx");
    System.err.println("  java -jar StandBayeMail.jar mailbox test c:\\mail\\new.mbx");
    System.err.println("  java -jar StandBayeMail.jar outlookexpress spam spam.dbx");
    System.err.println("");
    System.err.println("All parameters are mandatory.");
    System.err.println("");
    System.exit(0);
  }

  /**
   * Command-line entry point.
   *
   * <p>Expects exactly three arguments: the mailbox type, the action and the
   * mailbox file. An Outlook Express file is first imported into a scratch
   * file, which is removed again before returning.</p>
   *
   * <p>NOTE(review): every error path exits with status 0, which callers
   * cannot tell apart from success; left unchanged in case scripts rely on
   * it.</p>
   *
   * @param args command-line arguments
   */
  public static void main( String [] args )
  {
    if( args.length!=3 )
      usageAndExit();

    String imptype=args[0];
    boolean isMailbox=imptype.equalsIgnoreCase("mailbox");
    boolean isOutlookExpress=imptype.equalsIgnoreCase("outlookexpress");
    if( !isMailbox && !isOutlookExpress )
      usageAndExit();

    String action=args[1];
    if( !action.equalsIgnoreCase("mail") &&
        !action.equalsIgnoreCase("spam") &&
        !action.equalsIgnoreCase("test") )
      usageAndExit();

    String filename=args[2];
    if( !new File(filename).exists() )
    {
      System.err.println("The file "+filename+" does not exist.");
      System.exit(0);
    }

    if( isMailbox && !checkMboxFormat(filename) )
    {
      System.err.println("The file "+filename+" is not in mailbox format.");
      System.exit(0);
    }

    if( isOutlookExpress && !checkDbxFormat(filename) )
    {
      System.err.println("The file " + filename +" is not in the outlook express format.");
      System.exit(0);
    }

    try
    {
      if( isOutlookExpress )
      {
        // Constructing the importer performs the conversion into the scratch file.
        new dbxImport(filename, TEMP_FILE);
        filename=TEMP_FILE;
      }
      new StandBayeMail(action,filename);
    }
    finally
    {
      // Remove the scratch file even when the import or the processing failed.
      File tmp=new File(TEMP_FILE);
      if( tmp.exists() )
        tmp.delete();
    }
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -