⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexerutils.java

📁 StandBayeMail
💻 JAVA
字号:
/** * <p>Title: StandBayeMail </p> * <p>Description: A bayesian spam filter</p> * <p>Copyright: Copyright (c) 2004 by Luca M. Viola</p> * <p>Company: 3AM.it</p> * @author Luca M. Viola <luca@3am.it> * @version 1.0  This program is free software; you can redistribute it and/or  modify it under the terms of the GNU General Public License  as published by the Free Software Foundation; either version 2  of the License, or (at your option) any later version.  This program is distributed in the hope that it will be useful,  but WITHOUT ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  GNU General Public License for more details.  You should have received a copy of the GNU General Public License  along with this program; if not, write to the Free Software  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.*/package StandBayeMail;import java.util.*;public class LexerUtils{  private static String g_htmltags[] =  {      "abbr","above","accesskey","acronym","align","alink","all","alt","applet","archive","axis",      "basefont","baseline","below","bgcolor","big","body","border","bottom","box","button",      "cellpadding","cellspacing","center","char","charoff","charset","circle","cite","class",      "classid","clear","codebase","codetype","color","cols","colspan","compact","content","coords",      "data","datetime","declare","default","defer","dfn","dir","disabled",      "face","font","frameborder","groups","head","headers","height","href","hreflang","hsides",      "hspace","http-equiv","iframe","img","input","ismap","justify","kbd","label","lang","language",      "left","lhs","link","longdesc","map","marginheight","marginwidth","media","meta","middle",      "multiple","name","nohref","none","noresize","noshade","nowrap","object","onblur","onchange",      "onclick","ondblclick","onfocus","onkeydown","onkeypress","onkeyup","onload","onmousedown",      "onmousemove","onmouseout","onmouseover","onmouseup","onselect","onunload","param","poly",      "profile","prompt","readonly","rect","rel","rev","rhs","right","rows","rowspan","rules","samp",      "scheme","scope","script","scrolling","select","selected","shape","size","small","span","src",      "standby","strike","strong","style","sub","summary","sup","tabindex","table","target","textarea",      "title","top","type","usemap","valign","value","valuetype","var","vlink","void","vsides","vspace",      "width"  };  private static String g_ignoredheaders[] =  {      "Date:","Delivery-date:","Message-ID:","X-RBL-Warning:","X-Sorted:","X-Spam-"  };  private static final int g_nhtmltags = g_htmltags.length;  private static final int g_nignoredheaders = g_ignoredheaders.length;  private static boolean sorted=false;  private static Comparator comp;  private synchronized static void sortArrays()  {    comp=new Comparator()    {      public int compare(Object o1, Object o2)      {        String s1=o1.toString();        String s2=o2.toString();        int l1=s1.length();        int l2=s2.length();        int l=Math.min(l1,l2);        s1=s1.substring(0,l);        s2=s2.substring(0,l);        return s1.compareToIgnoreCase(s2);      }      public boolean equals(Object obj)      {        return (obj==this);      }    };    Arrays.sort(g_htmltags,comp);    Arrays.sort(g_ignoredheaders,comp);    sorted=true;  }  private static boolean isMimeAlphaNum( char c )  {    if( (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') ) return true;    return false;  }  private static boolean isalnum( char c )  {    boolean ret=UnicodeCharacterUtils.getUtils().isAlphaNumeric(c);    return ret;  }  public static boolean isBase64Char( char c )  {      return ( isMimeAlphaNum(c) || (c == '/' || c == '+') );  }  public static boolean isBase64( String p )  {    int len=p.length();    for( int i=0; i<len; i++ )    {      char c=p.charAt(i);      if( c!='\n' && c!='\r' && !isBase64Char(c) )        return false;    }    return true;  }  public static boolean isWhiteSpace( char c )  {      return ( c == ' ' || c == '\t' || c == '\r' );  }  public static boolean isWordMidChar( char c)  {      return ( isalnum(c) || c == '$' || c == '\'' || c == '.' || c == '-' );  }  public static boolean isWordendChar( char c)  {      return ( isalnum(c) || c == '$' );  }  public static boolean isHtmlTag( String p )  {    if( !sorted ) sortArrays();    int pos=Arrays.binarySearch(g_htmltags,p,comp);    if( pos>=0 ) return true;    return false;  }  public static boolean isHtmlComment( String p )  {    int len=p.length();    if( len >=4 && p.startsWith( "<!--" ) )          return true;    if( len >= 3 && p.startsWith( "-->") )          return true;    return false;  }  public static boolean isMimeBoundary( String p )  {      int len=p.length();      if( len < 3 || p.charAt(0) != '-' || p.charAt(1) != '-' )      {          return false;      }      p=p.substring(2,len);      for( int i=2; i<p.length(); i++ )      {          char ch=p.charAt(i);          if( isWhiteSpace(ch) )          {              return false;          }          if( ch == '\n' || ch == '\r' )          {              break;          }      }      return true;  }  public static boolean isIpAddr( String p )  {    String [] octets=new String[4];    int n;    boolean ret=false;    try    {      StringTokenizer st=new StringTokenizer( p,"." );      for( int i=0; i<4; i++ )      {        octets[i]=st.nextToken();        n=Integer.parseInt(octets[i]);      }      ret=true;    }    catch( Exception e )    {      ret=false;    }    return ret;  }  public static boolean isWord( String p,String token )  {      int len=p.length();      if( len < 3 )      {          return false;      }      char ch=p.charAt(0);      if( !UnicodeCharacterUtils.getUtils().isAlphaNumeric(ch) || ch=='$' )          return false;      int ptoklen=1;      for( int i=1; i<len; i++ )      {          ch=p.charAt(i);          if( !isWordMidChar(ch) )              break;          ptoklen++;      }      while( ptoklen >= 3 && !isWordendChar(ch=p.charAt(len-1)) )      {          ptoklen--;          len--;      }      if( ptoklen < 3 )      {          return false;      }      return true;  }  public static boolean isIgnoredHeader( String p )  {    if( !sorted ) sortArrays();    int pos=Arrays.binarySearch(g_ignoredheaders,p,comp);    if( pos>=0 ) return true;    return false;  }  public static boolean isMailerId( String p )  {      int len=p.length();      if( len < 4 || !p.startsWith( "\tid ") )          return false;      return true;  }  public static boolean isSpamText( String p )  {      int len=p.length();      if( len < 5 || !p.startsWith("SPAM:" ) )          return false;      return true;  }  public static boolean isSmtpId( String p )  {      int len=p.length();      if( len < 8 || !p.startsWith("SMTP id ") )          return false;      return true;  }  public static boolean isBoundaryEqual( String p )  {      int len=p.length();      if( len < 9 || !p.startsWith("boundary=") )          return false;      return true;  }  public static boolean isNameEqual( String p )  {      int len=p.length();      if( len < 6 || !p.startsWith("name=\"") )          return false;      return true;  }  public static boolean isFileNameEqual( String p )  {      int len=p.length();      if( len < 10 || !p.startsWith("filename=\"" ) )          return false;      return true;  }  public static boolean isFrom( String p )  {      int len=p.length();      if( len < 5 || p.startsWith("From ") )          return false;      return true;  }  public static boolean isMboxHeader( String s  )  {    boolean ret;    ret=(s.startsWith("From ") && (s.indexOf("@")!=-1) );    return ret;  }  public static boolean isNumber( String p )  {    int n;    try    {      n=Integer.parseInt(p);      return true;    }    catch( NumberFormatException ne )    {    }    return false;  }  private static boolean isUnwanted(char ch)  {    if( ch=='$' ) return true;    if( ch=='/' ) return true;    if( ch=='\'' ) return true;    if( ch=='"' ) return true;    if( ch=='-' ) return true;    if( ch=='.' ) return true;    if( ch=='+' ) return true;    return false;  }  public static String trim(String s)  {    s=s.trim();    StringBuffer t=new StringBuffer(s);    int l=t.length();    for( int i=0; i<l ; i++ )    {      char ch=t.charAt(i);      if( isUnwanted(ch) ) t.replace(i,i+1," " );      else break;    }    for( int i=l-1; i>=0 ; i-- )    {      char ch=t.charAt(i);      if( isUnwanted(ch) ) t.replace(i,i+1," " );      else break;    }    return t.toString().trim();  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -