📄 lexerutils.java
字号:
/** * <p>Title: StandBayeMail </p> * <p>Description: A bayesian spam filter</p> * <p>Copyright: Copyright (c) 2004 by Luca M. Viola</p> * <p>Company: 3AM.it</p> * @author Luca M. Viola <luca@3am.it> * @version 1.0 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/package StandBayeMail;import java.util.*;public class LexerUtils{ private static String g_htmltags[] = { "abbr","above","accesskey","acronym","align","alink","all","alt","applet","archive","axis", "basefont","baseline","below","bgcolor","big","body","border","bottom","box","button", "cellpadding","cellspacing","center","char","charoff","charset","circle","cite","class", "classid","clear","codebase","codetype","color","cols","colspan","compact","content","coords", "data","datetime","declare","default","defer","dfn","dir","disabled", "face","font","frameborder","groups","head","headers","height","href","hreflang","hsides", "hspace","http-equiv","iframe","img","input","ismap","justify","kbd","label","lang","language", "left","lhs","link","longdesc","map","marginheight","marginwidth","media","meta","middle", "multiple","name","nohref","none","noresize","noshade","nowrap","object","onblur","onchange", "onclick","ondblclick","onfocus","onkeydown","onkeypress","onkeyup","onload","onmousedown", "onmousemove","onmouseout","onmouseover","onmouseup","onselect","onunload","param","poly", "profile","prompt","readonly","rect","rel","rev","rhs","right","rows","rowspan","rules","samp", "scheme","scope","script","scrolling","select","selected","shape","size","small","span","src", "standby","strike","strong","style","sub","summary","sup","tabindex","table","target","textarea", "title","top","type","usemap","valign","value","valuetype","var","vlink","void","vsides","vspace", "width" }; private static String g_ignoredheaders[] = { "Date:","Delivery-date:","Message-ID:","X-RBL-Warning:","X-Sorted:","X-Spam-" }; private static final int g_nhtmltags = g_htmltags.length; private static final int g_nignoredheaders = g_ignoredheaders.length; private static boolean sorted=false; private static Comparator comp; private synchronized static void sortArrays() { comp=new Comparator() { public int compare(Object o1, Object o2) { String s1=o1.toString(); String s2=o2.toString(); int l1=s1.length(); int l2=s2.length(); int l=Math.min(l1,l2); s1=s1.substring(0,l); s2=s2.substring(0,l); return s1.compareToIgnoreCase(s2); } public boolean equals(Object obj) { return (obj==this); } }; Arrays.sort(g_htmltags,comp); Arrays.sort(g_ignoredheaders,comp); sorted=true; } private static boolean isMimeAlphaNum( char c ) { if( (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') ) return true; return false; } private static boolean isalnum( char c ) { boolean ret=UnicodeCharacterUtils.getUtils().isAlphaNumeric(c); return ret; } public static boolean isBase64Char( char c ) { return ( isMimeAlphaNum(c) || (c == '/' || c == '+') ); } public static boolean isBase64( String p ) { int len=p.length(); for( int i=0; i<len; i++ ) { char c=p.charAt(i); if( c!='\n' && c!='\r' && !isBase64Char(c) ) return false; } return true; } public static boolean isWhiteSpace( char c ) { return ( c == ' ' || c == '\t' || c == '\r' ); } public static boolean isWordMidChar( char c) { return ( isalnum(c) || c == '$' || c == '\'' || c == '.' || c == '-' ); } public static boolean isWordendChar( char c) { return ( isalnum(c) || c == '$' ); } public static boolean isHtmlTag( String p ) { if( !sorted ) sortArrays(); int pos=Arrays.binarySearch(g_htmltags,p,comp); if( pos>=0 ) return true; return false; } public static boolean isHtmlComment( String p ) { int len=p.length(); if( len >=4 && p.startsWith( "<!--" ) ) return true; if( len >= 3 && p.startsWith( "-->") ) return true; return false; } public static boolean isMimeBoundary( String p ) { int len=p.length(); if( len < 3 || p.charAt(0) != '-' || p.charAt(1) != '-' ) { return false; } p=p.substring(2,len); for( int i=2; i<p.length(); i++ ) { char ch=p.charAt(i); if( isWhiteSpace(ch) ) { return false; } if( ch == '\n' || ch == '\r' ) { break; } } return true; } public static boolean isIpAddr( String p ) { String [] octets=new String[4]; int n; boolean ret=false; try { StringTokenizer st=new StringTokenizer( p,"." ); for( int i=0; i<4; i++ ) { octets[i]=st.nextToken(); n=Integer.parseInt(octets[i]); } ret=true; } catch( Exception e ) { ret=false; } return ret; } public static boolean isWord( String p,String token ) { int len=p.length(); if( len < 3 ) { return false; } char ch=p.charAt(0); if( !UnicodeCharacterUtils.getUtils().isAlphaNumeric(ch) || ch=='$' ) return false; int ptoklen=1; for( int i=1; i<len; i++ ) { ch=p.charAt(i); if( !isWordMidChar(ch) ) break; ptoklen++; } while( ptoklen >= 3 && !isWordendChar(ch=p.charAt(len-1)) ) { ptoklen--; len--; } if( ptoklen < 3 ) { return false; } return true; } public static boolean isIgnoredHeader( String p ) { if( !sorted ) sortArrays(); int pos=Arrays.binarySearch(g_ignoredheaders,p,comp); if( pos>=0 ) return true; return false; } public static boolean isMailerId( String p ) { int len=p.length(); if( len < 4 || !p.startsWith( "\tid ") ) return false; return true; } public static boolean isSpamText( String p ) { int len=p.length(); if( len < 5 || !p.startsWith("SPAM:" ) ) return false; return true; } public static boolean isSmtpId( String p ) { int len=p.length(); if( len < 8 || !p.startsWith("SMTP id ") ) return false; return true; } public static boolean isBoundaryEqual( String p ) { int len=p.length(); if( len < 9 || !p.startsWith("boundary=") ) return false; return true; } public static boolean isNameEqual( String p ) { int len=p.length(); if( len < 6 || !p.startsWith("name=\"") ) return false; return true; } public static boolean isFileNameEqual( String p ) { int len=p.length(); if( len < 10 || !p.startsWith("filename=\"" ) ) return false; return true; } public static boolean isFrom( String p ) { int len=p.length(); if( len < 5 || p.startsWith("From ") ) return false; return true; } public static boolean isMboxHeader( String s ) { boolean ret; ret=(s.startsWith("From ") && (s.indexOf("@")!=-1) ); return ret; } public static boolean isNumber( String p ) { int n; try { n=Integer.parseInt(p); return true; } catch( NumberFormatException ne ) { } return false; } private static boolean isUnwanted(char ch) { if( ch=='$' ) return true; if( ch=='/' ) return true; if( ch=='\'' ) return true; if( ch=='"' ) return true; if( ch=='-' ) return true; if( ch=='.' ) return true; if( ch=='+' ) return true; return false; } public static String trim(String s) { s=s.trim(); StringBuffer t=new StringBuffer(s); int l=t.length(); for( int i=0; i<l ; i++ ) { char ch=t.charAt(i); if( isUnwanted(ch) ) t.replace(i,i+1," " ); else break; } for( int i=l-1; i>=0 ; i-- ) { char ch=t.charAt(i); if( isUnwanted(ch) ) t.replace(i,i+1," " ); else break; } return t.toString().trim(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -