📄 SpamFilter.java
/*
    JSPWiki - a JSP-based WikiWiki clone.

    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package com.ecyrd.jspwiki.filters;

import java.io.*;
import java.util.*;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.jsp.PageContext;

import net.sf.akismet.Akismet;

import org.apache.commons.jrcs.diff.*;
import org.apache.commons.jrcs.diff.myers.MyersDiff;
import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.oro.text.regex.*;

import com.ecyrd.jspwiki.*;
import com.ecyrd.jspwiki.attachment.Attachment;
import com.ecyrd.jspwiki.auth.user.UserProfile;
import com.ecyrd.jspwiki.providers.ProviderException;
import com.ecyrd.jspwiki.ui.EditorManager;

/**
 *  This is Herb, the JSPWiki spamfilter that can also do choke modifications.
 *
 *  Parameters:
 *  <ul>
 *    <li>wordlist - Page name where the regexps are found.  Use [{SET spamwords='regexp list separated with spaces'}] on
 *        that page.  Default is "SpamFilterWordList".
 *    <li>blacklist - The name of an attachment containing the list of spam patterns, one per line.  Default is
 *        "SpamFilterWordList/blacklist.txt"</li>
 *    <li>errorpage - The page to which the user is redirected.  Has a special variable $msg which states the reason.  Default is "RejectedMessage".
 *    <li>pagechangesinminute - How many page changes are allowed/minute.  Default is 5.</li>
 *    <li>similarchanges - How many similar page changes are allowed before the host is banned.  Default is 2.  (since 2.4.72)</li>
 *    <li>bantime - How long an IP address stays on the temporary ban list (default is 60 for 60 minutes).</li>
 *    <li>maxurls - How many URLs can be added to the page before it is considered spam (default is 5)</li>
 *    <li>akismet-apikey - The Akismet API key (see akismet.org)</li>
 *    <li>ignoreauthenticated - If set to "true", all authenticated users are ignored and never caught in SpamFilter</li>
 *    <li>captcha - Sets the captcha technology to use.  Current allowed values are "none" and "asirra".</li>
 *    <li>strategy - Sets the filtering strategy to use.  If set to "eager", will stop at the first probable
 *        match, and won't consider any other tests.  This is the default, as it's considerably lighter.  If set to "score",
 *        will go through all of the tests and calculates a score for the spam, which is then compared to a filter level value.
 *  </ul>
 *
 *  <p>Please see the default editors/plain.jsp for examples on how the SpamFilter integrates
 *  with the editor system.</p>
 *
 *  <p>Changes by admin users are ignored in any case.</p>
 *
 *  @since 2.1.112
 */
public class SpamFilter extends BasicPageFilter
{
    private static final String ATTR_SPAMFILTER_SCORE = "spamfilter.score";

    private static final String REASON_REGEXP                  = "Regexp";
    private static final String REASON_IP_BANNED_TEMPORARILY   = "IPBannedTemporarily";
    private static final String REASON_BOT_TRAP                = "BotTrap";
    private static final String REASON_AKISMET                 = "Akismet";
    private static final String REASON_TOO_MANY_URLS           = "TooManyUrls";
    private static final String REASON_SIMILAR_MODIFICATIONS   = "SimilarModifications";
    private static final String REASON_TOO_MANY_MODIFICATIONS  = "TooManyModifications";
    private static final String REASON_UTF8_TRAP               = "UTF8Trap";

    private static final String LISTVAR = "spamwords";

    /** The filter property name for specifying the page which contains the list of spamwords.
     *  Value is <tt>{@value}</tt>. */
    public static final String PROP_WORDLIST = "wordlist";

    /** The filter property name for the page to which you are directed if Herb rejects your
     *  edit.  Value is <tt>{@value}</tt>. */
    public static final String PROP_ERRORPAGE = "errorpage";

    /** The filter property name for specifying how many changes is any given IP address
     *  allowed to do per minute.  Value is <tt>{@value}</tt>. */
    public static final String PROP_PAGECHANGES = "pagechangesinminute";

    /** The filter property name for specifying how many similar changes are allowed
     *  before a host is banned.  Value is <tt>{@value}</tt>. */
    public static final String PROP_SIMILARCHANGES = "similarchanges";

    /** The filter property name for specifying how long a host is banned.  Value is <tt>{@value}</tt>. */
    public static final String PROP_BANTIME = "bantime";

    /** The filter property name for the attachment containing the blacklist.  Value is <tt>{@value}</tt>. */
    public static final String PROP_BLACKLIST = "blacklist";

    /** The filter property name for specifying how many URLs can any given edit contain.
     *  Value is <tt>{@value}</tt> */
    public static final String PROP_MAXURLS = "maxurls";

    /** The filter property name for specifying the Akismet API-key.  Value is <tt>{@value}</tt>. */
    public static final String PROP_AKISMET_API_KEY = "akismet-apikey";

    /** The filter property name for specifying whether authenticated users should be ignored.  Value is <tt>{@value}</tt>. */
    public static final String PROP_IGNORE_AUTHENTICATED = "ignoreauthenticated";

    /** The filter property name for specifying which captcha technology should be used.  Value is <tt>{@value}</tt>. */
    public static final String PROP_CAPTCHA = "captcha";

    /** The filter property name for specifying which filter strategy should be used.  Value is <tt>{@value}</tt>. */
    public static final String PROP_FILTERSTRATEGY = "strategy";

    /** The string specifying the "eager" strategy.  Value is <tt>{@value}</tt>. */
    public static final String STRATEGY_EAGER = "eager";

    /** The string specifying the "score" strategy.  Value is <tt>{@value}</tt>. */
    public static final String STRATEGY_SCORE = "score";

    private static final String URL_REGEXP = "(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;]+)";

    private String m_forbiddenWordsPage = "SpamFilterWordList";
    private String m_errorPage          = "RejectedMessage";
    private String m_blacklist          = "SpamFilterWordList/blacklist.txt";

    private PatternMatcher  m_matcher  = new Perl5Matcher();
    private PatternCompiler m_compiler = new Perl5Compiler();

    private Collection<Pattern> m_spamPatterns = null;

    private Date m_lastRebuild = new Date( 0L );

    private static Logger c_spamlog = Logger.getLogger( "SpamLog" );
    private static Logger log       = Logger.getLogger( SpamFilter.class );

    private Vector<Host> m_temporaryBanList = new Vector<Host>();

    private int m_banTime = 60; // minutes

    private Vector<Host> m_lastModifications = new Vector<Host>();

    /**
     *  How many times a single IP address can change a page per minute?
     */
    private int m_limitSinglePageChanges = 5;

    /**
     *  How many times can you add the exact same string to a page?
     */
    private int m_limitSimilarChanges = 2;

    /**
     *  How many URLs can be added at maximum.
     */
    private int m_maxUrls = 10;

    private Pattern m_urlPattern;
    private Akismet m_akismet;

    private String m_akismetAPIKey = null;

    private boolean m_useCaptcha = false;

    /** The limit at which we consider something to be spam. */
    private int m_scoreLimit = 1;

    /**
     *  If set to true, will ignore anyone who is in Authenticated role.
     */
    private boolean m_ignoreAuthenticated = false;

    private boolean m_stopAtFirstMatch = true;

    private static String c_hashName;
    private static long   c_lastUpdate;

    /** The HASH_DELAY value is a maximum amount of time that an user can keep
     *  a session open, because after the value has expired, we will invent a new
     *  hash field name.  By default this is {@value} hours, which should be ample
     *  time for someone.
     */
    private static final long HASH_DELAY = 24;

    /**
     *  {@inheritDoc}
     */
    @Override
    public void initialize( WikiEngine engine, Properties properties )
    {
        m_forbiddenWordsPage = properties.getProperty( PROP_WORDLIST, m_forbiddenWordsPage );
        m_errorPage          = properties.getProperty( PROP_ERRORPAGE, m_errorPage );

        m_limitSinglePageChanges = TextUtil.getIntegerProperty( properties, PROP_PAGECHANGES, m_limitSinglePageChanges );
        m_limitSimilarChanges    = TextUtil.getIntegerProperty( properties, PROP_SIMILARCHANGES, m_limitSimilarChanges );
        m_maxUrls                = TextUtil.getIntegerProperty( properties, PROP_MAXURLS, m_maxUrls );
        m_banTime                = TextUtil.getIntegerProperty( properties, PROP_BANTIME, m_banTime );

        m_blacklist = properties.getProperty( PROP_BLACKLIST, m_blacklist );

        m_ignoreAuthenticated = TextUtil.getBooleanProperty( properties, PROP_IGNORE_AUTHENTICATED, m_ignoreAuthenticated );

        m_useCaptcha = properties.getProperty( PROP_CAPTCHA, "" ).equals( "asirra" );

        try
        {
            m_urlPattern = m_compiler.compile( URL_REGEXP );
        }
        catch( MalformedPatternException e )
        {
            log.fatal( "Internal error: Someone put in a faulty pattern.", e );
            throw new InternalWikiException( "Faulty pattern." );
        }

        m_akismetAPIKey = TextUtil.getStringProperty( properties, PROP_AKISMET_API_KEY, m_akismetAPIKey );

        m_stopAtFirstMatch = TextUtil.getStringProperty( properties, PROP_FILTERSTRATEGY, STRATEGY_EAGER ).equals( STRATEGY_EAGER );

        log.info( "# Spam filter initialized.  Temporary ban time " + m_banTime +
                  " mins, max page changes/minute: " + m_limitSinglePageChanges );
    }

    private static final int REJECT = 0;
    private static final int ACCEPT = 1;
    private static final int NOTE   = 2;

    private static String log( WikiContext ctx, int type, String source, String message )
    {
        message = TextUtil.replaceString( message, "\r\n", "\\r\\n" );
        message = TextUtil.replaceString( message, "\"", "\\\"" );

        String uid = getUniqueID();

        String page   = ctx.getPage().getName();
        String reason = "UNKNOWN";
        String addr   = ctx.getHttpRequest() != null ? ctx.getHttpRequest().getRemoteAddr() : "-";

        switch( type )
        {
            case REJECT:
                reason = "REJECTED";
                break;
            case ACCEPT:
                reason = "ACCEPTED";
                break;
            case NOTE:
                reason = "NOTE";
                break;
            default:
                throw new InternalWikiException( "Illegal type " + type );
        }

        c_spamlog.info( reason + " " + source + " " + uid + " " + addr + " \"" + page + "\" " + message );

        return uid;
    }

    /** {@inheritDoc} */
    public String preSave( WikiContext context, String content ) throws RedirectException
    {
        cleanBanList();
        refreshBlacklists( context );

        Change change = getChange( context, content );

        if( !ignoreThisUser( context ) )
        {
            checkBanList( context, change );
            checkSinglePageChange( context, content, change );
            checkPatternList( context, content, change );
        }

        if( !m_stopAtFirstMatch )
        {
            Integer score = (Integer)context.getVariable( ATTR_SPAMFILTER_SCORE );

            if( score != null && score.intValue() >= m_scoreLimit )
            {
                throw new RedirectException( "Herb says you got too many points", getRedirectPage( context ) );
            }
        }

        log( context, ACCEPT, "-", change.toString() );

        return content;
    }

    private void checkStrategy( WikiContext context, String error, String message ) throws RedirectException
    {
        if( m_stopAtFirstMatch )
        {
            throw new RedirectException( message, getRedirectPage( context ) );
        }

        Integer score = (Integer)context.getVariable( ATTR_SPAMFILTER_SCORE );

        if( score != null )
            score = score + 1;
        else
            score = 1;

        context.setVariable( ATTR_SPAMFILTER_SCORE, score );
    }

    /**
     *  Parses a list of patterns and returns a Collection of compiled Pattern
     *  objects.
     *
     *  @param source
     *  @param list
     *  @return A Collection of the Patterns that were found from the lists.
     */
    private Collection<Pattern> parseWordList( WikiPage source, String list )
    {
        ArrayList<Pattern> compiledpatterns = new ArrayList<Pattern>();

        if( list != null )
        {
            StringTokenizer tok = new StringTokenizer( list, " \t\n" );

            while( tok.hasMoreTokens() )
            {
                String pattern = tok.nextToken();

                try
                {
                    compiledpatterns.add( m_compiler.compile( pattern ) );
                }
                catch( MalformedPatternException e )
                {
                    log.debug( "Malformed spam filter pattern " + pattern );

                    source.setAttribute( "error", "Malformed spam filter pattern " + pattern );
                }
            }
        }

        return compiledpatterns;
    }

    /**
     *  Takes a MT-Blacklist -formatted blacklist and returns a list of compiled
     *  Pattern objects.
     *
     *  @param list
     *  @return The parsed blacklist patterns.
     */
    private Collection<Pattern> parseBlacklist( String list )
    {
        ArrayList<Pattern> compiledpatterns = new ArrayList<Pattern>();

        if( list != null )
        {
            try
            {
                BufferedReader in = new BufferedReader( new StringReader( list ) );

                String line;

                while( (line = in.readLine()) != null )
                {
                    line = line.trim();
                    if( line.length() == 0 ) continue;   // Empty line
                    if( line.startsWith( "#" ) ) continue; // It's a comment

                    int ws = line.indexOf( ' ' );

                    if( ws == -1 ) ws = line.indexOf( '\t' );

                    if( ws != -1 ) line = line.substring( 0, ws );

                    try
                    {
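The listing above is cut off inside parseBlacklist(), but the line handling it applies to an MT-Blacklist-formatted attachment is already visible: blank lines and "#" comments are skipped, and each remaining line is truncated at the first space or tab before it is compiled as a pattern. The standalone sketch below replays only that trimming logic on an invented sample blacklist; the class name BlacklistParseSketch, the extractPatterns helper, and the sample entries are illustrative assumptions, not part of JSPWiki, and the sketch deliberately stops short of compiling the patterns with the ORO classes the filter itself uses.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

// Sketch of the blank-line / comment / whitespace handling seen in parseBlacklist().
// All names and the sample data are invented for illustration.
public class BlacklistParseSketch
{
    static List<String> extractPatterns( String blacklistText ) throws IOException
    {
        List<String> patterns = new ArrayList<String>();
        BufferedReader in = new BufferedReader( new StringReader( blacklistText ) );
        String line;

        while( (line = in.readLine()) != null )
        {
            line = line.trim();
            if( line.length() == 0 ) continue;     // Empty line
            if( line.startsWith( "#" ) ) continue; // It's a comment

            int ws = line.indexOf( ' ' );
            if( ws == -1 ) ws = line.indexOf( '\t' );
            if( ws != -1 ) line = line.substring( 0, ws );

            patterns.add( line );                  // SpamFilter would compile this as a regexp here
        }

        return patterns;
    }

    public static void main( String[] args ) throws IOException
    {
        String sample = "# sample MT-Blacklist entries (invented)\n"
                      + "cheap-pills\\.example\n"
                      + "casino-spam\\.example   trailing text is cut at the first whitespace\n"
                      + "\n"
                      + "viagra";

        // Prints: [cheap-pills\.example, casino-spam\.example, viagra]
        System.out.println( extractPatterns( sample ) );
    }
}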
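The maxurls parameter documented in the class javadoc works against URL_REGEXP, the pattern compiled into m_urlPattern during initialize(). The counting code itself falls outside this excerpt, so the sketch below only illustrates the heuristic: it copies the URL_REGEXP string and counts matches with java.util.regex instead of the Jakarta ORO Perl5Compiler/Perl5Matcher the filter actually uses. The UrlCountSketch class, the countUrls helper, the sample edit text, and the threshold of 5 (the documented default, even though the field m_maxUrls defaults to 10) are assumptions made for this illustration.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Standalone sketch of the "maxurls" counting heuristic, using java.util.regex
// rather than the ORO classes in SpamFilter.  Names and sample data are invented.
public class UrlCountSketch
{
    // Same pattern text as SpamFilter.URL_REGEXP, copied here for the sketch.
    private static final String URL_REGEXP =
        "(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;]+)";

    private static final Pattern URL_PATTERN = Pattern.compile( URL_REGEXP );

    // Counts how many URL-like strings occur in the submitted page text.
    static int countUrls( String pageText )
    {
        Matcher m = URL_PATTERN.matcher( pageText );
        int count = 0;
        while( m.find() ) count++;
        return count;
    }

    public static void main( String[] args )
    {
        String edit = "See http://example.com/a and https://example.org/b and mailto:spam@example.com";
        int maxUrls = 5; // documented default for the "maxurls" parameter

        int urls = countUrls( edit );
        System.out.println( urls > maxUrls ? "would be flagged as spam"
                                           : "allowed: " + urls + " URLs" );
    }
}

In the filter itself the same kind of count would presumably feed the REASON_TOO_MANY_URLS rejection via checkStrategy(), but that code is not part of the excerpt shown here.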