⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 websessionfilter.java

📁 java编写的OCR软件
💻 JAVA
字号:
package de.spieleck.app.jacson.filter;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.text.ParseException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Date;

import org.apache.oro.text.regex.*;

import de.spieleck.config.ConfigNode;

import de.spieleck.app.jacson.JacsonException;
import de.spieleck.app.jacson.JacsonRegistry;
import de.spieleck.app.jacson.JacsonConfigException;
import de.spieleck.app.jacson.util.RegExpUtil;

/**
 * A hack to derive something like a session from a web log file by using
 * the well known ip=user session till timeout logic. This isn't working
 * great, but it is working. 
 * <br/>
 * <b>Note:</b> The implementation depends on the fact that timestamps
 * are monotonically increasing in the logfile. If this isn't the case we
 * are in real trouble.
 * <br/>
 * <b>Note:</b> The implementation currently cannot merge sessions between
 * multiple input streams (separated by null chunks)
 * <br/>
 * The plugin understands the following parameters:
 * <ul>
 * <li>timeout - the timeout to use to terminate sessions in Milliseconds. Optional, defaults to 1200000 = 20 minutes.</li>
 * <li>dateregexp - a regular expression to fetch the date/time data from a chunk. Required. </li>
 * <li>clientregexp - another regular expression to fetch the client from the chunk. Required. For web log files it is common to use either the IP or IP+BrowserID, or better, URL session IDs if you have them.</li>
 * <li>All other options for RegExpUtil. These are shared by above regexps.</li>
 * <li>dateformat - Format for the datestring to be parsed in the java.text.SimpleDateFormat syntax. Defaults to yyyy-MM-dd HH:mm:ss</li>
 *
 * </ul>
 * @see de.spieleck.app.jacson.util.RegExpUtil
 * @author fsn
 */
public class WebSessionFilter
    extends ConstFilter
{
    /** Name of the config node holding the session timeout (milliseconds). */
    public final static String TIMEOUT_NODE = "timeout";

    /** Name of the config node holding the date extraction regexp. */
    public final static String DATEREGEXP_NODE = "dateregexp";

    /** Name of the config node holding the client extraction regexp. */
    public final static String CLIENTREGEXP_NODE = "clientregexp";

    /** Name of the config node holding the SimpleDateFormat pattern. */
    public final static String DATEFORMAT_NODE = "dateformat";

    /** Date format used when the configuration does not give one. */
    public final static String DEFAULT_DATEFORMAT = "yyyy-MM-dd HH:mm:ss";

    /** Timeout used when the configuration does not give one (20 minutes). */
    public final static int DEFAULT_TIMEOUT = 1200000;

    /** Name of this filter as used in messages and pattern lookup. */
    public final static String WSF = "WebSessionFilter";

    /** Utility for regular expressions */
    protected RegExpUtil rutil;

    /** The timeout assumed for a session, in milliseconds. */
    protected int timeout;

    /** Date regular expression */
    protected Pattern datePattern;

    /** Date parse format */
    protected DateFormat dateFormat;

    /** ID regular expression */
    protected Pattern idPattern;

    /**
     * Timestamp of the chunk handled last; only meaningful while
     * {@link #lastTimeLong} is non-null.
     */
    protected long lastTime = -1;

    /**
     * Boxed form of {@link #lastTime}, shared by all sessions touched at
     * that timestamp so that consecutive chunks with the same time do not
     * allocate a new object each. Null until the first chunk was handled.
     */
    protected Long lastTimeLong = null;

    /**
     * Sessions observed so far: client id (String) mapped to the time of
     * its latest chunk (Long). The map is access ordered, so iteration
     * starts with the client that was put least recently.
     */
    LinkedHashMap sessions = new LinkedHashMap(1000, (float)0.75, true);

    /**
     * Reads timeout, both regular expressions and the date format from
     * the configuration.
     *
     * @param node configuration node of this filter
     * @param registry registry supplying the shared RegExpUtil
     * @throws JacsonConfigException if the timeout is negative or the
     *         date format cannot be set up
     */
    public void init(ConfigNode node, JacsonRegistry registry)
        throws JacsonConfigException
    {
        rutil = registry.getRegExpUtil();
        timeout = node.getInt(TIMEOUT_NODE, DEFAULT_TIMEOUT);
        // Zero passes this check, hence the message states ">= 0".
        if ( timeout < 0 )
            throw new JacsonConfigException(WSF+" needs timeout >= 0");
        datePattern = rutil.obtainPattern(WSF, node, registry,DATEREGEXP_NODE);
        idPattern = rutil.obtainPattern(WSF, node, registry, CLIENTREGEXP_NODE);
        try
        {
            String h = node.getString(DATEFORMAT_NODE, DEFAULT_DATEFORMAT);
            dateFormat = new SimpleDateFormat(h);
        }
        catch ( Exception e )
        {
            throw new JacsonConfigException("Cannot parse Format.", e);
        }
        // NOTE(review): the parent is initialised last, presumably because
        // it may call accept(), which needs rutil - confirm in ConstFilter.
        super.init(node, registry);
    }

    /**
     * Tells whether a configuration node is one this filter understands:
     * one of the four nodes named by the *_NODE constants, or anything
     * RegExpUtil accepts as an option.
     *
     * @param node the configuration node to check
     * @return true if the node is understood
     */
    public boolean accept(ConfigNode node)
    {
        String name = node.getName();
        return TIMEOUT_NODE.equals(name)
            || DATEREGEXP_NODE.equals(name)
            || CLIENTREGEXP_NODE.equals(name)
            || DATEFORMAT_NODE.equals(name)
            || rutil.acceptOptions(node);
    }

    /**
     * Takes one chunk, records the client found in it as active at the
     * time found in it, and emits every client whose latest chunk is at
     * least {@link #timeout} milliseconds older than that time.
     * <br/>
     * A null chunk emits all remaining clients and is then passed on.
     * A chunk in which the date or the client pattern does not match is
     * dropped without any effect.
     *
     * @param chunk the input chunk, or null
     * @throws JacsonException if the extracted date cannot be parsed
     */
    public void putChunk(String chunk)
        throws JacsonException
    {
        if ( chunk == null )
        {
            flushSessions(Long.MAX_VALUE);
            drain.putChunk(null);
            return;
        }
        String date = matching(chunk, datePattern);
        if ( date == null )
            return;
        String id = matching(chunk, idPattern);
        if ( id == null )
            return;
        long time;
        try
        {
            Date d = dateFormat.parse(date);
            time = d.getTime();
        }
        catch(ParseException pe)
        {
            throw new JacsonException("Failed parsing date: <"+date+">",pe);
        }
        Long t;
        // The null test keeps a first chunk whose time happens to equal the
        // initial value of lastTime from storing null in the session map.
        if ( lastTimeLong != null && time == lastTime )
        {
            t = lastTimeLong;
        }
        else
        {
            lastTimeLong = t = new Long(time);
            lastTime = time;
            // Time moved on, so some sessions may have run out.
            flushSessions(time - timeout);
        }
        // Being a put into an access ordered map, this also moves the
        // client to the end of the iteration order.
        sessions.put(id, t);
    }

    /**
     * Clean up the stored sessions up to a certain point
     * in history. Walks the sessions starting with the one put least
     * recently, sends the client id of each session whose time is not
     * after the limit to the drain and removes it. Stops at the first
     * session that is newer than the limit.
     *
     * @param limit latest time (inclusive) a session may have to be flushed
     * @throws JacsonException if the drain fails to take a chunk
     */
    protected void flushSessions(long limit)
        throws JacsonException
    {
        Iterator it = sessions.entrySet().iterator();
        while (it.hasNext())
        {
            Map.Entry e = (Map.Entry) it.next();
            Long l = (Long) e.getValue();
            if ( l.longValue() > limit )
                break;
            drain.putChunk((String) e.getKey());
            it.remove();
        }
    }

    /**
     * Convenience method to extract a string from a chunk via a match.
     * The result is the contents of the groups 1 and up of the first
     * match, joined by single blanks. A group that took no part in the
     * match contributes nothing, its separating blank is kept. A pattern
     * without any such group yields the empty string.
     *
     * @param chunk the text to search
     * @param p the pattern to search for
     * @return the joined group contents, or null if p is not found
     */
    protected String matching(String chunk, Pattern p)
    {
        PatternMatcher matcher = rutil.getMatcher();
        if ( !matcher.contains(chunk, p) )
            return null;
        MatchResult match = matcher.getMatch();
        int n = match.groups();
        StringBuffer sb = new StringBuffer(100);
        for(int i = 1; i < n; i++)
        {
            if ( i > 1 )
                sb.append(' ');
            String g = match.group(i);
            // Appending a null String would add the text "null".
            if ( g != null )
                sb.append(g);
        }
        return sb.toString();
    }

}
//
//    Jacson - Text Filtering with Java.
//    Copyright (C) 2003 Frank S. Nestel (nestefan -at- users.sourceforge.net)
//
//    This library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU Lesser General Public
//    License as published by the Free Software Foundation; either
//    version 2.1 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public
//    License along with this library; if not, write to the Free Software
//    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -