⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexpextractfilter.java

📁 java编写的OCR软件
💻 JAVA
字号:
package de.spieleck.app.jacson.filter;

import org.apache.oro.text.regex.*;

import de.spieleck.config.ConfigNode;
import de.spieleck.config.ConfigVerify.Acceptor;

import de.spieleck.app.jacson.JacsonException;
import de.spieleck.app.jacson.JacsonConfigException;
import de.spieleck.app.jacson.JacsonRegistry;
import de.spieleck.app.jacson.util.RegExpUtil;
import de.spieleck.app.jacson.util.ConfigUtil;

/**
 * Filter that extracts part of the incoming expression and
 * forwards the obtained groups as a new chunk.
 * <p>
 * When the configured pattern is found in a chunk, the contents of its
 * capturing groups (group 1 onwards) are joined with the configured
 * separator and forwarded as one chunk in place of the original.
 * Configurable whether to block chunks that do not match at all.
 * @author fsn
 */
public class RegExpExtractFilter
    extends FilterBase
    implements Acceptor
{
    /** Name of the config node holding the string placed between groups. */
    public final static String SEPARATOR_NODE = "separator";

    /** Name of the config node holding the "only matches" switch. */
    public final static String ONLYMATCHES_NODE = "onlymatches";

    /** Separator used when the configuration does not supply one. */
    private final static String DEFAULT_SEPARATOR = " ";

    /** oro-Regexp-Pattern */
    protected Pattern pattern;

    /** If set, let through only chunks which match the expression. */
    protected boolean onlymatches;

    /** String inserted between consecutive extracted groups. */
    protected String separator = DEFAULT_SEPARATOR;

    /** Helper that supplies the pattern and the matcher. */
    protected RegExpUtil rutil;

    public RegExpExtractFilter()
    {
    }

    /**
     * Reads pattern, separator and the "only matches" switch from the
     * configuration and then verifies the configuration against
     * {@link #accept(ConfigNode)}.
     *
     * @param config configuration node of this filter
     * @param registry registry providing the shared {@link RegExpUtil}
     * @throws JacsonConfigException declared for configuration problems
     */
    public void init(ConfigNode config, JacsonRegistry registry)
        throws JacsonConfigException
    {
        rutil = registry.getRegExpUtil();
        pattern = rutil.obtainPattern("RegExpExtract", config, registry);
        // A null separator would be rendered as the literal text "null"
        // by StringBuffer.append, so fall back to the default instead.
        String configured = config.getString(SEPARATOR_NODE, null);
        separator = configured != null ? configured : DEFAULT_SEPARATOR;
        onlymatches = config.getBoolean(ONLYMATCHES_NODE, false);
        ConfigUtil.verify(config, this);
    }

    /**
     * Tells whether a configuration node is known to this filter.
     *
     * @param node the node to check
     * @return <code>true</code> for the separator node, the onlymatches
     *         node and every node accepted by the {@link RegExpUtil}
     */
    public boolean accept(ConfigNode node)
    {
        String name = node.getName();
        return SEPARATOR_NODE.equals(name)
            || ONLYMATCHES_NODE.equals(name)
            || rutil.accept(node);
    }

    /**
     * Processes one chunk.
     * <ul>
     * <li>A <code>null</code> chunk is forwarded unchanged.</li>
     * <li>If the pattern is found, the groups 1..n-1 of the match are
     *     joined with the separator and forwarded; a pattern without
     *     capturing groups therefore yields an empty chunk.</li>
     * <li>Otherwise the chunk is forwarded unchanged unless
     *     {@link #onlymatches} is set, in which case it is dropped.</li>
     * </ul>
     *
     * @param chunk the incoming chunk, may be <code>null</code>
     * @throws JacsonException if thrown by the drain
     */
    public void putChunk(String chunk)
        throws JacsonException
    {
        if ( chunk == null )
        {
            drain.putChunk(chunk);
            return;
        }
        PatternMatcher matcher = rutil.getMatcher();
        if ( matcher.contains(chunk, pattern) )
        {
            drain.putChunk(joinGroups(matcher.getMatch()));
        }
        else if ( !onlymatches )
        {
            drain.putChunk(chunk);
        }
    }

    /** Joins the capturing groups of a match, separated by the separator. */
    private String joinGroups(MatchResult match)
    {
        // groups() counts group 0 (the entire match) as well, so the
        // capturing groups are 1 .. groups()-1.
        int n = match.groups();
        StringBuffer sb = new StringBuffer();
        for (int i = 1; i < n; i++)
        {
            if ( i > 1 )
                sb.append(separator);
            // A group that did not take part in the match is reported
            // as null; append nothing instead of the text "null".
            String group = match.group(i);
            if ( group != null )
                sb.append(group);
        }
        return sb.toString();
    }
}
//
//    Jacson - Text Filtering with Java.
//    Copyright (C) 2002 Frank S. Nestel (nestefan -at- users.sourceforge.net)
//
//    This library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU Lesser General Public
//    License as published by the Free Software Foundation; either
//    version 2.1 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public
//    License along with this library; if not, write to the Free Software
//    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -