📄 highlightfilter.java
字号:
/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2004 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Lucene" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * "Apache Lucene", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. * * $Id: HighlightFilter.java,v 1.2 2004/05/29 20:24:33 chedong Exp $ */package org.apache.lucene.analysis;import java.io.IOException;import java.util.ArrayList;/** * HighLightFilter is a reverse StopFilter:<br> * * <ul> * <li> * StopFilter removed tokens from token stream according to stop words table; * </li> * <li> * HighlightFilter only allow words return from the input TokenStream that are * named in the array of words. * </li> * </ul> * * * @author Che, Dong */public final class HighlightFilter extends TokenFilter { //~ Instance fields -------------------------------------------------------- /** token buffer */ private StringBuffer tokenTextBuffer = new StringBuffer(); /** current token start offset */ private int currentStart = 0; /** current token end offset */ private int currentEnd = 0; /** return words list */ private ArrayList wordList = new ArrayList(); //~ Constructors ----------------------------------------------------------- /** * Constructs a filter which allow words return from the input TokenStream * that are named in the array of words. * * @param in input token stream * @param stopWords stop words array */ public HighlightFilter(TokenStream in, String[] stopWords) { super(in); wordList = makeStopTable(stopWords); } /** * Constructs a filter which removes words from the input TokenStream that * are named in the ArrayList. * * @param in input token stream * @param words return word list */ public HighlightFilter(TokenStream in, ArrayList words) { super(in); wordList = words; } //~ Methods ---------------------------------------------------------------- /** * Builds a ArrayList from an array of stop words, appropriate for passing * into the StopFilter constructor. This permits this table construction * to be cached once when an Analyzer is constructed. * * @param wordList word list in String[] * * @return ArrayList word List */ public static final ArrayList makeStopTable(String[] wordList) { ArrayList stopTable = new ArrayList(wordList.length); for (int i = 0; i < wordList.length; i++) { stopTable.add(wordList[i]); } return stopTable; } /** * return merged token: C1C2 C2C3 C3C4 will be merge to C1C2C3C4 to return * * @return Token Returns the next input Token whose termText() is named in * word list. * * @throws IOException ioexception */ public final Token next() throws IOException { // return the first non-stop word for (Token token = input.next(); token != null; token = input.next()) { //get first match token if (tokenTextBuffer.length() == 0) { if (wordList.indexOf(token.termText()) != -1) { create(token); } } else { /** * find next token: <br> * if current token overlaped with previous token: C1C2 C2C3 * C2 with same C2 <br> * then merged with cached privious token */ if (wordList.indexOf(token.termText()) != -1) { if (token.startOffset() > (currentEnd)) { Token returnToken = new Token(tokenTextBuffer.toString(), currentStart, currentEnd ); create(token); return returnToken; } else { //merge with previous neighbor token append(token); } } } } //return cached string buffer as on token if (tokenTextBuffer.length() > 0) { Token tk = new Token(tokenTextBuffer.toString(), currentStart, currentEnd ); //empty token text buffer tokenTextBuffer = new StringBuffer(); return tk; } else { // reached EOS -- return null return null; } } /** * append token cache: merge duplicate part<br> * example: C1C2 C2C3 ==>C1C2C3 * * @param t token */ private final void append(Token t) { String appendText = t.termText(); int start = currentEnd - t.startOffset(); appendText = appendText.substring(start, appendText.length()); tokenTextBuffer.append(appendText); currentEnd = t.endOffset(); } /** * create new token cache * * @param t input token */ private final void create(Token t) { tokenTextBuffer = new StringBuffer(); tokenTextBuffer.append(t.termText()); currentStart = t.startOffset(); currentEnd = t.endOffset(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -