⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 synonymtokenfilter.java

📁 lucene2.2.0版本
💻 JAVA
字号:
package org.apache.lucene.index.memory;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import org.apache.lucene.analysis.Token;import org.apache.lucene.analysis.TokenFilter;import org.apache.lucene.analysis.TokenStream;/** * Injects additional tokens for synonyms of token terms fetched from the * underlying child stream; the child stream must deliver lowercase tokens * for synonyms to be found. *  * @author whoschek.AT.lbl.DOT.gov */public class SynonymTokenFilter extends TokenFilter {      /** The Token.type used to indicate a synonym to higher level filters. */  public static final String SYNONYM_TOKEN_TYPE = "SYNONYM";  private final SynonymMap synonyms;  private final int maxSynonyms;    private String[] stack = null;  private int index = 0;  private Token current = null;  private int todo = 0;    /**   * Creates an instance for the given underlying stream and synonym table.   *    * @param input   *            the underlying child token stream   * @param synonyms   *            the map used to extract synonyms for terms   * @param maxSynonyms   *            the maximum number of synonym tokens to return per underlying   *            token word (a value of Integer.MAX_VALUE indicates unlimited)   */  public SynonymTokenFilter(TokenStream input, SynonymMap synonyms, int maxSynonyms) {    super(input);    if (input == null)      throw new IllegalArgumentException("input must not be null");    if (synonyms == null)      throw new IllegalArgumentException("synonyms must not be null");    if (maxSynonyms < 0)       throw new IllegalArgumentException("maxSynonyms must not be negative");        this.synonyms = synonyms;    this.maxSynonyms = maxSynonyms;  }    /** Returns the next token in the stream, or null at EOS. */  public Token next() throws IOException {    Token token;    while (todo > 0 && index < stack.length) { // pop from stack      token = createToken(stack[index++], current);      if (token != null) {        todo--;        return token;      }    }        token = input.next();    if (token == null) return null; // EOS; iterator exhausted        stack = synonyms.getSynonyms(token.termText()); // push onto stack    if (stack.length > maxSynonyms) randomize(stack);    index = 0;    current = token;    todo = maxSynonyms;    return token;  }    /**   * Creates and returns a token for the given synonym of the current input   * token; Override for custom (stateless or stateful) behaviour, if desired.   *    * @param synonym    *            a synonym for the current token's term   * @param current   *            the current token from the underlying child stream   * @return a new token, or null to indicate that the given synonym should be   *         ignored   */  protected Token createToken(String synonym, Token current) {    Token token = new Token(      synonym, current.startOffset(), current.endOffset(), SYNONYM_TOKEN_TYPE);    token.setPositionIncrement(0);    return token;  }    /**   * Randomize synonyms to later sample a subset. Uses constant random seed   * for reproducability. Uses "DRand", a simple, fast, uniform pseudo-random   * number generator with medium statistical quality (multiplicative   * congruential method), producing integers in the range [Integer.MIN_VALUE,   * Integer.MAX_VALUE].   */  private static void randomize(Object[] arr) {    int seed = 1234567; // constant    int randomState = 4*seed + 1;//    Random random = new Random(seed); // unnecessary overhead    int len = arr.length;    for (int i=0; i < len-1; i++) {      randomState *= 0x278DDE6D; // z(i+1)=a*z(i) (mod 2**32)      int r = randomState % (len-i);      if (r < 0) r = -r; // e.g. -9 % 2 == -1//      int r = random.nextInt(len-i);            // swap arr[i, i+r]      Object tmp = arr[i];      arr[i] = arr[i + r];      arr[i + r] = tmp;    }     }  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -