⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fuzzyquery.java

📁 Lucene a java open-source SearchEngine Framework
💻 JAVA
字号:
package org.apache.lucene.search;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Term;import org.apache.lucene.util.PriorityQueue;import org.apache.lucene.util.ToStringUtils;import java.io.IOException;/** Implements the fuzzy search query. The similiarity measurement * is based on the Levenshtein (edit distance) algorithm. */public class FuzzyQuery extends MultiTermQuery {    public final static float defaultMinSimilarity = 0.5f;  public final static int defaultPrefixLength = 0;    private float minimumSimilarity;  private int prefixLength;    /**   * Create a new FuzzyQuery that will match terms with a similarity    * of at least <code>minimumSimilarity</code> to <code>term</code>.   * If a <code>prefixLength</code> &gt; 0 is specified, a common prefix   * of that length is also required.   *    * @param term the term to search for   * @param minimumSimilarity a value between 0 and 1 to set the required similarity   *  between the query term and the matching terms. For example, for a   *  <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length   *  as the query term is considered similar to the query term if the edit distance   *  between both terms is less than <code>length(term)*0.5</code>   * @param prefixLength length of common (non-fuzzy) prefix   * @throws IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0   * or if prefixLength &lt; 0   */  public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {    super(term);        if (minimumSimilarity >= 1.0f)      throw new IllegalArgumentException("minimumSimilarity >= 1");    else if (minimumSimilarity < 0.0f)      throw new IllegalArgumentException("minimumSimilarity < 0");    if (prefixLength < 0)      throw new IllegalArgumentException("prefixLength < 0");        this.minimumSimilarity = minimumSimilarity;    this.prefixLength = prefixLength;  }    /**   * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.   */  public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {      this(term, minimumSimilarity, defaultPrefixLength);  }  /**   * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.   */  public FuzzyQuery(Term term) {    this(term, defaultMinSimilarity, defaultPrefixLength);  }    /**   * Returns the minimum similarity that is required for this query to match.   * @return float value between 0.0 and 1.0   */  public float getMinSimilarity() {    return minimumSimilarity;  }      /**   * Returns the non-fuzzy prefix length. This is the number of characters at the start   * of a term that must be identical (not fuzzy) to the query term if the query   * is to match that term.    */  public int getPrefixLength() {    return prefixLength;  }  protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {    return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);  }    public Query rewrite(IndexReader reader) throws IOException {    FilteredTermEnum enumerator = getEnum(reader);    int maxClauseCount = BooleanQuery.getMaxClauseCount();    ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);    ScoreTerm reusableST = null;    try {      do {        float score = 0.0f;        Term t = enumerator.term();        if (t != null) {          score = enumerator.difference();          if (reusableST == null) {            reusableST = new ScoreTerm(t, score);          } else if (score >= reusableST.score) {            // reusableST holds the last "rejected" entry, so, if            // this new score is not better than that, there's no            // need to try inserting it            reusableST.score = score;            reusableST.term = t;          } else {            continue;          }          reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);        }      } while (enumerator.next());    } finally {      enumerator.close();    }        BooleanQuery query = new BooleanQuery(true);    int size = stQueue.size();    for(int i = 0; i < size; i++){      ScoreTerm st = (ScoreTerm) stQueue.pop();      TermQuery tq = new TermQuery(st.term);      // found a match      tq.setBoost(getBoost() * st.score); // set the boost      query.add(tq, BooleanClause.Occur.SHOULD);          // add to query    }    return query;  }      public String toString(String field) {    StringBuffer buffer = new StringBuffer();    Term term = getTerm();    if (!term.field().equals(field)) {        buffer.append(term.field());        buffer.append(":");    }    buffer.append(term.text());    buffer.append('~');    buffer.append(Float.toString(minimumSimilarity));    buffer.append(ToStringUtils.boost(getBoost()));    return buffer.toString();  }    protected static class ScoreTerm {    public Term term;    public float score;        public ScoreTerm(Term term, float score){      this.term = term;      this.score = score;    }  }    protected static class ScoreTermQueue extends PriorityQueue {        public ScoreTermQueue(int size){      initialize(size);    }        /* (non-Javadoc)     * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)     */    protected boolean lessThan(Object a, Object b) {      ScoreTerm termA = (ScoreTerm)a;      ScoreTerm termB = (ScoreTerm)b;      if (termA.score == termB.score)        return termA.term.compareTo(termB.term) > 0;      else        return termA.score < termB.score;    }      }  public boolean equals(Object o) {    if (this == o) return true;    if (!(o instanceof FuzzyQuery)) return false;    if (!super.equals(o)) return false;    final FuzzyQuery fuzzyQuery = (FuzzyQuery) o;    if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false;    if (prefixLength != fuzzyQuery.prefixLength) return false;    return true;  }  public int hashCode() {    int result = super.hashCode();    result = 29 * result + minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0;    result = 29 * result + prefixLength;    return result;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -