📄 fuzzyquery.java
字号:
package org.apache.lucene.search;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Term;import org.apache.lucene.util.PriorityQueue;import org.apache.lucene.util.ToStringUtils;import java.io.IOException;/** Implements the fuzzy search query. The similiarity measurement * is based on the Levenshtein (edit distance) algorithm. */public class FuzzyQuery extends MultiTermQuery { public final static float defaultMinSimilarity = 0.5f; public final static int defaultPrefixLength = 0; private float minimumSimilarity; private int prefixLength; /** * Create a new FuzzyQuery that will match terms with a similarity * of at least <code>minimumSimilarity</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix * of that length is also required. * * @param term the term to search for * @param minimumSimilarity a value between 0 and 1 to set the required similarity * between the query term and the matching terms. For example, for a * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length * as the query term is considered similar to the query term if the edit distance * between both terms is less than <code>length(term)*0.5</code> * @param prefixLength length of common (non-fuzzy) prefix * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0 * or if prefixLength < 0 */ public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException { super(term); if (minimumSimilarity >= 1.0f) throw new IllegalArgumentException("minimumSimilarity >= 1"); else if (minimumSimilarity < 0.0f) throw new IllegalArgumentException("minimumSimilarity < 0"); if (prefixLength < 0) throw new IllegalArgumentException("prefixLength < 0"); this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; } /** * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. */ public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { this(term, minimumSimilarity, defaultPrefixLength); } /** * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. */ public FuzzyQuery(Term term) { this(term, defaultMinSimilarity, defaultPrefixLength); } /** * Returns the minimum similarity that is required for this query to match. * @return float value between 0.0 and 1.0 */ public float getMinSimilarity() { return minimumSimilarity; } /** * Returns the non-fuzzy prefix length. This is the number of characters at the start * of a term that must be identical (not fuzzy) to the query term if the query * is to match that term. */ public int getPrefixLength() { return prefixLength; } protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); } public Query rewrite(IndexReader reader) throws IOException { FilteredTermEnum enumerator = getEnum(reader); int maxClauseCount = BooleanQuery.getMaxClauseCount(); ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); ScoreTerm reusableST = null; try { do { float score = 0.0f; Term t = enumerator.term(); if (t != null) { score = enumerator.difference(); if (reusableST == null) { reusableST = new ScoreTerm(t, score); } else if (score >= reusableST.score) { // reusableST holds the last "rejected" entry, so, if // this new score is not better than that, there's no // need to try inserting it reusableST.score = score; reusableST.term = t; } else { continue; } reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST); } } while (enumerator.next()); } finally { enumerator.close(); } BooleanQuery query = new BooleanQuery(true); int size = stQueue.size(); for(int i = 0; i < size; i++){ ScoreTerm st = (ScoreTerm) stQueue.pop(); TermQuery tq = new TermQuery(st.term); // found a match tq.setBoost(getBoost() * st.score); // set the boost query.add(tq, BooleanClause.Occur.SHOULD); // add to query } return query; } public String toString(String field) { StringBuffer buffer = new StringBuffer(); Term term = getTerm(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); } buffer.append(term.text()); buffer.append('~'); buffer.append(Float.toString(minimumSimilarity)); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } protected static class ScoreTerm { public Term term; public float score; public ScoreTerm(Term term, float score){ this.term = term; this.score = score; } } protected static class ScoreTermQueue extends PriorityQueue { public ScoreTermQueue(int size){ initialize(size); } /* (non-Javadoc) * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object) */ protected boolean lessThan(Object a, Object b) { ScoreTerm termA = (ScoreTerm)a; ScoreTerm termB = (ScoreTerm)b; if (termA.score == termB.score) return termA.term.compareTo(termB.term) > 0; else return termA.score < termB.score; } } public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof FuzzyQuery)) return false; if (!super.equals(o)) return false; final FuzzyQuery fuzzyQuery = (FuzzyQuery) o; if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false; if (prefixLength != fuzzyQuery.prefixLength) return false; return true; } public int hashCode() { int result = super.hashCode(); result = 29 * result + minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0; result = 29 * result + prefixLength; return result; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -