⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 disjunctionmaxquery.java

📁 lucene-2.4.0 是一个全文收索的工具包
💻 JAVA
字号:
package org.apache.lucene.search;/** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.index.IndexReader;import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import java.util.Collection;import java.util.Set;/** * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. * This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be * combined equivalently into a single search field).  We want the primary score to be the one associated with the highest boost, * not the sum of the field scores (as BooleanQuery would give). * If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching * another gets a higher score than "albino" matching both fields. * To get this result, use both BooleanQuery and DisjunctionMaxQuery:  for each term a DisjunctionMaxQuery searches for it in * each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. * The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms * in the multiple fields. */public class DisjunctionMaxQuery extends Query {  /* The subqueries */  private ArrayList disjuncts = new ArrayList();  /* Multiple of the non-max disjunct scores added into our final score.  Non-zero values support tie-breaking. */  private float tieBreakerMultiplier = 0.0f;  /** Creates a new empty DisjunctionMaxQuery.  Use add() to add the subqueries.   * @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied by this weight   *        and added into the final score.  If non-zero, the value should be small, on the order of 0.1, which says that   *        10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique   *        word in the lower scored field (i.e., one that is not in any higher scored field.   */  public DisjunctionMaxQuery(float tieBreakerMultiplier) {    this.tieBreakerMultiplier = tieBreakerMultiplier;  }  /**   * Creates a new DisjunctionMaxQuery   * @param disjuncts a Collection<Query> of all the disjuncts to add   * @param tieBreakerMultiplier   the weight to give to each matching non-maximum disjunct   */  public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) {    this.tieBreakerMultiplier = tieBreakerMultiplier;    add(disjuncts);  }  /** Add a subquery to this disjunction   * @param query the disjunct added   */  public void add(Query query) {    disjuncts.add(query);  }  /** Add a collection of disjuncts to this disjunction   * via Iterable<Query>   */  public void add(Collection disjuncts) {    this.disjuncts.addAll(disjuncts);  }  /** An Iterator<Query> over the disjuncts */  public Iterator iterator() {    return disjuncts.iterator();  }  /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */  private class DisjunctionMaxWeight implements Weight {    private Similarity similarity;   // The similarity which we are associated.    private ArrayList weights = new ArrayList();  // The Weight's for our subqueries, in 1-1 correspondence with disjuncts    /* Construct the Weight for this Query searched by searcher.  Recursively construct subquery weights. */    public DisjunctionMaxWeight(Searcher searcher) throws IOException {      this.similarity = searcher.getSimilarity();      for (int i = 0; i < disjuncts.size(); i++)        weights.add(((Query) disjuncts.get(i)).createWeight(searcher));    }    /* Return our associated DisjunctionMaxQuery */    public Query getQuery() { return DisjunctionMaxQuery.this; }    /* Return our boost */    public float getValue() { return getBoost(); }    /* Compute the sub of squared weights of us applied to our subqueries.  Used for normalization. */    public float sumOfSquaredWeights() throws IOException {      float max = 0.0f, sum = 0.0f;      for (int i = 0; i < weights.size(); i++) {        float sub = ((Weight) weights.get(i)).sumOfSquaredWeights();        sum += sub;        max = Math.max(max, sub);      }      return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost();    }    /* Apply the computed normalization factor to our subqueries */    public void normalize(float norm) {      norm *= getBoost();  // Incorporate our boost      for (int i = 0 ; i < weights.size(); i++)        ((Weight) weights.get(i)).normalize(norm);    }    /* Create the scorer used to score our associated DisjunctionMaxQuery */    public Scorer scorer(IndexReader reader) throws IOException {      DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity);      for (int i = 0 ; i < weights.size(); i++) {        Weight w = (Weight) weights.get(i);        Scorer subScorer = w.scorer(reader);        if (subScorer == null) return null;        result.add(subScorer);      }      return result;    }    /* Explain the score we computed for doc */    public Explanation explain(IndexReader reader, int doc) throws IOException {      if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc);      ComplexExplanation result = new ComplexExplanation();      float max = 0.0f, sum = 0.0f;      result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");      for (int i = 0 ; i < weights.size(); i++) {        Explanation e = ((Weight) weights.get(i)).explain(reader, doc);        if (e.isMatch()) {          result.setMatch(Boolean.TRUE);          result.addDetail(e);          sum += e.getValue();          max = Math.max(max, e.getValue());        }      }      result.setValue(max + (sum - max)*tieBreakerMultiplier);      return result;    }  }  // end of DisjunctionMaxWeight inner class  /* Create the Weight used to score us */  protected Weight createWeight(Searcher searcher) throws IOException {    return new DisjunctionMaxWeight(searcher);  }  /** Optimize our representation and our subqueries representations   * @param reader the IndexReader we query   * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */  public Query rewrite(IndexReader reader) throws IOException {    if (disjuncts.size() == 1) {      Query singleton = (Query) disjuncts.get(0);      Query result = singleton.rewrite(reader);      if (getBoost() != 1.0f) {        if (result == singleton) result = (Query)result.clone();        result.setBoost(getBoost() * result.getBoost());      }      return result;    }    DisjunctionMaxQuery clone = null;    for (int i = 0 ; i < disjuncts.size(); i++) {      Query clause = (Query) disjuncts.get(i);      Query rewrite = clause.rewrite(reader);      if (rewrite != clause) {        if (clone == null) clone = (DisjunctionMaxQuery)this.clone();        clone.disjuncts.set(i, rewrite);      }    }    if (clone != null) return clone;    else return this;  }  /** Create a shallow copy of us -- used in rewriting if necessary   * @return a copy of us (but reuse, don't copy, our subqueries) */  public Object clone() {    DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone();    clone.disjuncts = (ArrayList)this.disjuncts.clone();    return clone;  }  // inherit javadoc  public void extractTerms(Set terms) {      for (int i = 0; i < disjuncts.size(); i++) {          ((Query)disjuncts.get(i)).extractTerms(terms);      }  }  /** Prettyprint us.   * @param field the field to which we are applied   * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost"   */  public String toString(String field) {    StringBuffer buffer = new StringBuffer();    buffer.append("(");    for (int i = 0 ; i < disjuncts.size(); i++) {      Query subquery = (Query) disjuncts.get(i);      if (subquery instanceof BooleanQuery) {   // wrap sub-bools in parens        buffer.append("(");        buffer.append(subquery.toString(field));        buffer.append(")");      }      else buffer.append(subquery.toString(field));      if (i != disjuncts.size()-1) buffer.append(" | ");    }    buffer.append(")");    if (tieBreakerMultiplier != 0.0f) {      buffer.append("~");      buffer.append(tieBreakerMultiplier);    }    if (getBoost() != 1.0) {      buffer.append("^");      buffer.append(getBoost());    }    return buffer.toString();  }  /** Return true iff we represent the same query as o   * @param o another object   * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us   */  public boolean equals(Object o) {    if (! (o instanceof DisjunctionMaxQuery) ) return false;    DisjunctionMaxQuery other = (DisjunctionMaxQuery)o;    return this.getBoost() == other.getBoost()            && this.tieBreakerMultiplier == other.tieBreakerMultiplier            && this.disjuncts.equals(other.disjuncts);  }  /** Compute a hash code for hashing us   * @return the hash code   */  public int hashCode() {    return Float.floatToIntBits(getBoost())            + Float.floatToIntBits(tieBreakerMultiplier)            + disjuncts.hashCode();  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -