📄 queryengine.java
字号:
package it.unimi.dsi.mg4j.query;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.ints.IntOpenHashSet;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;import it.unimi.dsi.fastutil.objects.Reference2DoubleOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.lang.FlyweightPrototype;import it.unimi.dsi.lang.FlyweightPrototypes;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.query.nodes.Query;import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitor;import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;import it.unimi.dsi.mg4j.query.nodes.QueryTransformer;import it.unimi.dsi.mg4j.query.parser.QueryParser;import it.unimi.dsi.mg4j.query.parser.QueryParserException;import it.unimi.dsi.mg4j.search.DocumentIterator;import it.unimi.dsi.mg4j.search.score.AbstractAggregator;import it.unimi.dsi.mg4j.search.score.DocumentScoreInfo;import it.unimi.dsi.mg4j.search.score.LinearAggregator;import 
it.unimi.dsi.mg4j.search.score.ScoredDocumentBoundedSizeQueue;import it.unimi.dsi.mg4j.search.score.Scorer;import java.io.IOException;import java.util.Arrays;import java.util.Iterator;import org.apache.log4j.Logger;import cern.colt.Sorting;/** An engine that takes a query and returns results, using a programmable * set of scorers and policies. * * <p>This class embodies most of the work that must be done when answering a query. * Basically, {@link #process(String, int, int, ObjectArrayList) process(query,offset,length,results)} takes <code>query</code>, * parses it, turns it into a document iterator, scans the results, and deposits * <code>length</code> results starting at <code>offset</code> into the list <code>results</code>. * * <p>There are however several additional features available. First of all, either by separating * several queries with commas, or using directly {@link #process(Query[], int, int, ObjectArrayList)} * it is possible to resolve a series of queries with an “and-then” semantics: results * are added from each query, provided they did not appear before. * * <p>It is possible to {@linkplain #score(Scorer[], double[]) score queries} using one or * more scorers with different weights (see {@link it.unimi.dsi.mg4j.search.score}), and also * set {@linkplain #setWeights(Reference2DoubleMap) different weights for different indices} (they * will be passed to the scorers). The scorers influence the order when processing each query, * but results from different “and-then” queries are simply concatenated. * * <p>When using multiple scorers, <em>{@linkplain #equalize(int) equalisation}</em> can be used * to avoid the problem associated with the potentially different value ranges of each scorer. Equalisation * evaluates a settable number of sample documents and normalizes the scorers using the maximum value in * the sample. See {@link it.unimi.dsi.mg4j.search.score.AbstractAggregator} for some elaboration. 
* * <p><em>{@linkplain #multiplex Multiplexing}</em> transforms a query <samp><var>q</var></samp> into <samp>index0:<var>q</var> | index1:<var>q</var> …</samp>. * In other words, the query is multiplexed on all available indices. Note that if inside <samp><var>q</var></samp> * there are selection operators that specify an index, the inner specification will overwrite * the external one, so that the semantics of the query is only amplified, but never contradicted. * * <p>The results returned are instances of {@link it.unimi.dsi.mg4j.search.score.DocumentScoreInfo}. If * an {@linkplain #intervalSelector interval selector} has been set, * the <code>info</code> field will contain a map from indices to arrays of {@linkplain it.unimi.dsi.mg4j.query.SelectedInterval selected intervals} * satisfying the query (see {@link it.unimi.dsi.mg4j.search} for some elaboration on minimal-interval semantics support in MG4J). * * <p>For examples of usage of this class, please look at {@link it.unimi.dsi.mg4j.query.Query} * and {@link it.unimi.dsi.mg4j.query.QueryServlet}. * * <p><strong>Warning:</strong> This class is <strong>highly experimental</strong>. It has become * definitely more decent in MG4J, but still needs some refactoring. * * <p><strong>Warning</strong>: This class is not * thread safe, but it provides {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight copies}. * The {@link #copy()} method is strengthened so to return an object implementing this interface. * * @author Sebastiano Vigna * @author Paolo Boldi * @since 1.0 */public class QueryEngine implements FlyweightPrototype<QueryEngine> { private static final Logger LOGGER = Util.getLogger( QueryEngine.class ); private static final boolean ASSERTS = false; /** The parser used to parse queries. */ public final QueryParser queryParser; /** A map from names to indices. */ public final Object2ReferenceMap<String,Index> indexMap; /** The number of indices used by {@link #queryParser}. 
*/ public final int numIndices; /** Whether multiplex is active. */ public volatile boolean multiplex; /** The current interval selector, if any. */ public volatile IntervalSelector intervalSelector; /** The current scorer, or <code>null</code> if no scorer is in use. */ private Scorer scorer; /** The builder visitor used to make queries into document iterators. */ private final QueryBuilderVisitor<DocumentIterator> builderVisitor; /** A map associating a weight with each index. */ protected final Reference2DoubleOpenHashMap<Index> index2Weight; /** A transformer that will be applied to queries before resolving them, or <code>null</code>. */ private QueryTransformer transformer; /** Creates a new query engine. * * @param queryParser a query parser, or <code>null</code> if this query engine will {@linkplain #process(Query[], int, int, ObjectArrayList) just process pre-parsed queries}. * @param builderVisitor a builder visitor to transform {@linkplain Query queries} into {@linkplain DocumentIterator document iterators}. * @param indexMap a map from symbolic name to indices (used for multiplexing and default weight initialisation). */ public QueryEngine( final QueryParser queryParser, final QueryBuilderVisitor<DocumentIterator> builderVisitor, final Object2ReferenceMap<String,Index> indexMap ) { this.queryParser = queryParser; this.builderVisitor = builderVisitor; this.indexMap = indexMap; this.numIndices = indexMap.size(); this.index2Weight = new Reference2DoubleOpenHashMap<Index>(); // At start, all indices are equal. 
this.index2Weight.defaultReturnValue( 1.0 / numIndices ); } @SuppressWarnings("unchecked") public synchronized QueryEngine copy() { final QueryEngine newEngine = new QueryEngine( FlyweightPrototypes.copy( queryParser ), builderVisitor.copy(), indexMap ); newEngine.multiplex = multiplex; newEngine.intervalSelector = FlyweightPrototypes.copy( intervalSelector ); newEngine.scorer = FlyweightPrototypes.copy( scorer ); newEngine.setWeights( index2Weight ); return newEngine; } /** Activate equalisation with the given number of samples- * * @param samples the number of samples for equalisation, or 0 for no equalisation. */ public synchronized void equalize( final int samples ) { if ( scorer == null ) throw new IllegalStateException( "There is no scorer" ); if ( ! ( scorer instanceof AbstractAggregator ) ) throw new IllegalStateException( "The current scorer is not aggregated" ); ((AbstractAggregator)scorer).equalize( samples ); } /** Sets the scorers for this query engine. * * <p>If <code>scorer</code> has length zero, scoring is disabled. If it has length 1, * the only scorer is used for scoring, and the only element of <code>weight</code> is * discarded. Otherwise, a {@link LinearAggregator} is used to combine results from * the given scorers, using the given weights. * * @param scorer an array of {@linkplain Scorer scorers}. * @param weight a parallel array of weights (not to be confused with <em>index</em> weights). */ public synchronized void score( final Scorer[] scorer, final double[] weight ) { if ( scorer.length == 0 ) this.scorer = null; else { if ( scorer.length == 1 ) this.scorer = scorer[ 0 ]; else this.scorer = new LinearAggregator( scorer, weight );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -