📄 queryengine.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package it.unimi.dsi.mg4j.query;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.ints.IntOpenHashSet;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;import it.unimi.dsi.fastutil.objects.Reference2DoubleOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.lang.FlyweightPrototype;import it.unimi.dsi.lang.FlyweightPrototypes;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.query.nodes.Query;import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitor;import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;import it.unimi.dsi.mg4j.query.nodes.QueryTransformer;import it.unimi.dsi.mg4j.query.parser.QueryParser;import it.unimi.dsi.mg4j.query.parser.QueryParserException;import it.unimi.dsi.mg4j.search.DocumentIterator;import it.unimi.dsi.mg4j.search.score.AbstractAggregator;import it.unimi.dsi.mg4j.search.score.DocumentScoreInfo;import 
it.unimi.dsi.mg4j.search.score.LinearAggregator;import it.unimi.dsi.mg4j.search.score.ScoredDocumentBoundedSizeQueue;import it.unimi.dsi.mg4j.search.score.Scorer;import java.io.IOException;import java.util.Arrays;import java.util.Iterator;import org.apache.log4j.Logger;import cern.colt.Sorting;/** An engine that takes a query and returns results, using a programmable * set of scorers and policies. *  * <p>This class embodies most of the work that must be done when answering a query. * Basically, {@link #process(String, int, int, ObjectArrayList) process(query,offset,length,results)} takes <code>query</code>, * parses it, turns it into a document iterator, scans the results, and deposits * <code>length</code> results starting at <code>offset</code> into the list <code>results</code>. *  * <p>There are, however, several additional features available. First of all, either by separating * several queries with commas, or using directly {@link #process(Query[], int, int, ObjectArrayList)} * it is possible to resolve a series of queries with an &ldquo;and-then&rdquo; semantics: results * are added from each query, provided they did not appear before. *  * <p>It is possible to {@linkplain #score(Scorer[], double[]) score queries} using one or * more scorers with different weights (see {@link it.unimi.dsi.mg4j.search.score}), and also * set {@linkplain #setWeights(Reference2DoubleMap) different weights for different indices} (they  * will be passed to the scorers). The scorers influence the order when processing each query, * but results from different &ldquo;and-then&rdquo; queries are simply concatenated. *  * <p>When using multiple scorers, <em>{@linkplain #equalize(int) equalisation}</em> can be used * to avoid the problem associated with the potentially different value ranges of each scorer. Equalisation * evaluates a settable number of sample documents and normalizes the scorers using the maximum value in * the sample. 
See {@link it.unimi.dsi.mg4j.search.score.AbstractAggregator} for some elaboration. *  * <p><em>{@linkplain #multiplex Multiplexing}</em> transforms a query <samp><var>q</var></samp> into <samp>index0:<var>q</var> | index1:<var>q</var> &hellip;</samp>. * In other words, the query is multiplexed on all available indices. Note that if inside <samp><var>q</var></samp> * there are selection operators that specify an index, the inner specification will overwrite * the external one, so that the semantics of the query is only amplified, but never contradicted. *  * <p>The results returned are instances of {@link it.unimi.dsi.mg4j.search.score.DocumentScoreInfo}. If * an {@linkplain #intervalSelector interval selector} has been set,  * the <code>info</code> field will contain a map from indices to arrays of {@linkplain it.unimi.dsi.mg4j.query.SelectedInterval selected intervals} * satisfying the query (see {@link it.unimi.dsi.mg4j.search} for some elaboration on minimal-interval semantics support in MG4J).  *  * <p>For examples of usage of this class, please look at {@link it.unimi.dsi.mg4j.query.Query} * and {@link it.unimi.dsi.mg4j.query.QueryServlet}. *  * <p><strong>Warning:</strong> This class is <strong>highly experimental</strong>. It has become * definitely more decent in MG4J, but still needs some refactoring. *  * <p><strong>Warning</strong>: This class is not * thread safe, but it provides {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight copies}. * The {@link #copy()} method is strengthened so to return an object implementing this interface. *  * @author Sebastiano Vigna * @author Paolo Boldi * @since 1.0 */public class QueryEngine implements FlyweightPrototype<QueryEngine> {	private static final Logger LOGGER = Util.getLogger( QueryEngine.class );	private static final boolean ASSERTS = false;		/** The parser used to parse queries. */	public final QueryParser queryParser;	/** A map from names to indices. 
*/
	public final Object2ReferenceMap<String,Index> indexMap;
	/** The number of indices used by {@link #queryParser} (set by the constructor to the size of {@link #indexMap}). */
	public final int numIndices;
	/** Whether multiplex is active. */
	public volatile boolean multiplex;
	/** The current interval selector, if any. */
	public volatile IntervalSelector intervalSelector;
	/** The current scorer, or <code>null</code> if no scorer is in use. */
	private Scorer scorer;
	/** The builder visitor used to make queries into document iterators. */
	private final QueryBuilderVisitor<DocumentIterator> builderVisitor;
	/** A map associating a weight with each index. */
	protected final Reference2DoubleOpenHashMap<Index> index2Weight;

	/** A transformer that will be applied to queries before resolving them, or <code>null</code>. */
	private QueryTransformer transformer;

	/** Creates a new query engine.
	 *
	 * <p>The index-weight map starts out empty, with a default return value of
	 * <code>1.0 / indexMap.size()</code>.
	 *
	 * @param queryParser a query parser, or <code>null</code> if this query engine will {@linkplain #process(Query[], int, int, ObjectArrayList) just process pre-parsed queries}.
	 * @param builderVisitor a builder visitor to transform {@linkplain Query queries} into {@linkplain DocumentIterator document iterators}.
	 * @param indexMap a map from symbolic name to indices (used for multiplexing and default weight initialisation);
	 * its size is read here, so it must not be <code>null</code>.
	 */
	public QueryEngine( final QueryParser queryParser, final QueryBuilderVisitor<DocumentIterator> builderVisitor, final Object2ReferenceMap<String,Index> indexMap ) {
		this.queryParser = queryParser;
		this.builderVisitor = builderVisitor;
		this.indexMap = indexMap;
		this.numIndices = indexMap.size();
		this.index2Weight = new Reference2DoubleOpenHashMap<Index>();
		// At start, all indices are equal: the default weight is 1 / numIndices.
		// The division is floating point, so an empty index map gives an infinite
		// default weight instead of throwing.
		this.index2Weight.defaultReturnValue( 1.0 / numIndices );
	}

	/** Returns a new query engine configured like this one.
	 *
	 * <p>The new engine is built from copies of the query parser and of the builder visitor,
	 * and from the very same {@link #indexMap} instance. The multiplex flag is transferred as is,
	 * copies of the interval selector and of the scorer are installed, and the current index
	 * weights are passed to {@link #setWeights(Reference2DoubleMap)} on the new engine.
	 *
	 * <p>NOTE(review): the query transformer is not assigned to the new engine by this
	 * method; confirm that this is intended.
	 *
	 * @return a new query engine set up as described above.
	 */
	@SuppressWarnings("unchecked")
	public synchronized QueryEngine copy() {
		final QueryEngine newEngine = new QueryEngine( FlyweightPrototypes.copy( queryParser ), builderVisitor.copy(), indexMap );
		newEngine.multiplex = multiplex;
		newEngine.intervalSelector = FlyweightPrototypes.copy( intervalSelector );
		newEngine.scorer = FlyweightPrototypes.copy( scorer );
		newEngine.setWeights( index2Weight );
		return newEngine;
	}

	/** Activates equalisation with the given number of samples.
	 *
	 * <p>The request is forwarded to the current scorer, which must be an {@link AbstractAggregator}.
	 *
	 * @param samples the number of samples for equalisation, or 0 for no equalisation.
	 * @throws IllegalStateException if there is no current scorer, or if the current scorer
	 * is not an {@link AbstractAggregator}.
	 */
	public synchronized void equalize( final int samples ) {
		if ( scorer == null ) throw new IllegalStateException( "There is no scorer" );
		if ( ! ( scorer instanceof AbstractAggregator ) ) throw new IllegalStateException( "The current scorer is not aggregated" );
		((AbstractAggregator)scorer).equalize( samples );
	}

	/** Sets the scorers for this query engine.
	 *
	 * <p>If <code>scorer</code> has length zero, scoring is disabled. If it has length 1,
	 * the only scorer is used for scoring, and the only element of <code>weight</code> is
	 * discarded. Otherwise, a {@link LinearAggregator} is used to combine results from
	 * the given scorers, using the given weights.
	 *
	 * @param scorer an array of {@linkplain Scorer scorers}.
	 * @param weight a parallel array of weights (not to be confused with <em>index</em> weights).
	 */
	public synchronized void score( final Scorer[] scorer, final double[] weight ) {
		if ( scorer.length == 0 ) this.scorer = null;
		else {
			if ( scorer.length == 1 ) this.scorer = scorer[ 0 ];
			else this.scorer = new LinearAggregator( scorer, weight );
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -