⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 query.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package it.unimi.dsi.mg4j.query;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Paolo Boldi and Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.Hash;import it.unimi.dsi.fastutil.io.BinIO;import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;import it.unimi.dsi.fastutil.objects.Object2ReferenceLinkedOpenHashMap;import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;import it.unimi.dsi.fastutil.objects.Reference2DoubleOpenHashMap;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceOpenHashMap;import it.unimi.dsi.mg4j.document.Document;import it.unimi.dsi.mg4j.document.DocumentCollection;import it.unimi.dsi.mg4j.document.DocumentFactory;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.TermProcessor;import it.unimi.dsi.mg4j.query.nodes.QueryTransformer;import it.unimi.dsi.mg4j.query.parser.QueryParserException;import it.unimi.dsi.mg4j.query.parser.SimpleParser;import it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor;import it.unimi.dsi.mg4j.search.score.BM25Scorer;import it.unimi.dsi.mg4j.search.score.DocumentScoreInfo;import it.unimi.dsi.mg4j.search.score.Scorer;import it.unimi.dsi.mg4j.search.score.VignaScorer;import it.unimi.dsi.mg4j.util.MG4JClassParser;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStreamReader;import java.io.PrintStream;import java.io.Reader;import java.lang.reflect.Constructor;import java.lang.reflect.InvocationTargetException;import java.net.URISyntaxException;import java.util.Arrays;import java.util.Comparator;import java.util.List;import org.apache.commons.configuration.ConfigurationException;import org.apache.log4j.Logger;import com.martiansoftware.jsap.FlaggedOption;import com.martiansoftware.jsap.JSAP;import com.martiansoftware.jsap.JSAPResult;import com.martiansoftware.jsap.Parameter;import com.martiansoftware.jsap.SimpleJSAP;import com.martiansoftware.jsap.Switch;import com.martiansoftware.jsap.UnflaggedOption;/** A command-line interpreter to query indices. *  * <p>This class can be used to start a {@linkplain it.unimi.dsi.mg4j.query.QueryEngine query engine} * from the command line. Optionally, it can * start a {@linkplain HttpQueryServer web server} that will serve the results in a  * search-engine-like environment. Changes * to the query engine made on the command line will reflect on subsequent queries (also on the * web server). The web server access is fully multithreaded. *  * <p>This class does not provide command-line history or editing: to get that effect, * we suggest to rely on some operating-system utility such as  * <a href="http://utopia.knoware.nl/~hlub/uck/rlwrap/"><samp>rlwrap</samp></a>. *  * <p><strong>Warning:</strong> This class is <strong>highly experimental</strong> (it is the  * place that we tweak to experiment every kind of new indexing/ranking method). */public class Query {	private static final Logger LOGGER = Util.getLogger( Query.class );	/** A formatter for TREC results. */	private static final java.text.NumberFormat FORMATTER = new java.text.DecimalFormat( "0.0000000000" );		public final static int MAX_STEMMING = 1024;	public static enum Command {		MODE,		LIMIT,		SELECT,		SCORE,		MPLEX,		EXPAND,		DIVERT,		WEIGHT,		EQUALIZE,		QUIT	}		public static enum OutputType {		/** Display just timings. */		TIME,		/** Display document pointers, but not intervals. */		SHORT,		/** Display document pointers and not intervals (requires an index with positions). */		LONG,		/** Display document pointers and snippets (requires an index with positions and a collection). */		SNIPPET,		/** Display results in TREC format. */		TREC;	}		/** The maximum number of items output to the console. */	private int maxOutput = 10; 	/** Current topic number, for {@link OutputType#TREC} only. */	private int trecTopicNumber;	/** Current run tag, for {@link OutputType#TREC} only. */	private String trecRunTag;	/** The current display mode. */	private OutputType displayMode = OutputType.SHORT;	/** The current output stream, changeable with <samp>$divert</samp>. */	private PrintStream output = System.out;	/** The current query engine. */	private final QueryEngine queryEngine;			public Query( final QueryEngine queryEngine ) {		this.queryEngine = queryEngine;	}		/** Parses a given array of index URIs/weights, loading the correspoding indices	 * and writing the result of parsing in the given maps.	 * 	 * @param basenameWeight an array of index URIs of the form <samp><var>uri</var>[:<var>weight</var>]</samp>, specifying	 * the URI of an index and the weight for the index (1, if missing).	 * @param loadSizes forces size loading.	 * @param documentCollection an optional document collection, or <code>null</code>.	 * @param name2Index an empty, writable map that will be filled with pairs given by an index basename (or field name, if available) and an {@link Index}.	 * @param index2Weight an empty, writable map that will be filled with a map from indices to respective weights.	 */	private static void loadIndicesFromSpec( final String[] basenameWeight, boolean loadSizes, final DocumentCollection documentCollection, final Object2ReferenceMap<String,Index> name2Index, final Reference2DoubleMap<Index> index2Weight ) throws IOException, ConfigurationException, URISyntaxException, ClassNotFoundException, SecurityException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {		for ( int i = 0; i < basenameWeight.length; i++ ) {						// We must be careful, as ":" is used by Windows to separate the device from the path.			final int split = basenameWeight[ i ].lastIndexOf( ':' );			double weight = 1;			if ( split != -1 ) {				try {					weight = Double.parseDouble( basenameWeight[ i ].substring( split + 1 ) );				}				catch( NumberFormatException e ) {}			}							final Index index;			if ( split == -1 || basenameWeight[ i ].startsWith("mg4j://") ) {				index = Index.getInstance( basenameWeight[ i ], true, loadSizes );				index2Weight.put( index, 1 );			}			else {				index = Index.getInstance( basenameWeight[ i ].substring( 0, split ) );				index2Weight.put( index, weight );			}			if ( documentCollection != null && index.numberOfDocuments != documentCollection.size() ) LOGGER.warn( "Index " + index + " has " + index.numberOfDocuments + " documents, but the document collection has size " + documentCollection.size() );			name2Index.put( index.field != null ? index.field : basenameWeight[ i ], index );		}	}	/** Parses a specification of the form <samp>class(&lt;arg>,&hellip;)[:weight]</samp> and returns the weight	 *  (1 if missing) as result, assigning the just created object in the given index of the given array.	 *  The arguments are all considered as strings.	 * 	 * @param spec the specification.	 * @param array the array where the object is going to be stored.	 * @param index the offset within the array.	 * @return the weight (1 if missing).	 */	@SuppressWarnings("unchecked")	private static <S> double loadClassFromSpec( String spec, final S[] array, final int index ) throws IllegalArgumentException, InstantiationException, IllegalAccessException, InvocationTargetException {		int pos = spec.indexOf( ':' );		Class<S> type = (Class<S>)array.getClass().getComponentType();		double weightSpec = 1;		if ( pos >= 0 ) {			try {				weightSpec = Double.parseDouble ( spec.substring( pos + 1 ) );			} catch ( NumberFormatException e ) {				throw new IllegalArgumentException( "Malformed weight " + spec.substring( 0, pos ) );			}			spec = spec.substring( 0, pos );		}		int endOfName = spec.indexOf( '(' );		if ( endOfName < 0 ) endOfName = spec.length();		Class<? extends S> scorerClass = null;		try {			scorerClass = (Class<? extends S>)Class.forName (spec.substring(0, endOfName));			if ( ! type.isAssignableFrom( scorerClass ) ) throw new ClassCastException( "Class " + scorerClass.getSimpleName() + " is not assignable to " + type );		} catch ( ClassNotFoundException e ) {			try {				scorerClass = (Class<? extends S>)Class.forName( "it.unimi.dsi.mg4j.search.score." + spec.substring( 0, endOfName ) );				if ( ! type.isAssignableFrom( scorerClass ) ) throw new ClassCastException( "Class " + scorerClass.getSimpleName() + " is not assignable to " + type );			} catch ( ClassNotFoundException e1 ) {				throw new IllegalArgumentException( "Unknown or improper class " + "[it.unimi.dsi.mg4j.search.score.]" + spec.substring( 0, endOfName ) );			}		}		String[] args = new String[ 0 ];		if ( endOfName < spec.length() ) {			if ( spec.charAt( spec.length() - 1 ) != ')' ) throw new IllegalArgumentException( ") missing at the end of argument list" );			args = spec.substring( endOfName + 1 , spec.length() - 1 ).split( "," );		}		Class[] argTypes = new Class[ args.length ];		for ( int i = 0; i < argTypes.length; i++ ) argTypes[ i ] = String.class;		Constructor constr;		try {			constr = scorerClass.getConstructor( argTypes );		} catch ( Exception e ) {			throw new IllegalArgumentException( "No constructor with " + argTypes.length + " strings as argument for class " + scorerClass.getName() + ": " + e );		}		array[ index ] = (S)constr.newInstance( (Object [])args );		return weightSpec;	}		/** Interpret the given command, changing the static variables.	 *  See the help printing code for possible commands.	 * 	 * @param line the command line.	 * @return false iff we should exit after this command.	 */	public boolean interpretCommand( final String line ) {		String[] part = line.substring( 1 ).split( "[ \t\n\r]+" );		final Command command;		int i;		if ( part[ 0 ].length() == 0 ) {			System.err.println( "$                                                       prints this help." );			System.err.println( "$mode [time|short|long|snippet|trec <topicNo> <runTag>] chooses display mode." );			System.err.println( "$select [<maxIntervals> <maxLength>] [all]              installs or removes an interval selector." );			System.err.println( "$limit <max>                                            output at most <max> results per query." );			System.err.println( "$divert [<filename>]                                    diverts output to <filename> or to stdout." );			System.err.println( "$weight {index:weight}                                  set index weights (unspecified weights are set to 1)." );			System.err.println( "$mplex [<on>|<off>]                                     set/unset multiplex mode." );			System.err.println( "$equalize <sample>                                      equalize scores using the given sample size." );			System.err.println( "$score {<scorerClass>(<arg>,...)[:<weight>]}            order documents according to <scorerClass>." );			System.err.println( "$expand {<expanderClass>(<arg>,...)}                    expand terms and prefixes according to <expanderClass>." );			System.err.println( "$quit                                                   quits." );			return true;		}		try {			command = Command.valueOf( part[ 0 ].toUpperCase() );		}		catch( IllegalArgumentException e ) {			System.err.println( "Invalid command \"" + part[ 0 ] + "\"; type $ for help." );			return true;		}		switch( command ) {		case MODE:			if ( part.length >= 2 ) {				try {					final OutputType tempMode = OutputType.valueOf( part[ 1 ].toUpperCase() );					if ( tempMode != OutputType.TREC && part.length > 2 ) System.err.println( "Extra arguments." ); 					else if ( tempMode == OutputType.TREC && part.length != 4 ) System.err.println( "Missing or extra arguments." ); 					else {						displayMode = tempMode;						if ( displayMode == OutputType.TREC ) {							trecTopicNumber = Integer.parseInt( part[ 2 ] );							trecRunTag = part[ 3 ];						}					}				}				catch( IllegalArgumentException e ) {					System.err.println( "Unknown mode: " + part[ 1 ] );				}			}			else System.err.println( "Missing mode." );			break;					case LIMIT:			int out = -1;			if ( part.length == 2 ) {				try {					out = Integer.parseInt( part[ 1 ] );				}				catch( NumberFormatException e ) {}				if ( out >= 0 ) maxOutput = out;			}			if ( out < 0 ) System.err.println( "Missing or incorrect limit." );			break;					case SELECT:			int maxIntervals = -1, maxLength = -1;			if ( part.length == 1 ) {				queryEngine.intervalSelector = null;				System.err.println( "Intervals have been disabled." );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -