📄 queryservlet.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.query;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2005-2007 Sebastiano Vigna  * *  This program is free software; you can redistribute it and/or modify it *  under the terms of the GNU General Public License as published by the Free *  Software Foundation; either version 2 of the License, or (at your option) *  any later version. * *  This program is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License *  for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.Util;import it.unimi.dsi.fastutil.objects.Object2ReferenceMap;import it.unimi.dsi.fastutil.objects.ObjectArrayList;import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;import it.unimi.dsi.lang.MutableString;import it.unimi.dsi.mg4j.document.Document;import it.unimi.dsi.mg4j.document.DocumentCollection;import it.unimi.dsi.mg4j.document.DocumentFactory;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;import it.unimi.dsi.mg4j.query.parser.QueryParserException;import it.unimi.dsi.mg4j.search.score.DocumentScoreInfo;import java.io.File;import java.io.Reader;import java.net.URLEncoder;import java.util.Arrays;import java.util.Comparator;import java.util.Iterator;import java.util.List;import javax.servlet.ServletContext;import javax.servlet.ServletException;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import org.apache.commons.lang.StringEscapeUtils;import org.apache.log4j.Logger;import org.apache.velocity.Template;import org.apache.velocity.app.Velocity;import org.apache.velocity.context.Context;import org.apache.velocity.tools.view.servlet.VelocityViewServlet;/** A query servlet. *  * <p>This class provides a basic servlet for searching a collection. * It expects some data (a collection, an index map and a path)  * in the {@link javax.servlet.ServletContext} (see the code for {@link #init()}). It * can be used to search in a collection, but it is essentially a worked-out example. *  * <p>The three parameters are <samp>q</samp>, the query, <samp>m</samp>, the maximum * number of results to be displayed, and <samp>s</samp>, the first result to be displayed. *  * <p>Usually, the URI associated to each result is taken from the collection. Alternatively, each * result will point to the <samp>/Item</samp> path with some query arguments (<samp>doc</samp>, containing * the document pointer, <samp>uri</samp>, containing the original URI, and <samp>m</samp>, containing * an optional suggested MIME type). See, for instance, {@link it.unimi.dsi.mg4j.query.GenericItem} and {@link it.unimi.dsi.mg4j.query.InputStreamItem}. *  * <p>The Velocity template used by this servlet can be set using the initialisation parameter * <samp>template</samp> (or using a context attribute with the same name). If you're using * this servlet via {@link HttpQueryServer}, please read the documentation therein for * information about template resolution order. *  * <p>This servlet is thread safe. Each instance uses its own flyweight copies of the * {@linkplain it.unimi.dsi.mg4j.document.DocumentCollection collection} and * {@linkplain it.unimi.dsi.mg4j.query.QueryEngine query engine} to return the result (in particular, snippets). In a production * site it might be more sensible to pool and reuse such classes. */public class QueryServlet extends VelocityViewServlet {	private static final long serialVersionUID = 1L;	private final static Logger LOGGER = Util.getLogger( QueryServlet.class );	/** Standard maximum number of items to be displayed (may be altered with the <samp>m</samp> query parameter). */	private final static int STD_MAX_NUM_ITEMS = 10;	/** The default Velocity template used by this servlet; may be overriden in the context using an attribute named <samp>template</samp>. */	protected final static String DEFAULT_TEMPLATE = "it/unimi/dsi/mg4j/query/query.velocity";	/** The actual template used by this servlet (default: {@link #DEFAULT_TEMPLATE}). */	protected String template;	/** The query engine. */	protected QueryEngine queryEngine;	/** The document collection. */	protected DocumentCollection documentCollection;	/** An optional title list if the document collection is not present. */	protected List<CharSequence> titleList;	/** A sorted map from index names to indices: the first entry is the default index. */	protected Object2ReferenceMap<String,Index> indexMap;	/** The indices of the fields specified in the index map, in increasing order (for document access).  */	private Index[] sortedIndex;	/** If not <code>null</code>, a MIME type suggested to the servlet. */	private String mimeType;	/** If true, the link associated to each item must be built using the document URI. */	private boolean useUri;	/** If true, URIs are files that should be derelativised. */	private boolean derelativise;		@SuppressWarnings("unchecked")	public void init() throws ServletException {		ServletContext context = getServletContext();		if ( ( template = (String)getServletContext().getAttribute( "template" ) ) == null &&				( template = getInitParameter( "template" ) ) == null ) template = DEFAULT_TEMPLATE; 		queryEngine = (QueryEngine)context.getAttribute( "queryEngine" );		documentCollection = (DocumentCollection)context.getAttribute( "collection" );		titleList = (List<CharSequence>)context.getAttribute( "titleList" );		indexMap = queryEngine.indexMap;		mimeType = (String)context.getAttribute( "mimeType" );		useUri = context.getAttribute( "uri" ) == Boolean.TRUE;		derelativise = context.getAttribute( "derelativise" ) == Boolean.TRUE;		if ( documentCollection != null ) {			sortedIndex = new Index[ indexMap.size() ];			indexMap.values().toArray( sortedIndex );			Arrays.sort( sortedIndex, new Comparator<Index>() {				public int compare( final Index x, final Index y ) {					return documentCollection.factory().fieldIndex( x.field ) - documentCollection.factory().fieldIndex( y.field );				}			});		}	}		public Template handleRequest( final HttpServletRequest request, final HttpServletResponse response, final Context context ) {    		try {			response.setCharacterEncoding( "UTF-8" );						// This string is URL-encoded, and with the wrong coding.			//String query = request.getParameter( "q" ) != null ? new String( request.getParameter( "q" ).getBytes( "ISO-8859-1" ), "UTF-8" ) : null;			String query = request.getParameter( "q" );			context.put( "action", request.getContextPath() + request.getServletPath() );						// Sanitise parameters.			int start = 0, maxNumItems = STD_MAX_NUM_ITEMS;			try { maxNumItems = Integer.parseInt( request.getParameter( "m" ) ); } catch( NumberFormatException dontCare ) {}			try { start = Integer.parseInt( request.getParameter( "s" ) ); } catch( NumberFormatException dontCare ) {}						if ( maxNumItems < 0 || maxNumItems > 1000 ) maxNumItems = STD_MAX_NUM_ITEMS;			if ( start < 0 ) start = 0;        				if ( query != null && query.length() != 0 ) {								// This is used to display again the query in the input control.				context.put( "q", StringEscapeUtils.escapeHtml( query ) );				// This is used to put the query in URLs.				context.put( "qUrl", URLEncoder.encode( query, "UTF-8" ) );				context.put( "firstItem", new Integer( start ) );				// First of all, we check that the query is correct				long time = -System.currentTimeMillis();				ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>> results = new ObjectArrayList<DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>>>();				int globNumItems;				try {					globNumItems = queryEngine.copy().process( query, start, maxNumItems, results );				}				catch( QueryBuilderVisitorException e ) {					context.put( "errmsg", StringEscapeUtils.escapeHtml( e.getCause().toString() ) );					return Velocity.getTemplate( template );				}				catch( QueryParserException e ) {					context.put( "errmsg", StringEscapeUtils.escapeHtml( e.getCause().toString() ) );					return Velocity.getTemplate( template );				}				catch( Exception e ) {					context.put( "errmsg", StringEscapeUtils.escapeHtml( e.toString() ) );					return Velocity.getTemplate( template );				}				time += System.currentTimeMillis();				ObjectArrayList<ResultItem> resultItems = new ObjectArrayList<ResultItem>();				if ( ! results.isEmpty() ) {					SelectedInterval[] selectedInterval = null;					final DocumentCollection collection = documentCollection != null ? documentCollection.copy() : null;					for( int i = 0; i < results.size(); i++ ) {						DocumentScoreInfo<Reference2ObjectMap<Index,SelectedInterval[]>> dsi = results.get( i );						LOGGER.debug( "Intervals for item " + i );						final ResultItem resultItem = new ResultItem( dsi.document, dsi.score );						resultItems.add( resultItem );						if ( collection != null ) {							final Document document = collection.document( dsi.document );							// If both collection and title list are present, we override the collection title (cfr. Query)							resultItem.title = StringEscapeUtils.escapeHtml( titleList != null ? titleList.get( resultItem.doc ).toString() : document.title().toString() );							if ( useUri ) {								if ( document.uri() != null ) resultItem.uri = StringEscapeUtils.escapeHtml( document.uri().toString() );							}							else {								if ( document.uri() != null ) {									String stringUri = document.uri().toString();									// TODO: this is a quick patch to get the file server running with relative files									final String documentUri = derelativise									? StringEscapeUtils.escapeHtml( new File( stringUri.startsWith( "file:" ) ? stringUri.substring( 5 ) : stringUri ).getAbsoluteFile().toURI().toASCIIString() )											: StringEscapeUtils.escapeHtml( document.uri().toString() );									resultItem.uri = new MutableString( "./Item?doc=" ).append( resultItem.doc ).append( "&m=" ).append( mimeType ).append( "&uri=" ).append( documentUri );								}								else resultItem.uri = new MutableString( "./Item?doc=" ).append( resultItem.doc ).append( "&m=" ).append( mimeType );							}														MarkingMutableString snippet = new MarkingMutableString( TextMarker.HTML_STRONG, MarkingMutableString.HTML_ESCAPE ); 														for( int j = 0; j < sortedIndex.length; j++ ) {								if ( ! sortedIndex[ j ].hasPositions || dsi.info == null ) continue;								selectedInterval = dsi.info.get( sortedIndex[ j ] );								if ( selectedInterval != null ) {									final int field = documentCollection.factory().fieldIndex( sortedIndex[ j ].field );									// If the field is not present (e.g., because of parallel indexing) or it is not text we skip									if ( field == -1 || documentCollection.factory().fieldType( field ) != DocumentFactory.FieldType.TEXT ) continue;									LOGGER.debug( "Found intervals for " + sortedIndex[ j ].field + " (" + field + ")" );									final Reader content = (Reader)document.content( field );									snippet.startField( selectedInterval ).appendAndMark( document.wordReader( field ).setReader( content ) ).endField();								}								if ( LOGGER.isDebugEnabled() ) LOGGER.debug( sortedIndex[ j ].field + ": " + ( selectedInterval == null ? null : Arrays.asList( selectedInterval ) ) ); 								document.close();							}														resultItem.text = snippet; 						}						else {							if ( titleList != null ) {								// TODO: this is a bit radical								resultItem.title = resultItem.uri = titleList.get( resultItem.doc );							}							else {								resultItem.title = "Document #" +  resultItem.doc;								resultItem.uri = new MutableString( "./Item?doc=" ).append( resultItem.doc ).append( "&m=" ).append( mimeType );							}														MutableString text = new MutableString();							for( Iterator<Index> j = indexMap.values().iterator(); j.hasNext(); ) {								final Index index = j.next();								selectedInterval = dsi.info.get( index );								if ( selectedInterval != null )									text.append( "<p>" ).append( index.field ).append( ": " ).append( Arrays.asList( selectedInterval ) );								LOGGER.debug( index.field + ": " + ( selectedInterval == null ? null : Arrays.asList( selectedInterval ) ) ); 							}							resultItem.text = text;						}					}										if ( collection != null ) collection.close();				}								// Note that if we pass an array to the template we lose the possibility of measuring its length.				context.put( "result", resultItems );				/* Note that this number is just the number of relevant documents met while				   trying to obtain the current results. Due to the short-circuit semantics of the				   "and then" operator, it  might not reflect accurately the overall number of				   results of the query. */				context.put( "globNumItems", new Integer( globNumItems ) );				context.put( "start", new Integer( start ) );				context.put( "maxNumItems", new Integer( maxNumItems ) );				context.put( "time", new Integer( (int)time ) );				context.put( "speed", new Long( (int)( globNumItems * 1000L / ( time + 1 ) ) ) );			}			return Velocity.getTemplate( template );		}		catch( Exception e ) { 			e.printStackTrace( System.err );			return null;		}	}}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -