⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simpleparser.jj

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JJ
📖 第 1 页 / 共 2 页
字号:
options {
	STATIC = false;
	UNICODE_INPUT = true;
}

PARSER_BEGIN(SimpleParser)

package it.unimi.dsi.mg4j.query.parser;

import it.unimi.dsi.lang.*;
import it.unimi.dsi.mg4j.index.*;
import it.unimi.dsi.mg4j.query.nodes.*;
import it.unimi.dsi.fastutil.objects.*;
import it.unimi.dsi.fastutil.ints.*;
import java.util.*;

/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2007 Paolo Boldi and Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

/** A simple parser that transforms a query string into a {@linkplain it.unimi.dsi.mg4j.query.nodes query}.
 *
 *  <P>The parser supports multiple indices. You must provide a set of
 *  <em>index aliases</em> that the user will use to select indices, and the name of the default index
 *  alias to be used. After that, you parse a query by using the {@link #parse(String)} method.
 *
 * <P>The parser generated by JavaCC for this class will break terms using operators and
 * nonspace-to-space transitions. Operators can be included in terms, if needed, using
 * the backslash, which acts as an escape character, and makes the next character
 * (usually an operator) a standard character. The syntax of the parser can be seen in action
 * in the documentation of the package {@link it.unimi.dsi.mg4j.search}.
 *
 * <P>The parser returns a {@link it.unimi.dsi.mg4j.query.nodes.Query}&mdash;an abstract
 * representation of the query string that can be turned later into a
 * {@link it.unimi.dsi.mg4j.search.DocumentIterator}.
 *
 * <P>If a {@link TermProcessor} is specified, it will be applied to the terms found in the
 * query string. This can lead to transformations (e.g., downcasing) or generate an exception
 * if the query string contains terms filtered by the term processor.
 *
 * @author Sebastiano Vigna
 * @author Paolo Boldi
 * @since 1.0.1
 *
 */
public class SimpleParser implements QueryParser {

	/** The set of index aliases. */
	public Set<String> indices;
	/** The default index alias. */
	public String defaultIndex;
	/** The term processor for each index. */
	public Map<String,? extends TermProcessor> termProcessors;
	/** The query visitor used to check for Select nodes. */
	private CheckForSelectQueryVisitor visitor;

	/** Whether the productions print a trace of what they are building on standard error. */
	private static final boolean DEBUG = false;

	/** Creates a parser.
	 *
	 *   @param indices the set of index aliases.
	 *   @param defaultIndex the default index alias to be used when parsing the query.
	 *   @param termProcessors a map from index aliases to the corresponding term processor, or <code>null</code>
	 *		for no term processing.
	 */
	public SimpleParser( final Set<String> indices, final String defaultIndex, final Map<String,? extends TermProcessor> termProcessors ) {
		this( new java.io.StringReader( "" ) ); // Just for initialization purposes...
		this.indices = indices;
		this.defaultIndex = defaultIndex;
		this.termProcessors = termProcessors;
		this.visitor = new CheckForSelectQueryVisitor( defaultIndex );
	}

	/** Creates a parser with no term processing.
	 *
	 *   @param indices the set of index aliases.
	 *   @param defaultIndex the default index alias to be used when parsing the query.
	 */
	public SimpleParser( final Set<String> indices, final String defaultIndex ) {
		this( indices, defaultIndex, null );
	}

	/** Creates a parser for a single nameless index with no term processing.
	 *
	 *  <P>Parsers created by this constructor allow only nameless access,
	 *  both in the query and in the interval-iterator methods.
	 */
	public SimpleParser() {
		this( ObjectSets.singleton( "" ), "" );
	}

	/** Creates a parser for a single nameless index with a given term processor.
	 *
	 *  <P>Parsers created by this constructor allow only nameless access,
	 *  both in the query and in the interval-iterator methods.
	 *
	 *   @param termProcessor the term processor associated with the nameless index.
	 */
	public SimpleParser( final TermProcessor termProcessor ) {
		this( ObjectSets.singleton( "" ), "", Object2ObjectMaps.singleton( "", termProcessor ) );
	}

	/** Creates a new parser built from the same index aliases, default index alias
	 *  and term processors as this one.
	 *
	 *   @return a new parser sharing this parser's configuration.
	 */
	public SimpleParser copy() {
		return new SimpleParser( indices, defaultIndex, termProcessors );
	}

	/** Parses the given query, returning the corresponding query result.
	 *
	 *   @param text the query to be parsed.
	 *   @return an abstract representation of <code>text</code>.
	 *   @throws QueryParserException if a parse exception has taken place during query parsing.
	 *   @throws TokenMgrError if a tokenization exception has taken place during query parsing.
	 */
	public Query parse( String text ) throws QueryParserException {
		ReInit( new java.io.StringReader( text ) );
		try {
			final Query query = query( defaultIndex );
			try {
				visitor.prepare();
				// A null result from the visitor is reported as a parse error using the visitor's own message.
				if ( query.accept( visitor ) == null ) throw new ParseException( visitor.errorMessage );
			}
			catch( QueryBuilderVisitorException ignored ) {
				// NOTE(review): deliberately ignored, exactly as in the original code; presumably the
				// select-checking visitor never actually throws -- confirm against CheckForSelectQueryVisitor.
			}
			return query;
		}
		catch( ParseException e ) {
			throw new QueryParserException( e );
		}
	}
}

PARSER_END(SimpleParser)

/** Lexer. */

// This stuff separates terms

SKIP: { " " | "\t" | "\n" | "\r" }

// Operators

TOKEN: { < AND: "AND" | "&"  > }
TOKEN: { < OAND: "<"  > }
TOKEN: { < OR: "OR" | "|" > }
TOKEN: { < NOT: "NOT" | "!" > }
TOKEN: { < ALIGN: "^" > }
TOKEN: { < OPEN_PAREN: "(" > }
TOKEN: { < CLOSE_PAREN: ")" > }
TOKEN: { < OPEN_RANGE: "[" > }
TOKEN: { < CLOSE_RANGE: "]" > } // Brokeback Mountain
TOKEN: { < OPEN_ENLARGE: "[[" > }
TOKEN: { < CLOSE_ENLARGE: "]]" > }
TOKEN: { < QUOTE: "\"" > }
TOKEN: { < COLON: ":" > }
TOKEN: { < TILDA: "~" > }
TOKEN: { < PLUS: "+" > }
TOKEN: { < MINUS: "-" > }
TOKEN: { < SHARP: "#" > }
TOKEN: { < PREFIX: "*" > }
TOKEN: { < HOLE: "$" > }
TOKEN: { < INTERVAL_SEPARATOR: ".." > }

/* A word is a sequence of word characters (non-operator and non-control characters)
 * and escaped characters (anything prefixed with a backslash). Note that it would
 * be nice to use a separate token for these two classes of characters, but then
 * JavaCC refuses to recognize words made by one character (a bug? a feature?).
 */

TOKEN:
	{
	< WORD: (
						( ~[ "&", "$", "<", "|", "!", "(", ")", "[", "]", ",", "\"", ":", "~", "#", "-", "+", "*", "\\", "^", "\u0000"-"\u0020" ] )
					|	( "\\" ~["\u0000"-"\u0020"] )
					)+
			>
		}

/** Parser. */

/** Starting rule for a difference query.
 *
 *  <P>Parses an OR-query optionally followed by a <code>-</code>, an optional pair of
 *  enlargement margins of the form <code>[[left:right]]</code>, and a second OR-query.
 *  When no subtrahend is present the first OR-query is returned unchanged; otherwise
 *  a {@link Difference} is built, with missing margins defaulting to zero.
 *
 *   @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 *   @return the result of the query.
 *   @throws ParseException if the query is malformed or if a margin is not a valid integer.
 */
Query query( String indexAlias ):
{
	Query minuend, subtrahend = null;
	Token leftMargin = null, rightMargin = null;
}
{
	minuend = orQuery( indexAlias )
	[ <MINUS>
		{ if ( DEBUG ) System.err.println( "Building difference query" ); }
		[ <OPEN_ENLARGE> leftMargin = <WORD> <COLON> rightMargin = <WORD> <CLOSE_ENLARGE> ]
		// Invoked from within a Java action (as in the original grammar), so JavaCC
		// treats the call as opaque code rather than as a grammar expansion.
		{ subtrahend = orQuery( indexAlias ); }
	]
	{
		if ( subtrahend == null ) return minuend;

		// Margins are lexed as generic WORD tokens, so they may well be non-numeric (or overflow
		// an int): report that as a parse error instead of leaking an unchecked NumberFormatException.
		int left, right;
		try {
			left = leftMargin == null ? 0 : Integer.parseInt( leftMargin.image );
			right = rightMargin == null ? 0 : Integer.parseInt( rightMargin.image );
		}
		catch( NumberFormatException e ) {
			// Both tokens are necessarily non-null here: they are matched together, and parseInt() is only invoked on non-null tokens.
			final ParseException parseException = new ParseException( "Invalid enlargement margins [[" + leftMargin.image + ":" + rightMargin.image + "]] at line " + leftMargin.beginLine + ", column " + leftMargin.beginColumn + ": both margins must be integers" );
			parseException.initCause( e );
			throw parseException;
		}
		return new Difference( minuend, subtrahend, left, right );
	}
}

/** Starting rule for a OR-query.
 *
 *  <P>Parses one or more ordered-AND queries separated by the OR operator. A single
 *  subquery is returned as is; two or more are wrapped in an {@link Or} node.
 *
 *   @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 *   @return the result of the query.
 */
Query orQuery( String indexAlias ):
{
	Query res;
	ObjectArrayList<Query> qrm = new ObjectArrayList<Query>();
}
{
	res = orderedAndQuery( indexAlias )
	{ qrm.add( res ); }
	(
		<OR>
		{ if ( DEBUG ) System.err.println( "Building OR query" ); }
		res = orderedAndQuery( indexAlias )
		{   qrm.add( res ); }
	)*
	{
		// With exactly one subquery, res still refers to it: no Or wrapper is needed.
		if ( qrm.size() == 1 ) return res;
		return new Or( qrm.toArray( Queries.EMPTY_ARRAY ) );
	}
}

/** Starting rule for an AND-query (the AND token is optional).
 *   @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 *   @return the result of the query.
 */
Query andQuery( String indexAlias ):
{
	Query res;
	ObjectArrayList<Query> qrm = new ObjectArrayList<Query>();
}
{
	res = multiTermQuery( indexAlias )
	{ if ( DEBUG ) System.err.println( "Building AND query: " + res ); }
	{ qrm.add( res ); }
	(
		[ <AND> ]
		res = multiTermQuery( indexAlias )

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -