📄 simpleparser.jj
字号:
options {
	STATIC = false;
	UNICODE_INPUT = true;
}

PARSER_BEGIN(SimpleParser)

package it.unimi.dsi.mg4j.query.parser;

import it.unimi.dsi.lang.*;
import it.unimi.dsi.mg4j.index.*;
import it.unimi.dsi.mg4j.query.nodes.*;
import it.unimi.dsi.fastutil.objects.*;
import it.unimi.dsi.fastutil.ints.*;
import java.util.*;

/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2007 Paolo Boldi and Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

/** A simple parser that transforms a query string into a {@linkplain it.unimi.dsi.mg4j.query.nodes query}.
 *
 * <P>The parser supports multiple indices. You must provide a set of
 * <em>index aliases</em> that the user will use to select indices, and the name of the default index
 * alias to be used. After that, you parse a query by using the {@link #parse(String)} method.
 *
 * <P>The parser generated by JavaCC for this class will break terms using operators and
 * nonspace-to-space transitions. Operators can be included in terms, if needed, using
 * the backslash, which acts as an escape character, and makes the next character
 * (usually an operator) a standard character. The syntax of the parser can be seen in action
 * in the documentation of the package {@link it.unimi.dsi.mg4j.search}.
 *
 * <P>The parser returns a {@link it.unimi.dsi.mg4j.query.nodes.Query}—an abstract
 * representation of the query string that can be turned later into a
 * {@link it.unimi.dsi.mg4j.search.DocumentIterator}.
 *
 * <P>If a {@link TermProcessor} is specified, it will be applied to the terms found in the
 * query string. This can lead to transformations (e.g., downcasing) or generate an exception
 * if the query string contains terms filtered by the term processor.
 *
 * @author Sebastiano Vigna
 * @author Paolo Boldi
 * @since 1.0.1
 *
 */
public class SimpleParser implements QueryParser {
	/** The set of index aliases. */
	public Set<String> indices;
	/** The default index alias. */
	public String defaultIndex;
	/** The term processor for each index. */
	public Map<String,? extends TermProcessor> termProcessors;
	/** The query visitor used to check for Select nodes. */
	private CheckForSelectQueryVisitor visitor;

	private final static boolean DEBUG = false;

	/** Creates a parser.
	 *
	 * @param indices the set of index aliases.
	 * @param defaultIndex the default index alias to be used when parsing the query.
	 * @param termProcessors a map from index aliases to the corresponding term processor, or <code>null</code>
	 * for no term processing.
	 */
	public SimpleParser( final Set<String> indices, final String defaultIndex, final Map<String,? extends TermProcessor> termProcessors ) {
		// Just for initialization purposes: the actual input is installed by parse() through ReInit().
		this( new java.io.StringReader( "" ) );
		this.indices = indices;
		this.defaultIndex = defaultIndex;
		this.termProcessors = termProcessors;
		this.visitor = new CheckForSelectQueryVisitor( defaultIndex );
	}

	/** Creates a parser with no term processing.
	 *
	 * @param indices the set of index aliases.
	 * @param defaultIndex the default index alias to be used when parsing the query.
	 */
	public SimpleParser( final Set<String> indices, final String defaultIndex ) {
		this( indices, defaultIndex, null );
	}

	/** Creates a parser for a single nameless index with no term processing.
	 *
	 * <P>Parsers created by this constructor allow only nameless access,
	 * both in the query and in the interval-iterator methods.
	 */
	public SimpleParser() {
		this( ObjectSets.singleton( "" ), "" );
	}

	/** Creates a parser for a single nameless index with a given term processor.
	 *
	 * <P>Parsers created by this constructor allow only nameless access,
	 * both in the query and in the interval-iterator methods.
	 *
	 * @param termProcessor the term processor associated with the nameless index.
	 */
	public SimpleParser( final TermProcessor termProcessor ) {
		this( ObjectSets.singleton( "" ), "", Object2ObjectMaps.singleton( "", termProcessor ) );
	}

	/** Creates a new parser configured with the same index aliases, default index alias
	 * and term processors as this one.
	 *
	 * @return a new parser sharing this parser's configuration objects.
	 */
	public SimpleParser copy() {
		return new SimpleParser( indices, defaultIndex, termProcessors );
	}

	/** Parses the given query, returning the corresponding query result.
	 *
	 * <P>After a successful parse the resulting query is visited by a {@link CheckForSelectQueryVisitor};
	 * if the visit returns <code>null</code>, the visitor's error message is reported as a parse error.
	 *
	 * @param text the query to be parsed.
	 * @return an abstract representation of <code>text</code>.
	 * @throws QueryParserException if a parse exception has taken place during query parsing.
	 * @throws TokenMgrError if a tokenization exception has taken place during query parsing.
	 */
	public Query parse( String text ) throws QueryParserException {
		ReInit( new java.io.StringReader( text ) );
		try {
			final Query query = query( defaultIndex );
			try {
				visitor.prepare();
				if ( query.accept( visitor ) == null ) throw new ParseException( visitor.errorMessage );
			}
			// NOTE(review): deliberately ignored in the original; presumably the checking visitor never throws -- confirm.
			catch( QueryBuilderVisitorException e ) {}
			return query;
		}
		catch( ParseException e ) {
			throw new QueryParserException( e );
		}
	}

	/** Converts an optional margin token of a difference query into its integer value.
	 *
	 * <P>Margins are lexed as generic words, so they may contain arbitrary text; a malformed
	 * margin is reported as a {@link ParseException} (which {@link #parse(String)} wraps into
	 * a {@link QueryParserException}) rather than leaking an unchecked {@link NumberFormatException}.
	 *
	 * @param margin the margin token, or <code>null</code> if no margin was specified.
	 * @return the value of the margin, or 0 if <code>margin</code> is <code>null</code>.
	 * @throws ParseException if the token image is not a valid integer.
	 */
	private static int parseMargin( final Token margin ) throws ParseException {
		if ( margin == null ) return 0;
		try {
			return Integer.parseInt( margin.image );
		}
		catch( NumberFormatException e ) {
			throw new ParseException( "Difference margins must be integers: " + margin.image );
		}
	}
}

PARSER_END(SimpleParser)

/** Lexer. */

// Whitespace separates terms and is otherwise skipped.

SKIP: { " " | "\t" | "\n" | "\r" }

// Operators

TOKEN: { < AND: "AND" | "&" > }
TOKEN: { < OAND: "<" > }
TOKEN: { < OR: "OR" | "|" > }
TOKEN: { < NOT: "NOT" | "!" > }
TOKEN: { < ALIGN: "^" > }
TOKEN: { < OPEN_PAREN: "(" > }
TOKEN: { < CLOSE_PAREN: ")" > }
TOKEN: { < OPEN_RANGE: "[" > }
TOKEN: { < CLOSE_RANGE: "]" > }
// Doubled brackets delimit the left:right margins of a difference query (see query()).
TOKEN: { < OPEN_ENLARGE: "[[" > }
TOKEN: { < CLOSE_ENLARGE: "]]" > }
TOKEN: { < QUOTE: "\"" > }
TOKEN: { < COLON: ":" > }
TOKEN: { < TILDA: "~" > }
TOKEN: { < PLUS: "+" > }
TOKEN: { < MINUS: "-" > }
TOKEN: { < SHARP: "#" > }
TOKEN: { < PREFIX: "*" > }
TOKEN: { < HOLE: "$" > }
TOKEN: { < INTERVAL_SEPARATOR: ".." > }

/* A word is a sequence of word characters (non-operator and non-control characters)
 * and escaped characters (anything prefixed with a backslash). Note that it would
 * be nice to use a separate token for these two classes of characters, but then
 * JavaCC refuses to recognize words made by one character (a bug? a feature?).
 */

TOKEN: { < WORD: ( ( ~[ "&", "$", "<", "|", "!", "(", ")", "[", "]", ",", "\"", ":", "~", "#", "-", "+", "*", "\\", "^", "\u0000"-"\u0020" ] ) | ( "\\" ~["\u0000"-"\u0020"] ) )+ > }

/** Parser. */

/** Starting rule for a difference query.
 *
 * <P>A difference query is an OR-query optionally followed by a minus sign, an optional
 * pair of margins of the form <code>[[left:right]]</code>, and a second OR-query. Missing
 * margins default to zero; margins that are not integers cause a {@link ParseException}.
 *
 * @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 * @return the result of the query.
 */
Query query( String indexAlias ):
{
	Query minuend, subtrahend = null;
	Token leftMargin = null, rightMargin = null;
}
{
	minuend = orQuery( indexAlias )
	[
		<MINUS> { if ( DEBUG ) System.err.println( "Building difference query" ); }
		[ <OPEN_ENLARGE> leftMargin = <WORD> <COLON> rightMargin = <WORD> <CLOSE_ENLARGE> ]
		// The subtrahend is parsed from a Java action, as in the original grammar, so JavaCC
		// does not take it into account when analysing the choice points above.
		{ subtrahend = orQuery( indexAlias ); }
	]
	{
		if ( subtrahend == null ) return minuend;
		return new Difference( minuend, subtrahend, parseMargin( leftMargin ), parseMargin( rightMargin ) );
	}
}

/** Starting rule for a OR-query.
 *
 * <P>An OR-query is a nonempty sequence of ordered-AND queries separated by the OR operator.
 * A single operand is returned as is, without wrapping it in an {@link Or} node.
 *
 * @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 * @return the result of the query.
 */
Query orQuery( String indexAlias ):
{
	Query res;
	ObjectArrayList<Query> qrm = new ObjectArrayList<Query>();
}
{
	res = orderedAndQuery( indexAlias ) { qrm.add( res ); }
	(
		<OR> { if ( DEBUG ) System.err.println( "Building OR query" ); }
		res = orderedAndQuery( indexAlias ) { qrm.add( res ); }
	)*
	{
		if ( qrm.size() == 1 ) return res;
		return new Or( qrm.toArray( Queries.EMPTY_ARRAY ) );
	}
}

/** Starting rule for an AND-query (the AND token is optional).
 * @param indexAlias the index alias for the default index to be used for the query that is going to be parsed.
 * @return the result of the query.
 */
Query andQuery( String indexAlias ):
{
	Query res;
	ObjectArrayList<Query> qrm = new ObjectArrayList<Query>();
}
{
	res = multiTermQuery( indexAlias ) { if ( DEBUG ) System.err.println( "Building AND query: " + res ); }
	{ qrm.add( res ); }
	( [ <AND> ] res = multiTermQuery( indexAlias )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -