📄 queryparser.jj
字号:
/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(QueryParser)

package org.apache.lucene.queryParser;

import java.util.Vector;
import java.io.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.search.*;

/**
 * This class is generated by JavaCC.  The only method that clients should need
 * to call is <a href="#parse">parse()</a>.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
 * </pre>
 *
 * <p>
 * Examples of appropriately formatted queries can be found in the <a
 * href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>.
 * </p>
 *
 * @author Brian Goetz
 */
public class QueryParser {

  /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
   *  Lexer errors ({@link TokenMgrError}) are converted into a
   *  {@link ParseException} carrying the same message.
   *  @param query     the query string to be parsed.
   *  @param field     the default field for query terms.
   *  @param analyzer  used to find terms in the query text.
   *  @throws ParseException if the parsing fails
   */
  static public Query parse(String query, String field, Analyzer analyzer)
       throws ParseException {
    try {
      QueryParser parser = new QueryParser(field, analyzer);
      return parser.parse(query);
    }
    catch (TokenMgrError tme) {
      throw new ParseException(tme.getMessage());
    }
  }

  Analyzer analyzer;
  String field;
  int phraseSlop = 0;

  /** Constructs a query parser.
   *  @param f  the default field for query terms.
   *  @param a  used to find terms in the query text.
   */
  public QueryParser(String f, Analyzer a) {
    // Start on an empty stream; parse(String) re-initialises the parser
    // with the real query text.
    this(new FastCharStream(new StringReader("")));
    analyzer = a;
    field = f;
  }

  /** Parses a query string, returning a
   * <a href="lucene.search.Query.html">Query</a>.
   *  @param query  the query string to be parsed.
   *  @throws ParseException if the parsing fails
   *  @throws TokenMgrError if the parsing fails
   */
  public Query parse(String query) throws ParseException, TokenMgrError {
    ReInit(new FastCharStream(new StringReader(query)));
    return Query(field);
  }

  /** Sets the default slop for phrases.  If zero, then exact phrase matches
    are required.  Zero by default. */
  public void setPhraseSlop(int s) { phraseSlop = s; }

  /** Gets the default slop for phrases. */
  public int getPhraseSlop() { return phraseSlop; }

  /** Appends a clause for <code>q</code> to <code>clauses</code>, deriving
   *  its required/prohibited flags from the conjunction and modifier that
   *  introduced it.  Nothing is appended when <code>q</code> is null.
   *  @param clauses  the <code>BooleanClause</code>s collected so far.
   *  @param conj     one of the <code>CONJ_*</code> constants.
   *  @param mods     one of the <code>MOD_*</code> constants.
   *  @param q        the query for the new clause; may be null.
   */
  private void addClause(Vector clauses, int conj, int mods, Query q) {
    boolean required, prohibited;

    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited.  There may be no preceding clause at
    // all (it was null and therefore never added), so check the size first
    // instead of indexing element -1.
    if (clauses.size() > 0 && conj == CONJ_AND) {
      BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
      if (!c.prohibited) {
        c.required = true;
      }
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null) {
      return;
    }

    // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
    // introduced by NOT or -; make sure not to set both.
    prohibited = (mods == MOD_NOT);
    required = (mods == MOD_REQ);
    if (conj == CONJ_AND && !prohibited) {
      required = true;
    }
    clauses.addElement(new BooleanClause(q, required, prohibited));
  }

  /** Runs <code>queryText</code> through the analyzer and builds a query
   *  from the resulting tokens.
   *  @return null when no tokens are produced, a <code>TermQuery</code> for
   *  exactly one token, otherwise a <code>PhraseQuery</code> over all tokens
   *  using the current phrase slop.
   */
  private Query getFieldQuery(String field,
                              Analyzer analyzer,
                              String queryText) {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source = analyzer.tokenStream(field,
                                              new StringReader(queryText));
    Vector v = new Vector();
    org.apache.lucene.analysis.Token t;

    while (true) {
      try {
        t = source.next();
      }
      catch (IOException e) {
        // An I/O failure is treated the same as end of stream.
        t = null;
      }
      if (t == null) {
        break;
      }
      v.addElement(t.termText());
    }

    if (v.size() == 0) {
      return null;
    }
    else if (v.size() == 1) {
      return new TermQuery(new Term(field, (String) v.elementAt(0)));
    }
    else {
      PhraseQuery q = new PhraseQuery();
      q.setSlop(phraseSlop);
      for (int i=0; i<v.size(); i++) {
        q.add(new Term(field, (String) v.elementAt(i)));
      }
      return q;
    }
  }

  /** Builds a <code>RangeQuery</code> from the first two tokens the analyzer
   *  produces for <code>queryText</code>.  A missing token, or a token whose
   *  text equals "NULL" ignoring case, leaves that bound null.
   *  @param inclusive  passed through to the <code>RangeQuery</code>
   *                    constructor.
   */
  private Query getRangeQuery(String field,
                              Analyzer analyzer,
                              String queryText,
                              boolean inclusive)
  {
    // Use the analyzer to get all the tokens.  There should be 1 or 2.
    TokenStream source = analyzer.tokenStream(field,
                                              new StringReader(queryText));
    Term[] terms = new Term[2];
    org.apache.lucene.analysis.Token t;

    for (int i = 0; i < 2; i++)
    {
      try
      {
        t = source.next();
      }
      catch (IOException e)
      {
        // An I/O failure is treated the same as a missing token.
        t = null;
      }
      if (t != null)
      {
        String text = t.termText();
        if (!text.equalsIgnoreCase("NULL"))
        {
          terms[i] = new Term(field, text);
        }
      }
    }
    return new RangeQuery(terms[0], terms[1], inclusive);
  }

  /** Command-line driver: parses <code>args[0]</code> against the default
   *  field "field" with a <code>SimpleAnalyzer</code> and prints the
   *  resulting query.
   */
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("field",
                           new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }

  // Values returned by the Conjunction() production.
  private static final int CONJ_NONE   = 0;
  private static final int CONJ_AND    = 1;
  private static final int CONJ_OR     = 2;

  // Values returned by the Modifiers() production.
  private static final int MOD_NONE    = 0;
  private static final int MOD_NOT     = 10;
  private static final int MOD_REQ     = 11;
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> ) >
| <#_WHITESPACE: ( " " | "\t" ) >
}

<DEFAULT> SKIP : {
  <<_WHITESPACE>>
}

<DEFAULT> TOKEN : {
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <CARAT:     "^" > : Boost
| <QUOTED:     "\"" (~["\""])+ "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY:     "~" >
| <SLOP:      "~" (<_NUM_CHAR>)+ >
| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM:  <_TERM_START_CHAR>
              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN:   "[" ( ~[ "]" ] )+ "]">
| <RANGEEX:   "{" ( ~[ "}" ] )+ "}">
}

<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

// Optional AND / OR between clauses; yields CONJ_NONE when absent.
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}

// Optional +, - or NOT in front of a clause; yields MOD_NONE when absent.
int Modifiers() : {
  int ret = MOD_NONE;
}
{
  [
     <PLUS> { ret = MOD_REQ; }
     | <MINUS> { ret = MOD_NOT; }
     | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}

// One or more clauses.  A lone, unmodified, non-null clause is returned
// as-is; anything else is wrapped in a BooleanQuery.
Query Query(String field) :
{
  Vector clauses = new Vector();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    if (mods == MOD_NONE)
        firstQuery=q;
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
    {
      if (clauses.size() == 1 && firstQuery != null)
        return firstQuery;
      else {
        BooleanQuery query = new BooleanQuery();
        for (int i = 0; i < clauses.size(); i++)
          query.add((BooleanClause)clauses.elementAt(i));
        return query;
      }
    }
}

// An optional "field:" prefix followed by a term or a parenthesised query.
Query Clause(String field) : {
  Query q;
  Token fieldToken=null;
}
{
  [
    LOOKAHEAD(2)
    fieldToken=<TERM> <COLON> { field = fieldToken.image; }
  ]

  (
   q=Term(field)
   | <LPAREN> q=Query(field) <RPAREN>
  )
    {
      return q;
    }
}

// A single term, range or quoted phrase, with optional fuzzy / slop / boost
// suffixes.  May return null when the analyzer yields no tokens for the text.
Query Term(String field) : {
  Token term, boost=null, slop=null;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  boolean rangein = false;
  Query q;
}
{
  (
     (
       term=<TERM>
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<NUMBER>
     )
     [ <FUZZY> { fuzzy=true; } ]
     [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
     {
       if (wildcard)
         q = new WildcardQuery(new Term(field, term.image));
       else if (prefix)
         // Drop the trailing "*" that is part of the PREFIXTERM image.
         q = new PrefixQuery(new Term(field, term.image.substring
                                      (0, term.image.length()-1)));
       else if (fuzzy)
         q = new FuzzyQuery(new Term(field, term.image));
       else
         q = getFieldQuery(field, analyzer, term.image);
     }
     | ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // Strip the enclosing brackets / braces from the token image.
          q = getRangeQuery(field, analyzer,
                            term.image.substring(1, term.image.length()-1),
                            rangein);
        }
     | term=<QUOTED>
       [ slop=<SLOP> ]
       [ <CARAT> boost=<NUMBER> ]
       {
         // Strip the enclosing double quotes from the token image.
         q = getFieldQuery(field, analyzer,
                           term.image.substring(1, term.image.length()-1));
         if (slop != null && q instanceof PhraseQuery) {
           try {
             // Skip the leading "~" of the SLOP image.
             int s = Float.valueOf(slop.image.substring(1)).intValue();
             ((PhraseQuery) q).setSlop(s);
           }
           catch (Exception ignored) { }
         }
       }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) { }

      // getFieldQuery returns null when the analyzer produced no tokens;
      // there is nothing to boost in that case.
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -