📄 queryparser.jj

📁 索引aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
💻 JJ
📖 第 1 页 / 共 2 页
字号:
上一页 12
   * that has just a single * character at the end)
   *<p>
   * Depending on settings, the wildcard term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wildcard queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token that contains one or more wild card
   *   characters (? or *), but is not simple prefix term
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getWildcardQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseWildcardTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new WildcardQuery(t);
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses an input term
   * token that uses prefix notation; that is, contains a single '*' wildcard
   * character as its last character. Since this is a special case
   * of generic wildcard term, and such a query can be optimized easily,
   * this usually results in a different query object.
   *<p>
   * Depending on settings, a prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wild card queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *    (<b>without</b> trailing '*' character!)
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getPrefixQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseWildcardTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new PrefixQuery(t);
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses
   * an input term token that has the fuzzy suffix (~) appended.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr) throws ParseException
  {
    Term t = new Term(field, termStr);
    return new FuzzyQuery(t);
  }

  /**
   * Returns a String where each escape char (backslash) has been removed
   * and the character it escapes has been kept literally. An escaped
   * backslash therefore yields a single backslash. A trailing backslash
   * that escapes nothing is dropped.
   *<p>
   * The input is scanned once, left to right, tracking whether the previous
   * character was consumed as an escape. Looking only at the previous
   * <i>source</i> character is not enough: in a run of three backslashes
   * the third one starts a new escape rather than being escaped itself.
   *
   * @param input the raw token image, possibly containing escape chars
   * @return the input with escape sequences resolved
   */
  private String discardEscapeChar(String input) {
    char[] caSource = input.toCharArray();
    char[] caDest = new char[caSource.length];
    int j = 0;
    // true while the previous char was a backslash acting as an escape
    boolean lastCharWasEscapeChar = false;
    for (int i = 0; i < caSource.length; i++) {
      char curChar = caSource[i];
      if (lastCharWasEscapeChar) {
        // escaped character (including an escaped backslash): keep literally
        caDest[j++] = curChar;
        lastCharWasEscapeChar = false;
      } else if (curChar == '\\') {
        // start of an escape sequence: drop the backslash itself
        lastCharWasEscapeChar = true;
      } else {
        caDest[j++] = curChar;
      }
    }
    return new String(caDest, 0, j);
  }

  /**
   * Command line tool: parses the query given as the first argument
   * against the field "field" using a SimpleAnalyzer and prints the
   * resulting query.
   *
   * @param args command line arguments; args[0] is the query string
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      // without this guard args[0] throws ArrayIndexOutOfBoundsException
      System.err.println("Usage: QueryParser <query>");
      return;
    }
    QueryParser qp = new QueryParser("field",
                           new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  <<_WHITESPACE>>
}

// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
// | <WILDTERM:  <_TERM_START_CHAR>
//              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM:  (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
//
// NOTE(review): the WILDTERM definition below still begins with
// <_TERM_START_CHAR>, i.e. the change described above is not applied here.

<DEFAULT> TOKEN : {
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <CARAT:     "^" > : Boost
| <QUOTED:     "\"" (~["\""])+ "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
| <FUZZY:     "~" >
| <SLOP:      "~" (<_NUM_CHAR>)+ >
| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM:  <_TERM_START_CHAR>
              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

// The Boost state is entered after "^" and left again after the number.
<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

// Lexical state inside an inclusive range: [ a TO b ]
<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}

// Lexical state inside an exclusive range: { a TO b }
<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

// Parses an optional AND / OR conjunction; returns CONJ_NONE when absent.
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}

// Parses an optional "+", "-" or NOT modifier; returns MOD_NONE when absent.
int Modifiers() : {
  int ret = MOD_NONE;
}
{
  [
     <PLUS> { ret = MOD_REQ; }
     | <MINUS> { ret = MOD_NOT; }
     | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}

// Parses a sequence of clauses for the given default field. A single
// unmodified clause is returned as is; otherwise the collected clauses
// are handed to getBooleanQuery.
Query Query(String field) :
{
  Vector clauses = new Vector();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    if (mods == MOD_NONE)
        firstQuery=q;
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
    {
      if (clauses.size() == 1 && firstQuery != null)
        return firstQuery;
      else {
        return getBooleanQuery(clauses);
      }
    }
}

// Parses one clause: an optional "field:" prefix followed by either a term
// or a parenthesized sub-query with an optional "^boost".
Query Clause(String field) : {
  Query q;
  Token fieldToken=null, boost=null;
}
{
  [
    LOOKAHEAD(2)
    fieldToken=<TERM> <COLON> {
      field=discardEscapeChar(fieldToken.image);
    }
  ]

  (
   q=Term(field)
   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?

  )
    {
      if (boost != null) {
        float f = (float)1.0;
        try {
          f = Float.valueOf(boost.image).floatValue();
          q.setBoost(f);
        } catch (Exception ignored) {
          // any failure while applying the boost leaves the query unboosted
        }
      }
      return q;
    }
}

// Parses a single term, range or quoted phrase (each with optional boost)
// and builds the corresponding query through the get*Query factory methods.
Query Term(String field) : {
  Token term, boost=null, slop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
     (
       term=<TERM>
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<NUMBER>
     )
     [ <FUZZY> { fuzzy=true; } ]
     [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
     {
       String termImage=discardEscapeChar(term.image);
       if (wildcard) {
         q = getWildcardQuery(field, termImage);
       } else if (prefix) {
         // strip the trailing '*' before resolving escapes
         q = getPrefixQuery(field,
           discardEscapeChar(term.image.substring
          (0, term.image.length()-1)));
       } else if (fuzzy) {
         q = getFuzzyQuery(field, termImage);
       } else {
         q = getFieldQuery(field, analyzer, termImage);
       }
     }
     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
         <RANGEIN_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // quoted bounds lose their surrounding quotes, others their escapes
          if (goop1.kind == RANGEIN_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          } else {
            goop1.image = discardEscapeChar(goop1.image);
          }
          if (goop2.kind == RANGEIN_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          } else {
            goop2.image = discardEscapeChar(goop2.image);
          }
          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
        }
     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
         <RANGEEX_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // quoted bounds lose their surrounding quotes, others their escapes
          if (goop1.kind == RANGEEX_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          } else {
            goop1.image = discardEscapeChar(goop1.image);
          }
          if (goop2.kind == RANGEEX_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          } else {
            goop2.image = discardEscapeChar(goop2.image);
          }
          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
        }
     | term=<QUOTED>
       [ slop=<SLOP> ]
       [ <CARAT> boost=<NUMBER> ]
       {
         int s = phraseSlop;
         if (slop != null) {
           try {
             // the SLOP image is "~" followed by digits; skip the "~"
             s = Float.valueOf(slop.image.substring(1)).intValue();
           }
           catch (Exception ignored) { }
         }
         q = getFieldQuery(field, analyzer,
                           term.image.substring(1, term.image.length()-1),
                           s);
       }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) {
        /* Should this be handled somehow? (defaults to "no boost", if
         * boost number is invalid)
         */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -