📄 precedencequeryparser.jj

📁 lucene2.2.0版本
💻 JJ
📖 第 1 页 / 共 2 页
字号:
上一页 12
   * @exception ParseException throw in overridden method to disallow   */  protected Query getBooleanQuery(Vector clauses) throws ParseException  {    return getBooleanQuery(clauses, false);  }  /**   * Factory method for generating query, given a set of clauses.   * By default creates a boolean query composed of clauses passed in.   *   * Can be overridden by extending classes, to modify query being   * returned.   *   * @param clauses Vector that contains {@link BooleanClause} instances   *    to join.   * @param disableCoord true if coord scoring should be disabled.   *   * @return Resulting {@link Query} object.   * @exception ParseException throw in overridden method to disallow   */  protected Query getBooleanQuery(Vector clauses, boolean disableCoord)    throws ParseException  {    if (clauses == null || clauses.size() == 0)      return null;    BooleanQuery query = new BooleanQuery(disableCoord);    for (int i = 0; i < clauses.size(); i++) {      query.add((BooleanClause)clauses.elementAt(i));    }    return query;  }  /**   * Factory method for generating a query. Called when parser   * parses an input term token that contains one or more wildcard   * characters (? and *), but is not a prefix term token (one   * that has just a single * character at the end)   *<p>   * Depending on settings, prefix term may be lower-cased   * automatically. It will not go through the default Analyzer,   * however, since normal Analyzers are unlikely to work properly   * with wildcard templates.   *<p>   * Can be overridden by extending classes, to provide custom handling for   * wildcard queries, which may be necessary due to missing analyzer calls.   *   * @param field Name of the field query will use.   * @param termStr Term token that contains one or more wild card   *   characters (? or *), but is not simple prefix term   *   * @return Resulting {@link Query} built for the term   * @exception ParseException throw in overridden method to disallow   */  protected Query getWildcardQuery(String field, String termStr) throws ParseException  {    if (lowercaseExpandedTerms) {      termStr = termStr.toLowerCase();    }    Term t = new Term(field, termStr);    return new WildcardQuery(t);  }  /**   * Factory method for generating a query (similar to   * {@link #getWildcardQuery}). Called when parser parses an input term   * token that uses prefix notation; that is, contains a single '*' wildcard   * character as its last character. Since this is a special case   * of generic wildcard term, and such a query can be optimized easily,   * this usually results in a different query object.   *<p>   * Depending on settings, a prefix term may be lower-cased   * automatically. It will not go through the default Analyzer,   * however, since normal Analyzers are unlikely to work properly   * with wildcard templates.   *<p>   * Can be overridden by extending classes, to provide custom handling for   * wild card queries, which may be necessary due to missing analyzer calls.   *   * @param field Name of the field query will use.   * @param termStr Term token to use for building term for the query   *    (<b>without</b> trailing '*' character!)   *   * @return Resulting {@link Query} built for the term   * @exception ParseException throw in overridden method to disallow   */  protected Query getPrefixQuery(String field, String termStr) throws ParseException  {    if (lowercaseExpandedTerms) {      termStr = termStr.toLowerCase();    }    Term t = new Term(field, termStr);    return new PrefixQuery(t);  }     /**   * Factory method for generating a query (similar to   * {@link #getWildcardQuery}). Called when parser parses   * an input term token that has the fuzzy suffix (~) appended.   *   * @param field Name of the field query will use.   * @param termStr Term token to use for building term for the query   *   * @return Resulting {@link Query} built for the term   * @exception ParseException throw in overridden method to disallow   */  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException  {    if (lowercaseExpandedTerms) {      termStr = termStr.toLowerCase();    }    Term t = new Term(field, termStr);    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);  }  /**   * Returns a String where the escape char has been   * removed, or kept only once if there was a double escape.   */  private String discardEscapeChar(String input) {    char[] caSource = input.toCharArray();    char[] caDest = new char[caSource.length];    int j = 0;    for (int i = 0; i < caSource.length; i++) {      if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {        caDest[j++]=caSource[i];      }    }    return new String(caDest, 0, j);  }  /**   * Returns a String where those characters that QueryParser   * expects to be escaped are escaped by a preceding <code>\</code>.   */  public static String escape(String s) {    StringBuffer sb = new StringBuffer();    for (int i = 0; i < s.length(); i++) {      char c = s.charAt(i);      // NOTE: keep this in sync with _ESCAPED_CHAR below!      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'        || c == '*' || c == '?') {        sb.append('\\');      }      sb.append(c);    }    return sb.toString();  }  /**   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.   * Usage:<br>   * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>   */  public static void main(String[] args) throws Exception {    if (args.length == 0) {      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");      System.exit(0);    }    PrecedenceQueryParser qp = new PrecedenceQueryParser("field",                           new org.apache.lucene.analysis.SimpleAnalyzer());    Query q = qp.parse(args[0]);    System.out.println(q.toString("field"));  }}PARSER_END(PrecedenceQueryParser)/* ***************** *//* Token Definitions *//* ***************** */<*> TOKEN : {  <#_NUM_CHAR:   ["0"-"9"] >// NOTE: keep this in sync with escape(String) above!| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",                           "[", "]", "\"", "{", "}", "~", "*", "?" ]                       | <_ESCAPED_CHAR> ) >| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >}<DEFAULT, RangeIn, RangeEx> SKIP : {  <<_WHITESPACE>>}// OG: to support prefix queries:// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137// Change from:// | <WILDTERM:  <_TERM_START_CHAR>//              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >// To://// | <WILDTERM:  (<_TERM_CHAR> | ( [ "*", "?" ] ))* ><DEFAULT> TOKEN : {  <AND:       ("AND" | "&&") >| <OR:        ("OR" | "||") >| <NOT:       ("NOT" | "!") >| <PLUS:      "+" >| <MINUS:     "-" >| <LPAREN:    "(" >| <RPAREN:    ")" >| <COLON:     ":" >| <CARAT:     "^" > : Boost| <QUOTED:     "\"" (~["\""])+ "\"">| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >| <WILDTERM:  <_TERM_START_CHAR>              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >| <RANGEIN_START: "[" > : RangeIn| <RANGEEX_START: "{" > : RangeEx}<Boost> TOKEN : {<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT}<RangeIn> TOKEN : {<RANGEIN_TO: "TO">| <RANGEIN_END: "]"> : DEFAULT| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">| <RANGEIN_GOOP: (~[ " ", "]" ])+ >}<RangeEx> TOKEN : {<RANGEEX_TO: "TO">| <RANGEEX_END: "}"> : DEFAULT| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">| <RANGEEX_GOOP: (~[ " ", "}" ])+ >}// *   Query  ::= ( Clause )*// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )int Conjunction() : {  int ret = CONJ_NONE;}{  [    <AND> { ret = CONJ_AND; }    | <OR>  { ret = CONJ_OR; }  ]  { return ret; }}int Modifier() : {  int ret = MOD_NONE;}{  [     <PLUS> { ret = MOD_REQ; }     | <MINUS> { ret = MOD_NOT; }     | <NOT> { ret = MOD_NOT; }  ]  { return ret; }}Query Query(String field) :{  Vector clauses = new Vector();  Query q, firstQuery=null;  boolean orPresent = false;  int modifier;}{  modifier=Modifier() q=andExpression(field)  {    addClause(clauses, CONJ_NONE, modifier, q);    if (modifier == MOD_NONE)      firstQuery = q;  }  (    [<OR> { orPresent=true; }] modifier=Modifier() q=andExpression(field)    { addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); }  )*    {      if (clauses.size() == 1 && firstQuery != null)        return firstQuery;      else {        return getBooleanQuery(clauses);      }    }}Query andExpression(String field) :{  Vector clauses = new Vector();  Query q, firstQuery=null;  int modifier;}{  q=Clause(field)  {    addClause(clauses, CONJ_NONE, MOD_NONE, q);    firstQuery = q;  }  (    <AND> modifier=Modifier() q=Clause(field)    { addClause(clauses, CONJ_AND, modifier, q); }  )*    {      if (clauses.size() == 1 && firstQuery != null)        return firstQuery;      else {        return getBooleanQuery(clauses);      }    }}Query Clause(String field) : {  Query q;  Token fieldToken=null, boost=null;}{  [    LOOKAHEAD(2)    fieldToken=<TERM> <COLON> {      field=discardEscapeChar(fieldToken.image);    }  ]  (   q=Term(field)   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?  )    {      if (boost != null) {        float f = (float)1.0;  try {    f = Float.valueOf(boost.image).floatValue();          q.setBoost(f);  } catch (Exception ignored) { }      }      return q;    }}Query Term(String field) : {  Token term, boost=null, fuzzySlop=null, goop1, goop2;  boolean prefix = false;  boolean wildcard = false;  boolean fuzzy = false;  Query q;}{  (     (       term=<TERM>       | term=<PREFIXTERM> { prefix=true; }       | term=<WILDTERM> { wildcard=true; }       | term=<NUMBER>     )     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]     {       String termImage=discardEscapeChar(term.image);       if (wildcard) {       q = getWildcardQuery(field, termImage);       } else if (prefix) {         q = getPrefixQuery(field,           discardEscapeChar(term.image.substring          (0, term.image.length()-1)));       } else if (fuzzy) {       	  float fms = fuzzyMinSim;       	  try {            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();       	  } catch (Exception ignored) { }       	 if(fms < 0.0f || fms > 1.0f){       	   throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");       	 }         q = getFuzzyQuery(field, termImage, fms);       } else {         q = getFieldQuery(field, termImage);       }     }     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )         <RANGEIN_END> )       [ <CARAT> boost=<NUMBER> ]        {          if (goop1.kind == RANGEIN_QUOTED) {            goop1.image = goop1.image.substring(1, goop1.image.length()-1);          } else {            goop1.image = discardEscapeChar(goop1.image);          }          if (goop2.kind == RANGEIN_QUOTED) {            goop2.image = goop2.image.substring(1, goop2.image.length()-1);      } else {        goop2.image = discardEscapeChar(goop2.image);      }          q = getRangeQuery(field, goop1.image, goop2.image, true);        }     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )         <RANGEEX_END> )       [ <CARAT> boost=<NUMBER> ]        {          if (goop1.kind == RANGEEX_QUOTED) {            goop1.image = goop1.image.substring(1, goop1.image.length()-1);          } else {            goop1.image = discardEscapeChar(goop1.image);          }          if (goop2.kind == RANGEEX_QUOTED) {            goop2.image = goop2.image.substring(1, goop2.image.length()-1);      } else {        goop2.image = discardEscapeChar(goop2.image);      }          q = getRangeQuery(field, goop1.image, goop2.image, false);        }     | term=<QUOTED>       [ fuzzySlop=<FUZZY_SLOP> ]       [ <CARAT> boost=<NUMBER> ]       {         int s = phraseSlop;         if (fuzzySlop != null) {           try {             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();           }           catch (Exception ignored) { }         }         q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);       }  )  {    if (boost != null) {      float f = (float) 1.0;      try {        f = Float.valueOf(boost.image).floatValue();      }      catch (Exception ignored) {    /* Should this be handled somehow? (defaults to "no boost", if     * boost number is invalid)     */      }      // avoid boosting null queries, such as those caused by stop words      if (q != null) {        q.setBoost(f);      }    }    return q;  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -