📄 queryparser.jjt

📁 jsr170接口的java实现。是个apache的开源项目。
💻 JJT
📖 第 1 页 / 共 2 页
字号:
上一页 12
   * that has just a single * character at the end)
   *<p>
   * Depending on settings, prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wildcard queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token that contains one or more wild card
   *   characters (? or *), but is not simple prefix term
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getWildcardQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseWildcardTerms) {
      termStr = termStr.toLowerCase();
    }
    // The wildcard pattern is handed over in '%' / '_' notation.
    return new WildcardQuery(field, null, translateWildcards(termStr));
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses an input term
   * token that uses prefix notation; that is, contains a single '*' wildcard
   * character as its last character. Since this is a special case
   * of generic wildcard term, and such a query can be optimized easily,
   * this usually results in a different query object.
   *<p>
   * Depending on settings, a prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wild card queries, which may be necessary due to missing analyzer calls.
   *<p>
   * This implementation re-appends the '*' and delegates to
   * {@link #getWildcardQuery}.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *    (<b>without</b> trailing '*' character!)
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getPrefixQuery(String field, String termStr) throws ParseException
  {
    return getWildcardQuery(field, termStr + "*");
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses
   * an input term token that has the fuzzy suffix (~) appended.
   * Uses the parser-wide default minimum similarity
   * (<code>fuzzyMinSim</code>).
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
    return getFuzzyQuery(field, termStr, fuzzyMinSim);
  }

  /**
   * Translates unescaped wildcards '*' and '?' into '%' and '_'.
   * <ul>
   * <li>an escaped '*' or '?' is emitted as the plain character;</li>
   * <li>an escaped backslash is emitted as two backslashes;</li>
   * <li>'%' and '_' in the input are always emitted with a preceding
   *     backslash, whether or not they were escaped in the input;</li>
   * <li>a backslash in front of any other character is kept;</li>
   * <li>a dangling backslash at the very end of the input is dropped.</li>
   * </ul>
   *
   * @param input the input String.
   * @return the translated String.
   */
  private String translateWildcards(String input) {
    StringBuffer translated = new StringBuffer(input.length());
    // true while the previous character was an unconsumed backslash
    boolean escaped = false;
    for (int i = 0; i < input.length(); i++) {
      char c = input.charAt(i);
      if (c == '\\') {
        if (escaped) {
          translated.append("\\\\");
          escaped = false;
        } else {
          escaped = true;
        }
      } else if (c == '*') {
        if (escaped) {
          translated.append('*');
          escaped = false;
        } else {
          translated.append('%');
        }
      } else if (c == '?') {
        if (escaped) {
          translated.append('?');
          escaped = false;
        } else {
          translated.append('_');
        }
      } else if (c == '%' || c == '_') {
        // escape every occurrence of '%' and '_'
        escaped = false;
        translated.append('\\').append(c);
      } else {
        if (escaped) {
          translated.append('\\');
          escaped = false;
        }
        translated.append(c);
      }
    }
    return translated.toString();
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses
   * an input term token that has the fuzzy suffix (~floatNumber) appended.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   * @param minSimilarity the minimum similarity required for a fuzzy match
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
  {
    Term t = new Term(field, termStr);
    return new FuzzyQuery(t, minSimilarity);
  }

  /**
   * Returns a String where the escape char has been
   * removed, or kept only once if there was a double escape.
   * <p>
   * The input is scanned left to right: a backslash that is not itself
   * escaped is dropped and marks the next character as escaped; an escaped
   * character (including an escaped backslash) is copied as is. Pairing the
   * escapes with a state flag, instead of looking at the previous source
   * character, keeps runs of three or more backslashes correct
   * (<code>\\\*</code> becomes <code>\*</code>). A dangling backslash at the
   * end of the input is dropped.
   *
   * @param input the raw token image.
   * @return the image with escape characters resolved.
   */
  private String discardEscapeChar(String input) {
    char[] caSource = input.toCharArray();
    char[] caDest = new char[caSource.length];
    int j = 0;
    // true when the previous character was a backslash acting as an escape
    boolean lastWasEscape = false;
    for (int i = 0; i < caSource.length; i++) {
      char c = caSource[i];
      if (c == '\\' && !lastWasEscape) {
        // escape introducer: drop it, remember that the next char is escaped
        lastWasEscape = true;
      } else {
        caDest[j++] = c;
        lastWasEscape = false;
      }
    }
    return new String(caDest, 0, j);
  }

  /**
   * Returns a String where those characters that QueryParser
   * expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
   *
   * @param s the raw text.
   * @return the text with every special character preceded by a backslash.
   */
  public static String escape(String s) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      // NOTE: keep this in sync with _ESCAPED_CHAR below!
      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
        || c == '*' || c == '?') {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb.toString();
  }

  /**
   * Command line driver: parses <code>args[0]</code> against the default
   * field "field" with a SimpleAnalyzer and prints the resulting query.
   */
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("field",
                           new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE>>
}

// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
// | <WILDTERM:  <_TERM_START_CHAR>
//              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >

<DEFAULT> TOKEN : {
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <CARAT:     "^" > : Boost
| <QUOTED:     "\"" (~["\""])+ "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
// support for prefix queries enabled!
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}

<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

// Optional AND / OR between two clauses; CONJ_NONE when neither is present.
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}

// Optional '+', '-' or NOT in front of a clause; MOD_NONE when absent.
// Both '-' and NOT map to MOD_NOT.
int Modifiers() : {
  int ret = MOD_NONE;
}
{
  [
     <PLUS> { ret = MOD_REQ; }
     | <MINUS> { ret = MOD_NOT; }
     | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}

// One or more clauses. A single clause without modifier is returned as is,
// anything else is combined through getBooleanQuery().
Query Query(String field) :
{
  Vector clauses = new Vector();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    if (mods == MOD_NONE)
        firstQuery=q;
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
    {
      if (clauses.size() == 1 && firstQuery != null)
        return firstQuery;
      else {
        return getBooleanQuery(clauses);
      }
    }
}

// Optional "field:" prefix followed by either a single term or a
// parenthesised sub-query with an optional boost.
Query Clause(String field) : {
  Query q;
  Token fieldToken=null, boost=null;
}
{
  [
    LOOKAHEAD(2)
    fieldToken=<TERM> <COLON> {
      field=discardEscapeChar(fieldToken.image);
    }
  ]

  (
   q=Term(field)
   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?

  )
    {
      if (boost != null) {
        float f = (float) 1.0;
        try {
          f = Float.valueOf(boost.image).floatValue();
        }
        catch (Exception ignored) {
          /* defaults to "no boost", if boost number is invalid */
        }
        // avoid boosting null queries; checked explicitly instead of
        // relying on a swallowed NullPointerException
        if (q != null) {
          q.setBoost(f);
        }
      }
      return q;
    }
}

// A single term (plain, prefix, wildcard, fuzzy), an inclusive or exclusive
// range, or a quoted phrase; each with an optional boost.
Query Term(String field) : {
  Token term, boost=null, fuzzySlop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
     (
       term=<TERM>
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<NUMBER>
     )
     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
     {
       String termImage=discardEscapeChar(term.image);
       if (wildcard) {
         // NOTE(review): escape characters are already resolved in termImage
         // when it reaches translateWildcards() - confirm this is intended.
         q = getWildcardQuery(field, termImage);
       } else if (prefix) {
         // strip the trailing '*' before resolving escapes
         q = getPrefixQuery(field,
           discardEscapeChar(term.image.substring
          (0, term.image.length()-1)));
       } else if (fuzzy) {
         float fms = fuzzyMinSim;
         try {
           // skip the leading '~'
           fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
         } catch (Exception ignored) {
           // a bare "~" carries no number: keep the default similarity
         }
         if(fms < 0.0f || fms > 1.0f){
           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
         }
         if(fms == fuzzyMinSim)
           q = getFuzzyQuery(field, termImage);
         else
           q = getFuzzyQuery(field, termImage, fms);
       } else {
         q = getFieldQuery(field, analyzer, termImage);
       }
     }
     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
         <RANGEIN_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // quoted bounds lose their surrounding quotes,
          // unquoted bounds get their escapes resolved
          if (goop1.kind == RANGEIN_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          } else {
            goop1.image = discardEscapeChar(goop1.image);
          }
          if (goop2.kind == RANGEIN_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          } else {
            goop2.image = discardEscapeChar(goop2.image);
          }
          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
        }
     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
         <RANGEEX_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          if (goop1.kind == RANGEEX_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          } else {
            goop1.image = discardEscapeChar(goop1.image);
          }
          if (goop2.kind == RANGEEX_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          } else {
            goop2.image = discardEscapeChar(goop2.image);
          }
          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
        }
     | term=<QUOTED>
       [ fuzzySlop=<FUZZY_SLOP> ]
       [ <CARAT> boost=<NUMBER> ]
       {
         int s = phraseSlop;
         if (fuzzySlop != null) {
           try {
             // for a phrase the number after '~' is the slop
             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
           }
           catch (Exception ignored) {
             // a bare "~" carries no number: keep the default phrase slop
           }
         }
         q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
       }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) {
    /* Should this be handled somehow? (defaults to "no boost", if
     * boost number is invalid)
     */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -