📄 queryparser.jj

📁 Lucene a java open-source SearchEngine Framework
💻 JJ
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
   */  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException  {    if (lowercaseExpandedTerms) {      termStr = termStr.toLowerCase();    }    Term t = new Term(field, termStr);    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);  }  /**   * Returns a String where the escape char has been   * removed, or kept only once if there was a double escape.   *    * Supports escaped unicode characters, e. g. translates   * <code>\u0041</code> to <code>A</code>.   *    */  private String discardEscapeChar(String input) throws ParseException {    // Create char array to hold unescaped char sequence    char[] output = new char[input.length()];          // The length of the output can be less than the input    // due to discarded escape chars. This variable holds    // the actual length of the output    int length = 0;          // We remember whether the last processed character was     // an escape character    boolean lastCharWasEscapeChar = false;          // The multiplier the current unicode digit must be multiplied with.    // E. g. the first digit must be multiplied with 16^3, the second with 16^2...    int codePointMultiplier = 0;          // Used to calculate the codepoint of the escaped unicode character    int codePoint = 0;          for (int i = 0; i < input.length(); i++) {      char curChar = input.charAt(i);      if (codePointMultiplier > 0) {        codePoint += hexToInt(curChar) * codePointMultiplier;        codePointMultiplier >>>= 4;        if (codePointMultiplier == 0) {          output[length++] = (char)codePoint;          codePoint = 0;        }      } else if (lastCharWasEscapeChar) {        if (curChar == 'u') {          // found an escaped unicode character          codePointMultiplier = 16 * 16 * 16;        } else {           // this character was escaped          output[length] = curChar;              length++;        }        lastCharWasEscapeChar = false;      } else {        if (curChar == '\\') {          lastCharWasEscapeChar = true;        } else {          output[length] = curChar;          length++;        }      }    }          if (codePointMultiplier > 0) {      throw new ParseException("Truncated unicode escape sequence.");    }        if (lastCharWasEscapeChar) {      throw new ParseException("Term can not end with escape character.");    }          return new String(output, 0, length);  }    /** Returns the numeric value of the hexadecimal character */  private static final int hexToInt(char c) throws ParseException {    if ('0' <= c && c <= '9') {      return c - '0';    } else if ('a' <= c && c <= 'f'){      return c - 'a' + 10;    } else if ('A' <= c && c <= 'F') {      return c - 'A' + 10;    } else {      throw new ParseException("None-hex character in unicode escape sequence: " + c);    }  }    /**   * Returns a String where those characters that QueryParser   * expects to be escaped are escaped by a preceding <code>\</code>.   */  public static String escape(String s) {    StringBuffer sb = new StringBuffer();    for (int i = 0; i < s.length(); i++) {      char c = s.charAt(i);      // These characters are part of the query syntax and must be escaped      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'        || c == '*' || c == '?' || c == '|' || c == '&') {        sb.append('\\');      }      sb.append(c);    }    return sb.toString();  }  /**   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.   * Usage:<br>   * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>   */  public static void main(String[] args) throws Exception {    if (args.length == 0) {      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");      System.exit(0);    }    QueryParser qp = new QueryParser("field",                           new org.apache.lucene.analysis.SimpleAnalyzer());    Query q = qp.parse(args[0]);    System.out.println(q.toString("field"));  }}PARSER_END(QueryParser)/* ***************** *//* Token Definitions *//* ***************** */<*> TOKEN : {  <#_NUM_CHAR:   ["0"-"9"] >// every character that follows a backslash is considered as an escaped character| <#_ESCAPED_CHAR: "\\" ~[] >| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",                           "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]                       | <_ESCAPED_CHAR> ) >| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >}<DEFAULT, RangeIn, RangeEx> SKIP : {  < <_WHITESPACE>>}<DEFAULT> TOKEN : {  <AND:       ("AND" | "&&") >| <OR:        ("OR" | "||") >| <NOT:       ("NOT" | "!") >| <PLUS:      "+" >| <MINUS:     "-" >| <LPAREN:    "(" >| <RPAREN:    ")" >| <COLON:     ":" >| <STAR:      "*" >| <CARAT:     "^" > : Boost| <QUOTED:     "\"" (~["\""] | "\\\"")* "\"">| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >| <RANGEIN_START: "[" > : RangeIn| <RANGEEX_START: "{" > : RangeEx}<Boost> TOKEN : {<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT}<RangeIn> TOKEN : {<RANGEIN_TO: "TO">| <RANGEIN_END: "]"> : DEFAULT| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">| <RANGEIN_GOOP: (~[ " ", "]" ])+ >}<RangeEx> TOKEN : {<RANGEEX_TO: "TO">| <RANGEEX_END: "}"> : DEFAULT| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">| <RANGEEX_GOOP: (~[ " ", "}" ])+ >}// *   Query  ::= ( Clause )*// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )int Conjunction() : {  int ret = CONJ_NONE;}{  [    <AND> { ret = CONJ_AND; }    | <OR>  { ret = CONJ_OR; }  ]  { return ret; }}int Modifiers() : {  int ret = MOD_NONE;}{  [     <PLUS> { ret = MOD_REQ; }     | <MINUS> { ret = MOD_NOT; }     | <NOT> { ret = MOD_NOT; }  ]  { return ret; }}// This makes sure that there is no garbage after the query stringQuery TopLevelQuery(String field) : {	Query q;}{	q=Query(field) <EOF>	{		return q;	}}Query Query(String field) :{  Vector clauses = new Vector();  Query q, firstQuery=null;  int conj, mods;}{  mods=Modifiers() q=Clause(field)  {    addClause(clauses, CONJ_NONE, mods, q);    if (mods == MOD_NONE)        firstQuery=q;  }  (    conj=Conjunction() mods=Modifiers() q=Clause(field)    { addClause(clauses, conj, mods, q); }  )*    {      if (clauses.size() == 1 && firstQuery != null)        return firstQuery;      else {  return getBooleanQuery(clauses);      }    }}Query Clause(String field) : {  Query q;  Token fieldToken=null, boost=null;}{  [    LOOKAHEAD(2)    (    fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}    | <STAR> <COLON> {field="*";}    )  ]  (   q=Term(field)   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?  )    {      if (boost != null) {        float f = (float)1.0;  try {    f = Float.valueOf(boost.image).floatValue();          q.setBoost(f);  } catch (Exception ignored) { }      }      return q;    }}Query Term(String field) : {  Token term, boost=null, fuzzySlop=null, goop1, goop2;  boolean prefix = false;  boolean wildcard = false;  boolean fuzzy = false;  boolean rangein = false;  Query q;}{  (     (       term=<TERM>       | term=<STAR> { wildcard=true; }       | term=<PREFIXTERM> { prefix=true; }       | term=<WILDTERM> { wildcard=true; }       | term=<NUMBER>     )     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]     {       String termImage=discardEscapeChar(term.image);       if (wildcard) {       q = getWildcardQuery(field, termImage);       } else if (prefix) {         q = getPrefixQuery(field,           discardEscapeChar(term.image.substring          (0, term.image.length()-1)));       } else if (fuzzy) {       	  float fms = fuzzyMinSim;       	  try {            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();       	  } catch (Exception ignored) { }       	 if(fms < 0.0f || fms > 1.0f){       	   throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");       	 }       	 q = getFuzzyQuery(field, termImage,fms);       } else {         q = getFieldQuery(field, termImage);       }     }     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )         <RANGEIN_END> )       [ <CARAT> boost=<NUMBER> ]        {          if (goop1.kind == RANGEIN_QUOTED) {            goop1.image = goop1.image.substring(1, goop1.image.length()-1);          }          if (goop2.kind == RANGEIN_QUOTED) {            goop2.image = goop2.image.substring(1, goop2.image.length()-1);          }          q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), true);        }     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )         <RANGEEX_END> )       [ <CARAT> boost=<NUMBER> ]        {          if (goop1.kind == RANGEEX_QUOTED) {            goop1.image = goop1.image.substring(1, goop1.image.length()-1);          }          if (goop2.kind == RANGEEX_QUOTED) {            goop2.image = goop2.image.substring(1, goop2.image.length()-1);          }          q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), false);        }     | term=<QUOTED>       [ fuzzySlop=<FUZZY_SLOP> ]       [ <CARAT> boost=<NUMBER> ]       {         int s = phraseSlop;         if (fuzzySlop != null) {           try {             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();           }           catch (Exception ignored) { }         }         q = getFieldQuery(field, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);       }  )  {    if (boost != null) {      float f = (float) 1.0;      try {        f = Float.valueOf(boost.image).floatValue();      }      catch (Exception ignored) {    /* Should this be handled somehow? (defaults to "no boost", if     * boost number is invalid)     */      }      // avoid boosting null queries, such as those caused by stop words      if (q != null) {        q.setBoost(f);      }    }    return q;  }}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -