📄 patternanalyzer.java

📁 lucene2.2.0版本
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
  public boolean equals(Object other) {    if (this  == other) return true;    if (this  == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) return false;    if (other == DEFAULT_ANALYZER && this  == EXTENDED_ANALYZER) return false;        if (other instanceof PatternAnalyzer) {      PatternAnalyzer p2 = (PatternAnalyzer) other;      return         toLowerCase == p2.toLowerCase &&        eqPattern(pattern, p2.pattern) &&        eq(stopWords, p2.stopWords);    }    return false;  }    /**   * Returns a hash code value for the object.   *    * @return the hash code.   */  public int hashCode() {    if (this == DEFAULT_ANALYZER) return -1218418418; // fast path    if (this == EXTENDED_ANALYZER) return 1303507063; // fast path        int h = 1;    h = 31*h + pattern.pattern().hashCode();    h = 31*h + pattern.flags();    h = 31*h + (toLowerCase ? 1231 : 1237);    h = 31*h + (stopWords != null ? stopWords.hashCode() : 0);    return h;  }    /** equality where o1 and/or o2 can be null */  private static boolean eq(Object o1, Object o2) {    return (o1 == o2) || (o1 != null ? o1.equals(o2) : false);  }    /** assumes p1 and p2 are not null */  private static boolean eqPattern(Pattern p1, Pattern p2) {    return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().equals(p2.pattern()));  }      /**   * Reads until end-of-stream and returns all read chars, finally closes the stream.   *    * @param input the input stream   * @throws IOException if an I/O error occurs while reading the stream   */  private static String toString(Reader input) throws IOException {    try {      int len = 256;      char[] buffer = new char[len];      char[] output = new char[len];            len = 0;      int n;      while ((n = input.read(buffer)) >= 0) {        if (len + n > output.length) { // grow capacity          char[] tmp = new char[Math.max(output.length << 1, len + n)];          System.arraycopy(output, 0, tmp, 0, len);          System.arraycopy(buffer, 0, tmp, len, n);          buffer = output; // use larger buffer for future larger bulk reads          output = tmp;        } else {          System.arraycopy(buffer, 0, output, len, n);        }        len += n;      }      return new String(output, 0, len);    } finally {      if (input != null) input.close();    }  }      /** somewhat oversized to minimize hash collisions */  private static Set makeStopSet(String[] stopWords) {    Set stops = new HashSet(stopWords.length * 2, 0.3f);     stops.addAll(Arrays.asList(stopWords));    return stops;//    return Collections.unmodifiableSet(stops);  }    ///////////////////////////////////////////////////////////////////////////////  // Nested classes:  ///////////////////////////////////////////////////////////////////////////////  /**   * The work horse; performance isn't fantastic, but it's not nearly as bad   * as one might think - kudos to the Sun regex developers.   */  private static final class PatternTokenizer extends TokenStream {        private final String str;    private final boolean toLowerCase;    private Matcher matcher;    private int pos = 0;    private static final Locale locale = Locale.getDefault();        public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {      this.str = str;      this.matcher = pattern.matcher(str);      this.toLowerCase = toLowerCase;    }    public Token next() {      if (matcher == null) return null;            while (true) { // loop takes care of leading and trailing boundary cases        int start = pos;        int end;        boolean isMatch = matcher.find();        if (isMatch) {          end = matcher.start();          pos = matcher.end();        } else {           end = str.length();          matcher = null; // we're finished        }                if (start != end) { // non-empty match (header/trailer)          String text = str.substring(start, end);          if (toLowerCase) text = text.toLowerCase(locale);          return new Token(text, start, end);        }        if (!isMatch) return null;      }    }      }       ///////////////////////////////////////////////////////////////////////////////  // Nested classes:  ///////////////////////////////////////////////////////////////////////////////  /**   * Special-case class for best performance in common cases; this class is   * otherwise unnecessary.   */  private static final class FastStringTokenizer extends TokenStream {        private final String str;    private int pos;    private final boolean isLetter;    private final boolean toLowerCase;    private final Set stopWords;    private static final Locale locale = Locale.getDefault();        public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) {      this.str = str;      this.isLetter = isLetter;      this.toLowerCase = toLowerCase;      this.stopWords = stopWords;    }    public Token next() {      // cache loop instance vars (performance)      String s = str;      int len = s.length();      int i = pos;      boolean letter = isLetter;            int start = 0;      String text;      do {        // find beginning of token        text = null;        while (i < len && !isTokenChar(s.charAt(i), letter)) {          i++;        }                if (i < len) { // found beginning; now find end of token          start = i;          while (i < len && isTokenChar(s.charAt(i), letter)) {            i++;          }                    text = s.substring(start, i);          if (toLowerCase) text = text.toLowerCase(locale);//          if (toLowerCase) {            ////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed////            see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809//            text = s.substring(start, i).toLowerCase(); ////            char[] chars = new char[i-start];////            for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));////            text = new String(chars);//          } else {//            text = s.substring(start, i);//          }        }      } while (text != null && isStopWord(text));            pos = i;      return text != null ? new Token(text, start, i) : null;    }        private boolean isTokenChar(char c, boolean isLetter) {      return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c);    }        private boolean isStopWord(String text) {      return stopWords != null && stopWords.contains(text);    }      }    ///////////////////////////////////////////////////////////////////////////////  // Nested classes:  ///////////////////////////////////////////////////////////////////////////////  /**   * A StringReader that exposes it's contained string for fast direct access.   * Might make sense to generalize this to CharSequence and make it public?   */  static final class FastStringReader extends StringReader {    private final String s;        FastStringReader(String s) {      super(s);      this.s = s;    }        String getString() {      return s;    }  }  }
上一页 12
💿 文件大小 5913 K
👤 上传用户 jjjjjkkkkjkjkjk
📂 所属分类 Java编程
🏷️ 相关标签

#lucene #版本
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -