📄 patternanalyzer.java
字号:
public boolean equals(Object other) { if (this == other) return true; if (this == DEFAULT_ANALYZER && other == EXTENDED_ANALYZER) return false; if (other == DEFAULT_ANALYZER && this == EXTENDED_ANALYZER) return false; if (other instanceof PatternAnalyzer) { PatternAnalyzer p2 = (PatternAnalyzer) other; return toLowerCase == p2.toLowerCase && eqPattern(pattern, p2.pattern) && eq(stopWords, p2.stopWords); } return false; } /** * Returns a hash code value for the object. * * @return the hash code. */ public int hashCode() { if (this == DEFAULT_ANALYZER) return -1218418418; // fast path if (this == EXTENDED_ANALYZER) return 1303507063; // fast path int h = 1; h = 31*h + pattern.pattern().hashCode(); h = 31*h + pattern.flags(); h = 31*h + (toLowerCase ? 1231 : 1237); h = 31*h + (stopWords != null ? stopWords.hashCode() : 0); return h; } /** equality where o1 and/or o2 can be null */ private static boolean eq(Object o1, Object o2) { return (o1 == o2) || (o1 != null ? o1.equals(o2) : false); } /** assumes p1 and p2 are not null */ private static boolean eqPattern(Pattern p1, Pattern p2) { return p1 == p2 || (p1.flags() == p2.flags() && p1.pattern().equals(p2.pattern())); } /** * Reads until end-of-stream and returns all read chars, finally closes the stream. 
* * @param input the input stream * @throws IOException if an I/O error occurs while reading the stream */ private static String toString(Reader input) throws IOException { try { int len = 256; char[] buffer = new char[len]; char[] output = new char[len]; len = 0; int n; while ((n = input.read(buffer)) >= 0) { if (len + n > output.length) { // grow capacity char[] tmp = new char[Math.max(output.length << 1, len + n)]; System.arraycopy(output, 0, tmp, 0, len); System.arraycopy(buffer, 0, tmp, len, n); buffer = output; // use larger buffer for future larger bulk reads output = tmp; } else { System.arraycopy(buffer, 0, output, len, n); } len += n; } return new String(output, 0, len); } finally { if (input != null) input.close(); } } /** somewhat oversized to minimize hash collisions */ private static Set makeStopSet(String[] stopWords) { Set stops = new HashSet(stopWords.length * 2, 0.3f); stops.addAll(Arrays.asList(stopWords)); return stops;// return Collections.unmodifiableSet(stops); } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// /** * The work horse; performance isn't fantastic, but it's not nearly as bad * as one might think - kudos to the Sun regex developers. 
*/ private static final class PatternTokenizer extends TokenStream { private final String str; private final boolean toLowerCase; private Matcher matcher; private int pos = 0; private static final Locale locale = Locale.getDefault(); public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) { this.str = str; this.matcher = pattern.matcher(str); this.toLowerCase = toLowerCase; } public Token next() { if (matcher == null) return null; while (true) { // loop takes care of leading and trailing boundary cases int start = pos; int end; boolean isMatch = matcher.find(); if (isMatch) { end = matcher.start(); pos = matcher.end(); } else { end = str.length(); matcher = null; // we're finished } if (start != end) { // non-empty match (header/trailer) String text = str.substring(start, end); if (toLowerCase) text = text.toLowerCase(locale); return new Token(text, start, end); } if (!isMatch) return null; } } } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// /** * Special-case class for best performance in common cases; this class is * otherwise unnecessary. 
*/ private static final class FastStringTokenizer extends TokenStream { private final String str; private int pos; private final boolean isLetter; private final boolean toLowerCase; private final Set stopWords; private static final Locale locale = Locale.getDefault(); public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) { this.str = str; this.isLetter = isLetter; this.toLowerCase = toLowerCase; this.stopWords = stopWords; } public Token next() { // cache loop instance vars (performance) String s = str; int len = s.length(); int i = pos; boolean letter = isLetter; int start = 0; String text; do { // find beginning of token text = null; while (i < len && !isTokenChar(s.charAt(i), letter)) { i++; } if (i < len) { // found beginning; now find end of token start = i; while (i < len && isTokenChar(s.charAt(i), letter)) { i++; } text = s.substring(start, i); if (toLowerCase) text = text.toLowerCase(locale);// if (toLowerCase) { //// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed//// see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265809// text = s.substring(start, i).toLowerCase(); //// char[] chars = new char[i-start];//// for (int j=start; j < i; j++) chars[j-start] = Character.toLowerCase(s.charAt(j));//// text = new String(chars);// } else {// text = s.substring(start, i);// } } } while (text != null && isStopWord(text)); pos = i; return text != null ? new Token(text, start, i) : null; } private boolean isTokenChar(char c, boolean isLetter) { return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c); } private boolean isStopWord(String text) { return stopWords != null && stopWords.contains(text); } } /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// /** * A StringReader that exposes it's contained string for fast direct access. 
* Might make sense to generalize this to CharSequence and make it public?
 */
static final class FastStringReader extends StringReader {

  /** the very string this reader was constructed with */
  private final String content;

  /**
   * Creates a reader over the given string and remembers the string itself.
   *
   * @param s the string to read from
   */
  FastStringReader(String s) {
    super(s);
    this.content = s;
  }

  /**
   * Returns the string passed to the constructor.
   *
   * @return the underlying string
   */
  String getString() {
    return content;
  }

}

} // end of enclosing class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -