📄 matcher.java
字号:
/**
 * Attempts to find the next subsequence of the input sequence that matches
 * the pattern.
 *
 * <p> This method starts at the beginning of the input sequence or, if a
 * previous invocation of the method was successful and the matcher has not
 * since been reset, at the first character not matched by the previous
 * match.
 *
 * <p> If the match succeeds then more information can be obtained via the
 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
 *
 * @return  <tt>true</tt> if, and only if, a subsequence of the input
 *          sequence matches this matcher's pattern
 */
public boolean find() {
    // When the previous match was empty (its end equals its start), advance
    // one position so the next search does not restart at the same index.
    if (last == first) {
        last++;
    }

    // Past the search limit: clear the group data and report failure.
    if (last > to) {
        for (int i = 0; i < groups.length; i++) {
            groups[i] = -1;
        }
        return false;
    }

    return find(last, getTextLength());
}

/**
 * Resets this matcher and then attempts to find the next subsequence of
 * the input sequence that matches the pattern, starting at the specified
 * index.
 *
 * <p> If the match succeeds then more information can be obtained via the
 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
 * invocations of the {@link #find()} method will start at the first
 * character not matched by this match. </p>
 *
 * @param  start
 *         The index at which the search begins
 *
 * @throws  IndexOutOfBoundsException
 *          If start is less than zero or if start is greater than the
 *          length of the input sequence.
 *
 * @return  <tt>true</tt> if, and only if, a subsequence of the input
 *          sequence starting at the given index matches this matcher's
 *          pattern
 */
public boolean find(int start) {
    int limit = getTextLength();
    if ((start < 0) || (start > limit)) {
        throw new IndexOutOfBoundsException("Illegal start index");
    }
    reset();
    return find(start, limit);
}

/**
 * Attempts to match the input sequence, starting at the beginning, against
 * the pattern.
 *
 * <p> Like the {@link #matches matches} method, this method always starts
 * at the beginning of the input sequence; unlike that method, it does not
 * require that the entire input sequence be matched.
 *
 * <p> If the match succeeds then more information can be obtained via the
 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
 *
 * @return  <tt>true</tt> if, and only if, a prefix of the input
 *          sequence matches this matcher's pattern
 */
public boolean lookingAt() {
    reset();
    return match(0, getTextLength(), NOANCHOR);
}

/**
 * Implements a non-terminal append-and-replace step.
 *
 * <p> This method performs the following actions: </p>
 *
 * <ol>
 *
 *   <li><p> It reads characters from the input sequence, starting at the
 *   append position, and appends them to the given string buffer.  It
 *   stops after reading the last character preceding the previous match,
 *   that is, the character at index {@link #start()} <tt>-</tt>
 *   <tt>1</tt>.  </p></li>
 *
 *   <li><p> It appends the given replacement string to the string buffer.
 *   </p></li>
 *
 *   <li><p> It sets the append position of this matcher to the index of
 *   the last character matched, plus one, that is, to {@link #end()}.
 *   </p></li>
 *
 * </ol>
 *
 * <p> The replacement string may contain references to subsequences
 * captured during the previous match: Each occurrence of
 * <tt>$</tt><i>g</i> will be replaced by the result of
 * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
 * The first number after the <tt>$</tt> is always treated as part of
 * the group reference. Subsequent numbers are incorporated into g if
 * they would form a legal group reference. Only the numerals '0'
 * through '9' are considered as potential components of the group
 * reference. If the second group matched the string <tt>"foo"</tt>, for
 * example, then passing the replacement string <tt>"$2bar"</tt> would
 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
 * sign (<tt>$</tt>) may be included as a literal in the replacement
 * string by preceding it with a backslash (<tt>\$</tt>).
 *
 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 * the replacement string may cause the results to be different than if it
 * were being treated as a literal replacement string. Dollar signs may be
 * treated as references to captured subsequences as described above, and
 * backslashes are used to escape literal characters in the replacement
 * string.
 *
 * <p> This method is intended to be used in a loop together with the
 * {@link #appendTail appendTail} and {@link #find find} methods.  The
 * following code, for example, writes <tt>one dog two dogs in the
 * yard</tt> to the standard-output stream: </p>
 *
 * <blockquote><pre>
 * Pattern p = Pattern.compile("cat");
 * Matcher m = p.matcher("one cat two cats in the yard");
 * StringBuffer sb = new StringBuffer();
 * while (m.find()) {
 *     m.appendReplacement(sb, "dog");
 * }
 * m.appendTail(sb);
 * System.out.println(sb.toString());</pre></blockquote>
 *
 * @param  sb
 *         The target string buffer
 *
 * @param  replacement
 *         The replacement string
 *
 * @return  This matcher
 *
 * @throws  IllegalStateException
 *          If no match has yet been attempted,
 *          or if the previous match operation failed
 *
 * @throws  IllegalArgumentException
 *          If a backslash in the replacement string is not followed by a
 *          character to escape, or a dollar sign is not followed by a
 *          digit
 *
 * @throws  IndexOutOfBoundsException
 *          If the replacement string refers to a capturing group
 *          that does not exist in the pattern
 */
public Matcher appendReplacement(StringBuffer sb, String replacement) {
    // A negative start index means there is no current match to replace.
    if (first < 0) {
        throw new IllegalStateException("No match available");
    }

    // Expand the replacement string: resolve backslash escapes and
    // substitute group references with the captured text.
    int length = replacement.length();
    int cursor = 0;
    StringBuffer result = new StringBuffer();

    while (cursor < length) {
        char nextChar = replacement.charAt(cursor);
        if (nextChar == '\\') {
            cursor++;
            // A trailing backslash has nothing to escape; report it
            // explicitly instead of letting charAt run off the end.
            if (cursor == length) {
                throw new IllegalArgumentException(
                    "character to be escaped is missing");
            }
            result.append(replacement.charAt(cursor));
            cursor++;
        } else if (nextChar == '$') {
            // Skip past $
            cursor++;
            // A trailing dollar sign carries no group index.
            if (cursor == length) {
                throw new IllegalArgumentException(
                    "Illegal group reference: group index is missing");
            }

            // The first number is always a group
            int refNum = replacement.charAt(cursor) - '0';
            if ((refNum < 0) || (refNum > 9)) {
                throw new IllegalArgumentException(
                    "Illegal group reference");
            }
            cursor++;

            // Capture the largest legal group string: keep consuming
            // digits while the resulting number is still a valid group.
            while (cursor < length) {
                int nextDigit = replacement.charAt(cursor) - '0';
                if ((nextDigit < 0) || (nextDigit > 9)) {
                    break; // not a number
                }
                int newRefNum = (refNum * 10) + nextDigit;
                if (groupCount() < newRefNum) {
                    break; // digit belongs to the literal text that follows
                }
                refNum = newRefNum;
                cursor++;
            }

            // Append group; a null group contributes nothing.
            if (group(refNum) != null) {
                result.append(group(refNum));
            }
        } else {
            result.append(nextChar);
            cursor++;
        }
    }

    // Append the intervening text
    sb.append(getSubSequence(lastAppendPosition, first));
    // Append the match substitution
    sb.append(result.toString());

    lastAppendPosition = last;
    return this;
}

/**
 * Implements a terminal append-and-replace step.
 *
 * <p> This method reads characters from the input sequence, starting at
 * the append position, and appends them to the given string buffer.  It is
 * intended to be invoked after one or more invocations of the {@link
 * #appendReplacement appendReplacement} method in order to copy the
 * remainder of the input sequence.  </p>
 *
 * @param  sb
 *         The target string buffer
 *
 * @return  The target string buffer
 */
public StringBuffer appendTail(StringBuffer sb) {
    sb.append(getSubSequence(lastAppendPosition, getTextLength()).toString());
    return sb;
}

/**
 * Replaces every subsequence of the input sequence that matches the
 * pattern with the given replacement string.
 *
 * <p> This method first resets this matcher.  It then scans the input
 * sequence looking for matches of the pattern.  Characters that are not
 * part of any match are appended directly to the result string; each match
 * is replaced in the result by the replacement string.  The replacement
 * string may contain references to captured subsequences as in the {@link
 * #appendReplacement appendReplacement} method.
 *
 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 * the replacement string may cause the results to be different than if it
 * were being treated as a literal replacement string. Dollar signs may be
 * treated as references to captured subsequences as described above, and
 * backslashes are used to escape literal characters in the replacement
 * string.
 *
 * <p> Given the regular expression <tt>a*b</tt>, the input
 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
 * <tt>"-"</tt>, an invocation of this method on a matcher for that
 * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
 *
 * <p> Invoking this method changes this matcher's state.  If the matcher
 * is to be used in further matching operations then it should first be
 * reset.  </p>
 *
 * @param  replacement
 *         The replacement string
 *
 * @return  The string constructed by replacing each matching subsequence
 *          by the replacement string, substituting captured subsequences
 *          as needed
 */
public String replaceAll(String replacement) {
    reset();
    boolean result = find();
    if (result) {
        StringBuffer sb = new StringBuffer();
        do {
            appendReplacement(sb, replacement);
            result = find();
        } while (result);
        appendTail(sb);
        return sb.toString();
    }
    // No match at all: the result is the input text itself.
    return text.toString();
}

/**
 * Replaces the first subsequence of the input sequence that matches the
 * pattern with the given replacement string.
 *
 * <p> This method first resets this matcher.  It then scans the input
 * sequence looking for a match of the pattern.  Characters that are not
 * part of the match are appended directly to the result string; the match
 * is replaced in the result by the replacement string.  The replacement
 * string may contain references to captured subsequences as in the {@link
 * #appendReplacement appendReplacement} method.
 *
 * <p> Given the regular expression <tt>dog</tt>, the input
 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
 * <tt>"cat"</tt>, an invocation of this method on a matcher for that
 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
 *
 * <p> Invoking this method changes this matcher's state.  If the matcher
 * is to be used in further matching operations then it should first be
 * reset.  </p>
 *
 * @param  replacement
 *         The replacement string
 *
 * @return  The string constructed by replacing the first matching
 *          subsequence by the replacement string, substituting captured
 *          subsequences as needed
 */
public String replaceFirst(String replacement) {
    StringBuffer sb = new StringBuffer();
    reset();
    if (find()) {
        appendReplacement(sb, replacement);
    }
    appendTail(sb);
    return sb.toString();
}

/**
 * Initiates a search to find a Pattern within the given bounds.
 * The groups are filled with default values and the match of the root
 * of the state machine is called. The state machine will hold the state
 * of the match as it proceeds in this matcher.
 *
 * @param  from  the index at which the search starts; negative values
 *               are treated as zero
 * @param  to    the search limit stored in this matcher
 * @return <tt>true</tt> if the root node reported a match
 */
private boolean find(int from, int to) {
    from = from < 0 ? 0 : from;
    this.to = to;
    this.first = from;
    this.last = -1;
    this.oldLast = oldLast < 0 ? from : oldLast;
    for (int i = 0; i < groups.length; i++) {
        groups[i] = -1;
    }
    acceptMode = NOANCHOR;
    boolean result = parentPattern.root.match(this, from, text);
    if (!result) {
        // A negative start index marks "no match available".
        this.first = -1;
    }
    this.oldLast = this.last;
    return result;
}

/**
 * Initiates a search for an anchored match to a Pattern within the given
 * bounds. The groups are filled with default values and the match of the
 * root of the state machine is called. The state machine will hold the
 * state of the match as it proceeds in this matcher.
 *
 * @param  from    the index at which the match starts; negative values
 *                 are treated as zero
 * @param  to      the match limit stored in this matcher
 * @param  anchor  the accept mode to use for this match
 * @return <tt>true</tt> if the anchored root node reported a match
 */
private boolean match(int from, int to, int anchor) {
    from = from < 0 ? 0 : from;
    this.to = to;
    this.first = from;
    this.last = -1;
    this.oldLast = oldLast < 0 ? from : oldLast;
    for (int i = 0; i < groups.length; i++) {
        groups[i] = -1;
    }
    acceptMode = anchor;
    boolean result = parentPattern.matchRoot.match(this, from, text);
    if (!result) {
        // A negative start index marks "no match available".
        this.first = -1;
    }
    this.oldLast = this.last;
    return result;
}

/**
 * Returns the end index of the text.
 *
 * @return the index after the last character in the text
 */
int getTextLength() {
    return text.length();
}

/**
 * Returns the part of this Matcher's input in the specified range.
 *
 * @param  beginIndex   the beginning index, inclusive
 * @param  endIndex     the ending index, exclusive
 * @return A character sequence taken from this Matcher's input
 */
CharSequence getSubSequence(int beginIndex, int endIndex) {
    return text.subSequence(beginIndex, endIndex);
}

/**
 * Returns this Matcher's input character at index i.
 *
 * @param  i  the index of the character to return
 * @return A char from the specified index
 */
char charAt(int i) {
    return text.charAt(i);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -