📄 matcher.java
字号:
/* * @(#)Matcher.java 1.46 03/01/23 * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */package java.util.regex;/** * An engine that performs match operations on a {@link java.lang.CharSequence * </code>character sequence<code>} by interpreting a {@link Pattern}. * * <p> A matcher is created from a pattern by invoking the pattern's {@link * Pattern#matcher matcher} method. Once created, a matcher can be used to * perform three different kinds of match operations: * * <ul> * * <li><p> The {@link #matches matches} method attempts to match the entire * input sequence against the pattern. </p></li> * * <li><p> The {@link #lookingAt lookingAt} method attempts to match the * input sequence, starting at the beginning, against the pattern. </p></li> * * <li><p> The {@link #find find} method scans the input sequence looking for * the next subsequence that matches the pattern. </p></li> * * </ul> * * <p> Each of these methods returns a boolean indicating success or failure. * More information about a successful match can be obtained by querying the * state of the matcher. * * <p> This class also defines methods for replacing matched subsequences with * new strings whose contents can, if desired, be computed from the match * result. The {@link #appendReplacement appendReplacement} and {@link * #appendTail appendTail} methods can be used in tandem in order to collect * the result into an existing string buffer, or the more convenient {@link * #replaceAll replaceAll} method can be used to create a string in which every * matching subsequence in the input sequence is replaced. * * <p> The explicit state of a matcher includes the start and end indices of * the most recent successful match. It also includes the start and end * indices of the input subsequence captured by each <a * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total * count of such subsequences. 
As a convenience, methods are also provided for
 * returning these captured subsequences in string form.
 *
 * <p> The explicit state of a matcher is initially undefined; attempting to
 * query any part of it before a successful match will cause an {@link
 * IllegalStateException} to be thrown.  The explicit state of a matcher is
 * recomputed by every match operation.
 *
 * <p> The implicit state of a matcher includes the input character sequence as
 * well as the <i>append position</i>, which is initially zero and is updated
 * by the {@link #appendReplacement appendReplacement} method.
 *
 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
 * method or, if a new input sequence is desired, its {@link
 * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
 * matcher discards its explicit state information and sets the append position
 * to zero.
 *
 * <p> Instances of this class are not safe for use by multiple concurrent
 * threads. </p>
 *
 *
 * @author Mike McCloskey
 * @author Mark Reinhold
 * @author JSR-51 Expert Group
 * @version 1.46, 03/01/23
 * @since 1.4
 * @spec JSR-51
 */
public final class Matcher {

    /**
     * The Pattern object that created this Matcher.
     */
    Pattern parentPattern;

    /**
     * The storage used by groups. They may contain invalid values if
     * a group was skipped during the matching.
     *
     * Two slots are used per group: index <tt>2 * g</tt> holds the start
     * and index <tt>2 * g + 1</tt> the end of group <tt>g</tt> (this is how
     * the start/end/group accessors read it). reset() fills every slot
     * with -1, and group(int) treats -1 as "group did not match".
     */
    int[] groups;

    /**
     * The range within the string that is to be matched.
     */
    int from, to;

    /**
     * The original string being matched.
     */
    CharSequence text;

    /**
     * Matcher state used by the last node. NOANCHOR is used when a
     * match does not have to consume all of the input. ENDANCHOR is
     * the mode used for matching all the input.
     *
     * matches() passes ENDANCHOR to match().
     */
    static final int ENDANCHOR = 1;
    static final int NOANCHOR = 0;
    int acceptMode = NOANCHOR;

    /**
     * The range of string that last matched the pattern.
     *
     * Both values are -1 after reset(); the public accessors throw
     * IllegalStateException while first is negative.
     */
    int first = -1, last = -1;

    /**
     * The end index of what matched in the last match operation.
     * Set back to -1 by reset().
     */
    int oldLast = -1;

    /**
     * The index of the last position appended in a substitution.
*/ int lastAppendPosition = 0; /** * Storage used by nodes to tell what repetition they are on in * a pattern, and where groups begin. The nodes themselves are stateless, * so they rely on this field to hold state during a match. */ int[] locals; /** * No default constructor. */ Matcher() { } /** * All matchers have the state used by Pattern during a match. */ Matcher(Pattern parent, CharSequence text) { this.parentPattern = parent; this.text = text; // Allocate state storage int parentGroupCount = Math.max(parent.groupCount, 10); groups = new int[parentGroupCount * 2]; locals = new int[parent.localCount]; // Put fields into initial states reset(); } /** * Returns the pattern that is interpreted by this matcher. </p> * * @return The pattern for which this matcher was created */ public Pattern pattern() { return parentPattern; } /** * Resets this matcher. * * <p> Resetting a matcher discards all of its explicit state information * and sets its append position to zero. </p> * * @return This matcher */ public Matcher reset() { first = -1; last = -1; oldLast = -1; for(int i=0; i<groups.length; i++) groups[i] = -1; for(int i=0; i<locals.length; i++) locals[i] = -1; lastAppendPosition = 0; return this; } /** * Resets this matcher with a new input sequence. * * <p> Resetting a matcher discards all of its explicit state information * and sets its append position to zero. </p> * * @param input * The new input character sequence * * @return This matcher */ public Matcher reset(CharSequence input) { text = input; return reset(); } /** * Returns the start index of the previous match. </p> * * @return The index of the first character matched * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public int start() { if (first < 0) throw new IllegalStateException("No match available"); return first; } /** * Returns the start index of the subsequence captured by the given group * during the previous match operation. 
* * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left * to right, starting at one. Group zero denotes the entire pattern, so * the expression <i>m.</i><tt>start(0)</tt> is equivalent to * <i>m.</i><tt>start()</tt>. </p> * * @param group * The index of a capturing group in this matcher's pattern * * @return The index of the first character captured by the group, * or <tt>-1</tt> if the match was successful but the group * itself did not match anything * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given index */ public int start(int group) { if (first < 0) throw new IllegalStateException("No match available"); if (group > groupCount()) throw new IndexOutOfBoundsException("No group " + group); return groups[group * 2]; } /** * Returns the index of the last character matched, plus one. </p> * * @return The index of the last character matched, plus one * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public int end() { if (first < 0) throw new IllegalStateException("No match available"); return last; } /** * Returns the index of the last character, plus one, of the subsequence * captured by the given group during the previous match operation. * * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left * to right, starting at one. Group zero denotes the entire pattern, so * the expression <i>m.</i><tt>end(0)</tt> is equivalent to * <i>m.</i><tt>end()</tt>. 
</p> * * @param group * The index of a capturing group in this matcher's pattern * * @return The index of the last character captured by the group, * plus one, or <tt>-1</tt> if the match was successful * but the group itself did not match anything * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given index */ public int end(int group) { if (first < 0) throw new IllegalStateException("No match available"); if (group > groupCount()) throw new IndexOutOfBoundsException("No group " + group); return groups[group * 2 + 1]; } /** * Returns the input subsequence matched by the previous match. * * <p> For a matcher <i>m</i> with input sequence <i>s</i>, * the expressions <i>m.</i><tt>group()</tt> and * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> * are equivalent. </p> * * <p> Note that some patterns, for example <tt>a*</tt>, match the empty * string. This method will return the empty string when the pattern * successfully matches the empty string in the input. </p> * * @return The (possibly empty) subsequence matched by the previous match, * in string form * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed */ public String group() { return group(0); } /** * Returns the input subsequence captured by the given group during the * previous match operation. * * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> * are equivalent. </p> * * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left * to right, starting at one. 
Group zero denotes the entire pattern, so * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. * </p> * * <p> If the match was successful but the group specified failed to match * any part of the input sequence, then <tt>null</tt> is returned. Note * that some groups, for example <tt>(a*)</tt>, match the empty string. * This method will return the empty string when such a group successfully * matches the emtpy string in the input. </p> * * @param group * The index of a capturing group in this matcher's pattern * * @return The (possibly empty) subsequence captured by the group * during the previous match, or <tt>null</tt> if the group * failed to match part of the input * * @throws IllegalStateException * If no match has yet been attempted, * or if the previous match operation failed * * @throws IndexOutOfBoundsException * If there is no capturing group in the pattern * with the given index */ public String group(int group) { if (first < 0) throw new IllegalStateException("No match found"); if (group < 0 || group > groupCount()) throw new IndexOutOfBoundsException("No group " + group); if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) return null; return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); } /** * Returns the number of capturing groups in this matcher's pattern. * * <p> Group zero denotes the entire pattern by convention. It is not * included in this count. * * <p> Any non-negative integer smaller than or equal to the value * returned by this method is guaranteed to be a valid group index for * this matcher. </p> * * @return The number of capturing groups in this matcher's pattern */ public int groupCount() { return parentPattern.groupCount - 1; } /** * Attempts to match the entire input sequence against the pattern. * * <p> If the match succeeds then more information can be obtained via the * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. 
</p>
     *
     * @return  <tt>true</tt> if, and only if, the entire input sequence
     *          matches this matcher's pattern
     */
    public boolean matches() {
        // Discard any state left over from an earlier match operation.
        reset();
        // Match from index 0 across the whole input; ENDANCHOR is the mode
        // for matching all of the input.
        int inputLength = getTextLength();
        return match(0, inputLength, ENDANCHOR);
    }

    /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -