📄 pattern.java
字号:
/**
 * Enables Unicode-aware case folding.
 *
 * <p> When this flag is specified then case-insensitive matching, when
 * enabled by the {@link #CASE_INSENSITIVE} flag, is done in a manner
 * consistent with the Unicode Standard.  By default, case-insensitive
 * matching assumes that only characters in the US-ASCII charset are being
 * matched.
 *
 * <p> Unicode-aware case folding can also be enabled via the embedded flag
 * expression <tt>(?u)</tt>.
 *
 * <p> Specifying this flag may impose a performance penalty.  </p>
 */
public static final int UNICODE_CASE = 0x40;

/**
 * Enables canonical equivalence.
 *
 * <p> When this flag is specified then two characters will be considered
 * to match if, and only if, their full canonical decompositions match.
 * The expression <tt>"a&#92;u030A"</tt>, for example, will match the
 * string <tt>"&#92;u00E5"</tt> when this flag is specified.  By default,
 * matching does not take canonical equivalence into account.
 *
 * <p> There is no embedded flag character for enabling canonical
 * equivalence.
 *
 * <p> Specifying this flag may impose a performance penalty.  </p>
 */
public static final int CANON_EQ = 0x80;

/* Pattern has only two serialized components: The pattern string
 * and the flags, which are all that is needed to recompile the pattern
 * when it is deserialized.  Every other instance field declared below is
 * marked transient.
 */

/** use serialVersionUID from Merlin b59 for interoperability */
private static final long serialVersionUID = 5073258162644648461L;

/**
 * The original regular-expression pattern string.
 *
 * @serial
 */
private String pattern;

/**
 * The original pattern flags.
 *
 * @serial
 */
private int flags;

/**
 * The normalized pattern string.  Transient: not part of the serialized
 * form.
 */
private transient String normalizedPattern;

/**
 * The starting point of state machine for the find operation.  This allows
 * a match to start anywhere in the input.  Transient: not part of the
 * serialized form.
 */
transient Node root;

/**
 * The root of object tree for a match operation.  The pattern is matched
 * at the beginning.  This may include a find that uses BnM or a First
 * node.
*/
transient Node matchRoot;

/**
 * Temporary storage used by parsing pattern slice.
 */
transient char[] buffer;

/**
 * Temporary storage used while parsing group references.
 */
transient GroupHead[] groupNodes;

/**
 * Temporary null terminated char array used by pattern compiling.
 */
private transient char[] temp;

/**
 * The group count of this Pattern.  Used by matchers to allocate storage
 * needed to perform a match.
 */
transient int groupCount;

/**
 * The local variable count used by parsing tree.  Used by matchers to
 * allocate storage needed to perform a match.
 */
transient int localCount;

/**
 * Index into the pattern string that keeps track of how much has been
 * parsed.
 */
private transient int cursor;

/**
 * Holds the length of the pattern string.
 */
private transient int patternLength;

/**
 * Compiles the given regular expression into a pattern.
 *
 * <p> The pattern is constructed with a flags value of <tt>0</tt>.
 *
 * @param  regex
 *         The expression to be compiled
 *
 * @return  A new pattern constructed from <tt>regex</tt>
 *
 * @throws  PatternSyntaxException
 *          If the expression's syntax is invalid
 */
public static Pattern compile(String regex) {
    return new Pattern(regex, 0);
}

/**
 * Compiles the given regular expression into a pattern with the given
 * flags.
 *
 * @param  regex
 *         The expression to be compiled
 *
 * @param  flags
 *         Match flags, a bit mask that may include
 *         {@link #CASE_INSENSITIVE}, {@link #MULTILINE}, {@link #DOTALL},
 *         {@link #UNICODE_CASE}, and {@link #CANON_EQ}
 *
 * @return  A new pattern constructed from <tt>regex</tt> and
 *          <tt>flags</tt>
 *
 * @throws  IllegalArgumentException
 *          If bit values other than those corresponding to the defined
 *          match flags are set in <tt>flags</tt>
 *
 * @throws  PatternSyntaxException
 *          If the expression's syntax is invalid
 */
public static Pattern compile(String regex, int flags) {
    return new Pattern(regex, flags);
}

/**
 * Returns the regular expression from which this pattern was compiled.
 *
 * @return  The source of this pattern
 */
public String pattern() {
    return pattern;
}

/**
 * Creates a matcher that will match the given input against this pattern.
* </p> * * @param input * The character sequence to be matched * * @return A new matcher for this pattern */ public Matcher matcher(CharSequence input) { Matcher m = new Matcher(this, input); return m; } /** * Returns this pattern's match flags. </p> * * @return The match flags specified when this pattern was compiled */ public int flags() { return flags; } /** * Compiles the given regular expression and attempts to match the given * input against it. * * <p> An invocation of this convenience method of the form * * <blockquote><pre> * Pattern.matches(regex, input);</pre></blockquote> * * behaves in exactly the same way as the expression * * <blockquote><pre> * Pattern.compile(regex).matcher(input).matches()</pre></blockquote> * * <p> If a pattern is to be used multiple times, compiling it once and reusing * it will be more efficient than invoking this method each time. </p> * * @param regex * The expression to be compiled * * @param input * The character sequence to be matched * * @throws PatternSyntaxException * If the expression's syntax is invalid */ public static boolean matches(String regex, CharSequence input) { Pattern p = Pattern.compile(regex); Matcher m = p.matcher(input); return m.matches(); } /** * Splits the given input sequence around matches of this pattern. * * <p> The array returned by this method contains each substring of the * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the * input. If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in * string form. * * <p> The <tt>limit</tt> parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting * array. 
If the limit <i>n</i> is greater than zero then the pattern * will be applied at most <i>n</i> - 1 times, the array's * length will be no greater than <i>n</i>, and the array's last entry * will contain all input beyond the last matched delimiter. If <i>n</i> * is non-positive then the pattern will be applied as many times as * possible and the array can have any length. If <i>n</i> is zero then * the pattern will be applied as many times as possible, the array can * have any length, and trailing empty strings will be discarded. * * <p> The input <tt>"boo:and:foo"</tt>, for example, yields the following * results with these parameters: * * <blockquote><table cellpadding=1 cellspacing=0 * summary="Split examples showing regex, limit, and result"> * <tr><th><P align="left"><i>Regex </i></th> * <th><P align="left"><i>Limit </i></th> * <th><P align="left"><i>Result </i></th></tr> * <tr><td align=center>:</td> * <td align=center>2</td> * <td><tt>{ "boo", "and:foo" }</tt></td></tr> * <tr><td align=center>:</td> * <td align=center>5</td> * <td><tt>{ "boo", "and", "foo" }</tt></td></tr> * <tr><td align=center>:</td> * <td align=center>-2</td> * <td><tt>{ "boo", "and", "foo" }</tt></td></tr> * <tr><td align=center>o</td> * <td align=center>5</td> * <td><tt>{ "b", "", ":and:f", "", "" }</tt></td></tr> * <tr><td align=center>o</td> * <td align=center>-2</td> * <td><tt>{ "b", "", ":and:f", "", "" }</tt></td></tr> * <tr><td align=center>o</td> * <td align=center>0</td> * <td><tt>{ "b", "", ":and:f" }</tt></td></tr> * </table></blockquote> * * * @param input * The character sequence to be split * * @param limit * The result threshold, as described above * * @return The array of strings computed by splitting the input * around matches of this pattern */ public String[] split(CharSequence input, int limit) { int index = 0; boolean matchLimited = limit > 0; ArrayList matchList = new ArrayList(); Matcher m = matcher(input); // Add segments before each match found while(m.find()) { 
if (!matchLimited || matchList.size() < limit - 1) { String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); } else if (matchList.size() == limit - 1) { // last one String match = input.subSequence(index, input.length()).toString(); matchList.add(match); index = m.end(); } } // If no match was found, return this if (index == 0) return new String[] {input.toString()}; // Add remaining segment if (!matchLimited || matchList.size() < limit) matchList.add(input.subSequence(index, input.length()).toString()); // Construct result int resultSize = matchList.size(); if (limit == 0) while (resultSize > 0 && matchList.get(resultSize-1).equals("")) resultSize--; String[] result = new String[resultSize]; return (String[])matchList.subList(0, resultSize).toArray(result); } /** * Splits the given input sequence around matches of this pattern. * * <p> This method works as if by invoking the two-argument {@link * #split(java.lang.CharSequence, int) split} method with the given input * sequence and a limit argument of zero. Trailing empty strings are * therefore not included in the resulting array. </p> * * <p> The input <tt>"boo:and:foo"</tt>, for example, yields the following * results with these expressions: * * <blockquote><table cellpadding=1 cellspacing=0 * summary="Split examples showing regex and result"> * <tr><th><P align="left"><i>Regex </i></th> * <th><P align="left"><i>Result</i></th></tr> * <tr><td align=center>:</td> * <td><tt>{ "boo", "and", "foo" }</tt></td></tr> * <tr><td align=center>o</td> * <td><tt>{ "b", "", ":and:f" }</tt></td></tr> * </table></blockquote> *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -