📄 re.java
字号:
/* gnu/regexp/RE.java Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.This file is part of GNU Classpath.GNU Classpath is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2, or (at your option)any later version.GNU Classpath is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with GNU Classpath; see the file COPYING. If not, write to theFree Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA02111-1307 USA.Linking this library statically or dynamically with other modules ismaking a combined work based on this library. Thus, the terms andconditions of the GNU General Public License cover the wholecombination.As a special exception, the copyright holders of this library give youpermission to link this library with independent modules to produce anexecutable, regardless of the license terms of these independentmodules, and to copy and distribute the resulting executable underterms of your choice, provided that you also meet, for each linkedindependent module, the terms and conditions of the license of thatmodule. An independent module is a module which is not derived fromor based on this library. If you modify this library, you may extendthis exception to your version of the library, but you are notobligated to do so. If you do not wish to do so, delete thisexception statement from your version. */package gnu.regexp;import java.io.InputStream;import java.io.Serializable;import java.util.Locale;import java.util.PropertyResourceBundle;import java.util.ResourceBundle;import java.util.Vector;/** * RE provides the user interface for compiling and matching regular * expressions. * <P> * A regular expression object (class RE) is compiled by constructing it * from a String, StringBuffer or character array, with optional * compilation flags (below) * and an optional syntax specification (see RESyntax; if not specified, * <code>RESyntax.RE_SYNTAX_PERL5</code> is used). * <P> * Once compiled, a regular expression object is reusable as well as * threadsafe: multiple threads can use the RE instance simultaneously * to match against different input text. * <P> * Various methods attempt to match input text against a compiled * regular expression. These methods are: * <LI><code>isMatch</code>: returns true if the input text in its * entirety matches the regular expression pattern. * <LI><code>getMatch</code>: returns the first match found in the * input text, or null if no match is found. * <LI><code>getAllMatches</code>: returns an array of all * non-overlapping matches found in the input text. If no matches are * found, the array is zero-length. * <LI><code>substitute</code>: substitute the first occurence of the * pattern in the input text with a replacement string (which may * include metacharacters $0-$9, see REMatch.substituteInto). * <LI><code>substituteAll</code>: same as above, but repeat for each * match before returning. * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration * object that allows iteration over the matches (see * REMatchEnumeration for some reasons why you may want to do this * instead of using <code>getAllMatches</code>. * <P> * * These methods all have similar argument lists. The input can be a * String, a character array, a StringBuffer, or an * InputStream of some sort. Note that when using an * InputStream, the stream read position cannot be guaranteed after * attempting a match (this is not a bug, but a consequence of the way * regular expressions work). Using an REMatchEnumeration can * eliminate most positioning problems. * * <P> * * The optional index argument specifies the offset from the beginning * of the text at which the search should start (see the descriptions * of some of the execution flags for how this can affect positional * pattern operators). For an InputStream, this means an * offset from the current read position, so subsequent calls with the * same index argument on an InputStream will not * necessarily access the same position on the stream, whereas * repeated searches at a given index in a fixed string will return * consistent results. * * <P> * You can optionally affect the execution environment by using a * combination of execution flags (constants listed below). * * <P> * All operations on a regular expression are performed in a * thread-safe manner. * * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> * @version 1.1.5-dev, to be released */public class RE extends REToken { private static final class IntPair implements Serializable { public int first, second; } private static final class CharUnit implements Serializable { public char ch; public boolean bk; } // This String will be returned by getVersion() private static final String VERSION = "1.1.5-dev"; // The localized strings are kept in a separate file private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault()); // These are, respectively, the first and last tokens in our linked list // If there is only one token, firstToken == lastToken private REToken firstToken, lastToken; // This is the number of subexpressions in this regular expression, // with a minimum value of zero. Returned by getNumSubs() private int numSubs; /** Minimum length, in characters, of any possible match. */ private int minimumLength; /** * Compilation flag. Do not differentiate case. Subsequent * searches using this RE will be case insensitive. */ public static final int REG_ICASE = 2; /** * Compilation flag. The match-any-character operator (dot) * will match a newline character. When set this overrides the syntax * bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to * the "/s" operator in Perl. */ public static final int REG_DOT_NEWLINE = 4; /** * Compilation flag. Use multiline mode. In this mode, the ^ and $ * anchors will match based on newlines within the input. This is * equivalent to the "/m" operator in Perl. */ public static final int REG_MULTILINE = 8; /** * Execution flag. * The match-beginning operator (^) will not match at the beginning * of the input string. Useful for matching on a substring when you * know the context of the input is such that position zero of the * input to the match test is not actually position zero of the text. * <P> * This example demonstrates the results of various ways of matching on * a substring. * <P> * <CODE> * String s = "food bar fool";<BR> * RE exp = new RE("^foo.");<BR> * REMatch m0 = exp.getMatch(s);<BR> * REMatch m1 = exp.getMatch(s.substring(8));<BR> * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR> * REMatch m3 = exp.getMatch(s,8); <BR> * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX); <BR> * <P> * // Results:<BR> * // m0.toString(): "food"<BR> * // m1.toString(): "fool"<BR> * // m2.toString(): null<BR> * // m3.toString(): null<BR> * // m4.toString(): "fool"<BR> * </CODE> */ public static final int REG_NOTBOL = 16; /** * Execution flag. * The match-end operator ($) does not match at the end * of the input string. Useful for matching on substrings. */ public static final int REG_NOTEOL = 32; /** * Execution flag. * When a match method is invoked that starts matching at a non-zero * index into the input, treat the input as if it begins at the index * given. The effect of this flag is that the engine does not "see" * any text in the input before the given index. This is useful so * that the match-beginning operator (^) matches not at position 0 * in the input string, but at the position the search started at * (based on the index input given to the getMatch function). See * the example under REG_NOTBOL. It also affects the use of the \< * and \b operators. */ public static final int REG_ANCHORINDEX = 64; /** * Execution flag. * The substitute and substituteAll methods will not attempt to * interpolate occurrences of $1-$9 in the replacement text with * the corresponding subexpressions. For example, you may want to * replace all matches of "one dollar" with "$1". */ public static final int REG_NO_INTERPOLATE = 128; /** Returns a string representing the version of the gnu.regexp package. */ public static final String version() { return VERSION; } // Retrieves a message from the ResourceBundle static final String getLocalizedMessage(String key) { return messages.getString(key); } /** * Constructs a regular expression pattern buffer without any compilation * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5). * * @param pattern A regular expression pattern, in the form of a String, * StringBuffer or char[]. Other input types will be converted to * strings using the toString() method. * @exception REException The input pattern could not be parsed. * @exception NullPointerException The pattern was null. */ public RE(Object pattern) throws REException { this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0); } /** * Constructs a regular expression pattern buffer using the specified * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5). * * @param pattern A regular expression pattern, in the form of a String, * StringBuffer, or char[]. Other input types will be converted to * strings using the toString() method. * @param cflags The logical OR of any combination of the compilation flags listed above. * @exception REException The input pattern could not be parsed. * @exception NullPointerException The pattern was null. */ public RE(Object pattern, int cflags) throws REException { this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0); } /** * Constructs a regular expression pattern buffer using the specified * compilation flags and regular expression syntax. * * @param pattern A regular expression pattern, in the form of a String, * StringBuffer, or char[]. Other input types will be converted to * strings using the toString() method. * @param cflags The logical OR of any combination of the compilation flags listed above. * @param syntax The type of regular expression syntax to use. * @exception REException The input pattern could not be parsed. * @exception NullPointerException The pattern was null. */ public RE(Object pattern, int cflags, RESyntax syntax) throws REException { this(pattern,cflags,syntax,0,0); } // internal constructor used for alternation private RE(REToken first, REToken last,int subs, int subIndex, int minLength) { super(subIndex); firstToken = first; lastToken = last; numSubs = subs; minimumLength = minLength; addToken(new RETokenEndSub(subIndex)); } private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException { super(myIndex); // Subexpression index of this token. initialize(patternObj, cflags, syntax, myIndex, nextSub); } // For use by subclasses protected RE() { super(0); } // The meat of construction protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException { char[] pattern; if (patternObj instanceof String) { pattern = ((String) patternObj).toCharArray(); } else if (patternObj instanceof char[]) { pattern = (char[]) patternObj; } else if (patternObj instanceof StringBuffer) { pattern = new char [((StringBuffer) patternObj).length()]; ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0); } else { pattern = patternObj.toString().toCharArray(); } int pLength = pattern.length; numSubs = 0; // Number of subexpressions in this token. Vector branches = null; // linked list of tokens (sort of -- some closed loops can exist) firstToken = lastToken = null; // Precalculate these so we don't pay for the math every time we // need to access them. boolean insens = ((cflags & REG_ICASE) > 0); // Parse pattern into tokens. Does anyone know if it's more efficient // to use char[] than a String.charAt()? I'm assuming so. // index tracks the position in the char array int index = 0; // this will be the current parse character (pattern[index]) CharUnit unit = new CharUnit(); // This is used for {x,y} calculations IntPair minMax = new IntPair(); // Buffer a token so we can create a TokenRepeated, etc. REToken currentToken = null; char ch; boolean quot = false; while (index < pLength) { // read the next character unit (including backslash escapes) index = getCharUnit(pattern,index,unit,quot); if (unit.bk) if (unit.ch == 'Q') { quot = true; continue; } else if (unit.ch == 'E') { quot = false; continue; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -