re.java
来自「JAVA的一些源码 JAVA2 STANDARD EDITION DEVELO」· Java 代码 · 共 1,752 行 · 第 1/5 页
JAVA
1,752 行
public class RE{ /** * Specifies normal, case-sensitive matching behaviour. */ public static final int MATCH_NORMAL = 0x0000; /** * Flag to indicate that matching should be case-independent (folded) */ public static final int MATCH_CASEINDEPENDENT = 0x0001; /** * Newlines should match as BOL/EOL (^ and $) */ public static final int MATCH_MULTILINE = 0x0002; /** * Consider all input a single body of text - newlines are matched by . */ public static final int MATCH_SINGLELINE = 0x0004; /************************************************ * * * The format of a node in a program is: * * * * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] * * * * char OPCODE - instruction * * char OPDATA - modifying data * * char OPNEXT - next node (relative offset) * * * ************************************************/ // Opcode Char Opdata/Operand Meaning // ---------- ---------- --------------- -------------------------------------------------- static final char OP_END = 'E'; // end of program static final char OP_BOL = '^'; // match only if at beginning of line static final char OP_EOL = '$'; // match only if at end of line static final char OP_ANY = '.'; // match any single character except newline static final char OP_ANYOF = '['; // count/ranges match any char in the list of ranges static final char OP_BRANCH = '|'; // node match this alternative or the next one static final char OP_ATOM = 'A'; // length/string length of string followed by string itself static final char OP_STAR = '*'; // node kleene closure static final char OP_PLUS = '+'; // node positive closure static final char OP_MAYBE = '?'; // node optional closure static final char OP_ESCAPE = '\\'; // escape special escape code char class (escape is E_* code) static final char OP_OPEN = '('; // number nth opening paren static final char OP_CLOSE = ')'; // number nth closing paren static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string static final char OP_GOTO = 'G'; // nothing but a (back-)pointer static final char OP_NOTHING = 'N'; // match null string such as in '(a|)' static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '*' (mnemonic for char is unshifted '*') static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+') static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?') static final char OP_POSIXCLASS = 'P'; // classid one of the posix character classes // Escape codes static final char E_ALNUM = 'w'; // Alphanumeric static final char E_NALNUM = 'W'; // Non-alphanumeric static final char E_BOUND = 'b'; // Word boundary static final char E_NBOUND = 'B'; // Non-word boundary static final char E_SPACE = 's'; // Whitespace static final char E_NSPACE = 'S'; // Non-whitespace static final char E_DIGIT = 'd'; // Digit static final char E_NDIGIT = 'D'; // Non-digit // Posix character classes static final char POSIX_CLASS_ALNUM = 'w'; // Alphanumerics static final char POSIX_CLASS_ALPHA = 'a'; // Alphabetics static final char POSIX_CLASS_BLANK = 'b'; // Blanks static final char POSIX_CLASS_CNTRL = 'c'; // Control characters static final char POSIX_CLASS_DIGIT = 'd'; // Digits static final char POSIX_CLASS_GRAPH = 'g'; // Graphic characters static final char POSIX_CLASS_LOWER = 'l'; // Lowercase characters static final char POSIX_CLASS_PRINT = 'p'; // Printable characters static final char POSIX_CLASS_PUNCT = '!'; // Punctuation static final char POSIX_CLASS_SPACE = 's'; // Spaces static final char POSIX_CLASS_UPPER = 'u'; // Uppercase characters static final char POSIX_CLASS_XDIGIT = 'x'; // Hexadecimal digits static final char POSIX_CLASS_JSTART = 'j'; // Java identifier start static final char POSIX_CLASS_JPART = 'k'; // Java identifier part // Limits static final int maxNode = 65536; // Maximum number of nodes in a program static final int maxParen = 16; // Number of paren pairs (only 9 can be backrefs) // Node layout constants static final int offsetOpcode = 0; // Opcode offset (first character) static final int offsetOpdata = 1; // Opdata offset (second char) static final int offsetNext = 2; // Next index offset (third char) static final int nodeSize = 3; // Node size (in chars) /** Line Separator */ static final String NEWLINE = System.getProperty("line.separator"); // State of current program REProgram program; // Compiled regular expression 'program' CharacterIterator search; // The string being matched against int idx; // Current index in string being searched int matchFlags; // Match behaviour flags // Parenthesized subexpressions int parenCount; // Number of subexpressions matched (num open parens + 1) int start0; // Cache of start[0] int end0; // Cache of start[0] int start1; // Cache of start[1] int end1; // Cache of start[1] int start2; // Cache of start[2] int end2; // Cache of start[2] int[] startn; // Lazy-alloced array of sub-expression starts int[] endn; // Lazy-alloced array of sub-expression ends // Backreferences int[] startBackref; // Lazy-alloced array of backref starts int[] endBackref; // Lazy-alloced array of backref ends /** * Constructs a regular expression matcher from a String by compiling it * using a new instance of RECompiler. If you will be compiling many * expressions, you may prefer to use a single RECompiler object instead. * @param pattern The regular expression pattern to compile. * @exception RESyntaxException Thrown if the regular expression has invalid syntax. * @see RECompiler * @see recompile */ public RE(String pattern) throws RESyntaxException { this(pattern, MATCH_NORMAL); } /** * Constructs a regular expression matcher from a String by compiling it * using a new instance of RECompiler. If you will be compiling many * expressions, you may prefer to use a single RECompiler object instead. * @param pattern The regular expression pattern to compile. * @param matchFlags The matching style * @exception RESyntaxException Thrown if the regular expression has invalid syntax. * @see RECompiler * @see recompile */ public RE(String pattern, int matchFlags) throws RESyntaxException { this(new RECompiler().compile(pattern)); setMatchFlags(matchFlags); } /** * Construct a matcher for a pre-compiled regular expression from program * (bytecode) data. Permits special flags to be passed in to modify matching * behaviour. * @param program Compiled regular expression program (see RECompiler and/or recompile) * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*): * * <pre> * * MATCH_NORMAL // Normal (case-sensitive) matching * MATCH_CASEINDEPENDENT // Case folded comparisons * MATCH_MULTILINE // Newline matches as BOL/EOL * * </pre> * * @see RECompiler * @see REProgram * @see recompile */ public RE(REProgram program, int matchFlags) { setProgram(program); setMatchFlags(matchFlags); } /** * Construct a matcher for a pre-compiled regular expression from program * (bytecode) data. * @param program Compiled regular expression program * @see RECompiler * @see recompile */ public RE(REProgram program) { this(program, MATCH_NORMAL); } /** * Constructs a regular expression matcher with no initial program. * This is likely to be an uncommon practice, but is still supported. */ public RE() { this((REProgram)null, MATCH_NORMAL); } /** * Converts a 'simplified' regular expression to a full regular expression * @param pattern The pattern to convert * @return The full regular expression */ public static String simplePatternToFullRegularExpression(String pattern) { StringBuffer buf = new StringBuffer(); for (int i = 0; i < pattern.length(); i++) { char c = pattern.charAt(i); switch (c) { case '*': buf.append(".*"); break; case '.': case '[': case ']': case '\\': case '+': case '?': case '{': case '}': case '$': case '^': case '|': case '(': case ')': buf.append('\\'); default: buf.append(c); break; } } return buf.toString(); } /** * Sets match behaviour flags which alter the way RE does matching. * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*): * * <pre> * * MATCH_NORMAL // Normal (case-sensitive) matching * MATCH_CASEINDEPENDENT // Case folded comparisons * MATCH_MULTILINE // Newline matches as BOL/EOL * * </pre> * */ public void setMatchFlags(int matchFlags) { this.matchFlags = matchFlags; } /** * Returns the current match behaviour flags. * @return Current match behaviour flags (RE.MATCH_*). * * <pre> * * MATCH_NORMAL // Normal (case-sensitive) matching * MATCH_CASEINDEPENDENT // Case folded comparisons * MATCH_MULTILINE // Newline matches as BOL/EOL * * </pre> * * @see #setMatchFlags * */ public int getMatchFlags() { return matchFlags; } /** * Sets the current regular expression program used by this matcher object. * @param program Regular expression program compiled by RECompiler. * @see RECompiler * @see REProgram * @see recompile */ public void setProgram(REProgram program) { this.program = program; } /** * Returns the current regular expression program in use by this matcher object. * @return Regular expression program * @see #setProgram */ public REProgram getProgram() { return program; } /** * Returns the number of parenthesized subexpressions available after a successful match. * @return Number of available parenthesized subexpressions */ public int getParenCount() { return parenCount; } /** * Gets the contents of a parenthesized subexpression after a successful match. * @param which Nesting level of subexpression * @return String */ public String getParen(int which) { int start; if (which < parenCount && (start = getParenStart(which)) >= 0) { return search.substring(start, getParenEnd(which)); } return null; } /** * Returns the start index of a given paren level. * @param which Nesting level of subexpression * @return String index */ public final int getParenStart(int which) { if (which < parenCount) { switch (which) { case 0: return start0; case 1: return start1; case 2: return start2; default: if (startn == null) { allocParens(); } return startn[which]; } } return -1; } /** * Returns the end index of a given paren level. * @param which Nesting level of subexpression * @return String index
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?