re.java

来自「JAVA的一些源码 JAVA2 STANDARD EDITION DEVELO」· Java 代码 · 共 1,752 行 · 第 1/5 页

JAVA
1,752
字号
public class RE{    /**     * Specifies normal, case-sensitive matching behaviour.     */    public static final int MATCH_NORMAL          = 0x0000;    /**     * Flag to indicate that matching should be case-independent (folded)     */    public static final int MATCH_CASEINDEPENDENT = 0x0001;    /**     * Newlines should match as BOL/EOL (^ and $)     */    public static final int MATCH_MULTILINE       = 0x0002;    /**     * Consider all input a single body of text - newlines are matched by .     */    public static final int MATCH_SINGLELINE      = 0x0004;    /************************************************     *                                              *     * The format of a node in a program is:        *     *                                              *     * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *     *                                              *     * char OPCODE - instruction                    *     * char OPDATA - modifying data                 *     * char OPNEXT - next node (relative offset)    *     *                                              *     ************************************************/                 //   Opcode              Char       Opdata/Operand  Meaning                 //   ----------          ---------- --------------- --------------------------------------------------    static final char OP_END              = 'E';  //                 end of program    static final char OP_BOL              = '^';  //                 match only if at beginning of line    static final char OP_EOL              = '$';  //                 match only if at end of line    static final char OP_ANY              = '.';  //                 match any single character except newline    static final char OP_ANYOF            = '[';  // count/ranges    match any char in the list of ranges    static final char OP_BRANCH           = '|';  // node            match this alternative or the next one    static final char OP_ATOM             = 'A';  
// length/string   length of string followed by string itself    static final char OP_STAR             = '*';  // node            kleene closure    static final char OP_PLUS             = '+';  // node            positive closure    static final char OP_MAYBE            = '?';  // node            optional closure    static final char OP_ESCAPE           = '\\'; // escape          special escape code char class (escape is E_* code)    static final char OP_OPEN             = '(';  // number          nth opening paren    static final char OP_CLOSE            = ')';  // number          nth closing paren    static final char OP_BACKREF          = '#';  // number          reference nth already matched parenthesized string    static final char OP_GOTO             = 'G';  //                 nothing but a (back-)pointer    static final char OP_NOTHING          = 'N';  //                 match null string such as in '(a|)'    static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant '*' (mnemonic for char is unshifted '*')    static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant '+' (mnemonic for char is unshifted '+')    static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant '?' 
(mnemonic for char is unshifted '?')    static final char OP_POSIXCLASS       = 'P';  // classid         one of the posix character classes    // Escape codes    static final char E_ALNUM             = 'w';  // Alphanumeric    static final char E_NALNUM            = 'W';  // Non-alphanumeric    static final char E_BOUND             = 'b';  // Word boundary    static final char E_NBOUND            = 'B';  // Non-word boundary    static final char E_SPACE             = 's';  // Whitespace    static final char E_NSPACE            = 'S';  // Non-whitespace    static final char E_DIGIT             = 'd';  // Digit    static final char E_NDIGIT            = 'D';  // Non-digit    // Posix character classes    static final char POSIX_CLASS_ALNUM   = 'w';  // Alphanumerics    static final char POSIX_CLASS_ALPHA   = 'a';  // Alphabetics     static final char POSIX_CLASS_BLANK   = 'b';  // Blanks    static final char POSIX_CLASS_CNTRL   = 'c';  // Control characters    static final char POSIX_CLASS_DIGIT   = 'd';  // Digits    static final char POSIX_CLASS_GRAPH   = 'g';  // Graphic characters    static final char POSIX_CLASS_LOWER   = 'l';  // Lowercase characters    static final char POSIX_CLASS_PRINT   = 'p';  // Printable characters    static final char POSIX_CLASS_PUNCT   = '!';  // Punctuation    static final char POSIX_CLASS_SPACE   = 's';  // Spaces    static final char POSIX_CLASS_UPPER   = 'u';  // Uppercase characters    static final char POSIX_CLASS_XDIGIT  = 'x';  // Hexadecimal digits    static final char POSIX_CLASS_JSTART  = 'j';  // Java identifier start    static final char POSIX_CLASS_JPART   = 'k';  // Java identifier part    // Limits    static final int maxNode  = 65536;            // Maximum number of nodes in a program    static final int maxParen = 16;               // Number of paren pairs (only 9 can be backrefs)    // Node layout constants    static final int offsetOpcode = 0;            // Opcode offset (first character)    static final int 
offsetOpdata = 1;            // Opdata offset (second char)    static final int offsetNext   = 2;            // Next index offset (third char)    static final int nodeSize     = 3;            // Node size (in chars)    /** Line Separator */    static final String NEWLINE = System.getProperty("line.separator");    // State of current program    REProgram program;                            // Compiled regular expression 'program'    CharacterIterator search;                                // The string being matched against    int idx;                                      // Current index in string being searched    int matchFlags;                               // Match behaviour flags    // Parenthesized subexpressions    int parenCount;                               // Number of subexpressions matched (num open parens + 1)    int start0;                                   // Cache of start[0]    int end0;                                     // Cache of start[0]    int start1;                                   // Cache of start[1]    int end1;                                     // Cache of start[1]    int start2;                                   // Cache of start[2]    int end2;                                     // Cache of start[2]    int[] startn;                                 // Lazy-alloced array of sub-expression starts    int[] endn;                                   // Lazy-alloced array of sub-expression ends    // Backreferences    int[] startBackref;                           // Lazy-alloced array of backref starts    int[] endBackref;                             // Lazy-alloced array of backref ends    /**     * Constructs a regular expression matcher from a String by compiling it     * using a new instance of RECompiler.  If you will be compiling many     * expressions, you may prefer to use a single RECompiler object instead.     * @param pattern The regular expression pattern to compile.     
* @exception RESyntaxException Thrown if the regular expression has invalid syntax.     * @see RECompiler     * @see recompile     */    public RE(String pattern) throws RESyntaxException    {        this(pattern, MATCH_NORMAL);    }    /**     * Constructs a regular expression matcher from a String by compiling it     * using a new instance of RECompiler.  If you will be compiling many     * expressions, you may prefer to use a single RECompiler object instead.     * @param pattern The regular expression pattern to compile.     * @param matchFlags The matching style     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.     * @see RECompiler     * @see recompile     */    public RE(String pattern, int matchFlags) throws RESyntaxException    {        this(new RECompiler().compile(pattern));        setMatchFlags(matchFlags);    }    /**     * Construct a matcher for a pre-compiled regular expression from program     * (bytecode) data.  Permits special flags to be passed in to modify matching     * behaviour.     * @param program Compiled regular expression program (see RECompiler and/or recompile)     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):     *     * <pre>     *     *   MATCH_NORMAL              // Normal (case-sensitive) matching     *   MATCH_CASEINDEPENDENT     // Case folded comparisons     *   MATCH_MULTILINE           // Newline matches as BOL/EOL     *     * </pre>     *     * @see RECompiler     * @see REProgram     * @see recompile     */    public RE(REProgram program, int matchFlags)    {        setProgram(program);        setMatchFlags(matchFlags);    }    /**     * Construct a matcher for a pre-compiled regular expression from program     * (bytecode) data.     
* @param program Compiled regular expression program     * @see RECompiler     * @see recompile     */    public RE(REProgram program)    {        this(program, MATCH_NORMAL);    }    /**     * Constructs a regular expression matcher with no initial program.     * This is likely to be an uncommon practice, but is still supported.     */    public RE()    {        this((REProgram)null, MATCH_NORMAL);    }    /**     * Converts a 'simplified' regular expression to a full regular expression     * @param pattern The pattern to convert     * @return The full regular expression     */    public static String simplePatternToFullRegularExpression(String pattern)    {        StringBuffer buf = new StringBuffer();        for (int i = 0; i < pattern.length(); i++)        {            char c = pattern.charAt(i);            switch (c)            {                case '*':                    buf.append(".*");                    break;                case '.':                case '[':                case ']':                case '\\':                case '+':                case '?':                case '{':                case '}':                case '$':                case '^':                case '|':                case '(':                case ')':                    buf.append('\\');                default:                    buf.append(c);                    break;            }        }        return buf.toString();    }    /**     * Sets match behaviour flags which alter the way RE does matching.     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):     *     * <pre>     *     *   MATCH_NORMAL              // Normal (case-sensitive) matching     *   MATCH_CASEINDEPENDENT     // Case folded comparisons     *   MATCH_MULTILINE           // Newline matches as BOL/EOL     *     * </pre>     *     */    public void setMatchFlags(int matchFlags)    {        this.matchFlags = matchFlags;    }    /**     * Returns the current match behaviour flags.   
  * @return Current match behaviour flags (RE.MATCH_*).     *     * <pre>     *     *   MATCH_NORMAL              // Normal (case-sensitive) matching     *   MATCH_CASEINDEPENDENT     // Case folded comparisons     *   MATCH_MULTILINE           // Newline matches as BOL/EOL     *     * </pre>     *     * @see #setMatchFlags     *     */    public int getMatchFlags()    {        return matchFlags;    }    /**     * Sets the current regular expression program used by this matcher object.     * @param program Regular expression program compiled by RECompiler.     * @see RECompiler     * @see REProgram     * @see recompile     */    public void setProgram(REProgram program)    {        this.program = program;    }    /**     * Returns the current regular expression program in use by this matcher object.     * @return Regular expression program     * @see #setProgram     */    public REProgram getProgram()    {        return program;    }    /**     * Returns the number of parenthesized subexpressions available after a successful match.     * @return Number of available parenthesized subexpressions     */    public int getParenCount()    {        return parenCount;    }    /**     * Gets the contents of a parenthesized subexpression after a successful match.     * @param which Nesting level of subexpression     * @return String     */    public String getParen(int which)    {        int start;        if (which < parenCount && (start = getParenStart(which)) >= 0)        {            return search.substring(start, getParenEnd(which));        }        return null;    }    /**     * Returns the start index of a given paren level.     
* @param which Nesting level of subexpression
     * @return String index, or -1 when the level is not below the paren count
     */
    public final int getParenStart(int which) {
        // Levels at or beyond the paren count have no recorded start.
        if (which >= parenCount) {
            return -1;
        }

        // Levels 0..2 are held in dedicated fields.
        if (which == 0) {
            return start0;
        }
        if (which == 1) {
            return start1;
        }
        if (which == 2) {
            return start2;
        }

        // Remaining levels live in the lazily allocated array.
        if (startn == null) {
            allocParens();
        }
        return startn[which];
    }

    /**
     * Returns the end index of a given paren level.
     * @param which Nesting level of subexpression
     * @return String index 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?