📄 nativeregexp.java

📁 java中比较著名的js引擎当属mozilla开源的rhino
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
        return '0' <= c && c <= '9';    }    private static boolean isWord(char c)    {        return Character.isLetter(c) || isDigit(c) || c == '_';    }    private static boolean isLineTerm(char c)    {        return ScriptRuntime.isJSLineTerminator(c);    }    private static boolean isREWhiteSpace(int c)    {        return (c == '\u0020' || c == '\u0009'                || c == '\n' || c == '\r'                || c == 0x2028 || c == 0x2029                || c == '\u000C' || c == '\u000B'                || c == '\u00A0'                || Character.getType((char)c) == Character.SPACE_SEPARATOR);    }    /*     *     * 1. If IgnoreCase is false, return ch.     * 2. Let u be ch converted to upper case as if by calling     *    String.prototype.toUpperCase on the one-character string ch.     * 3. If u does not consist of a single character, return ch.     * 4. Let cu be u's character.     * 5. If ch's code point value is greater than or equal to decimal 128 and cu's     *    code point value is less than decimal 128, then return ch.     * 6. Return cu.     */    private static char upcase(char ch)    {        if (ch < 128) {            if ('a' <= ch && ch <= 'z') {                return (char)(ch + ('A' - 'a'));            }            return ch;        }        char cu = Character.toUpperCase(ch);        if ((ch >= 128) && (cu < 128)) return ch;        return cu;    }    private static char downcase(char ch)    {        if (ch < 128) {            if ('A' <= ch && ch <= 'Z') {                return (char)(ch + ('a' - 'A'));            }            return ch;        }        char cl = Character.toLowerCase(ch);        if ((ch >= 128) && (cl < 128)) return ch;        return cl;    }/* * Validates and converts hex ascii value. 
*/    private static int toASCIIHexDigit(int c)    {        if (c < '0')            return -1;        if (c <= '9') {            return c - '0';        }        c |= 0x20;        if ('a' <= c && c <= 'f') {            return c - 'a' + 10;        }        return -1;    }/* * Top-down regular expression grammar, based closely on Perl4. * *  regexp:     altern                  A regular expression is one or more *              altern '|' regexp       alternatives separated by vertical bar. */    private static boolean parseDisjunction(CompilerState state)    {        if (!parseAlternative(state))            return false;        char[] source = state.cpbegin;        int index = state.cp;        if (index != source.length && source[index] == '|') {            RENode altResult;            ++state.cp;            altResult = new RENode(REOP_ALT);            altResult.kid = state.result;            if (!parseDisjunction(state))                return false;            altResult.kid2 = state.result;            state.result = altResult;            /* ALT, <next>, ..., JUMP, <end> ... JUMP <end> */            state.progLength += 9;        }        return true;    }/* *  altern:     item                    An alternative is one or more items, *              item altern             concatenated together. 
*/
    private static boolean parseAlternative(CompilerState state)
    {
        /*
         * Parses terms until the end of input, a '|', or (inside
         * parentheses) a ')' is reached, linking the resulting nodes through
         * their next fields.  The head of the list, or a REOP_EMPTY node when
         * no term was parsed, is stored in state.result.  Returns false as
         * soon as parseTerm fails.
         */
        RENode headTerm = null;
        RENode tailTerm = null;
        final char[] source = state.cpbegin;

        while (true) {
            final boolean atEnd = state.cp == state.cpend
                || source[state.cp] == '|'
                || (state.parenNesting != 0 && source[state.cp] == ')');
            if (atEnd) {
                state.result = (headTerm != null)
                    ? headTerm
                    : new RENode(REOP_EMPTY);
                return true;
            }

            if (!parseTerm(state)) {
                return false;
            }

            if (headTerm == null) {
                headTerm = state.result;
                tailTerm = headTerm;
            } else {
                tailTerm.next = state.result;
            }
            /*
             * A term may come back as a chain of nodes.  Always advance to
             * the real tail, including for the first term; otherwise
             * appending the second term would overwrite headTerm.next and
             * drop the rest of the first term's chain.
             */
            while (tailTerm.next != null) {
                tailTerm = tailTerm.next;
            }
        }
    }

    /*
     * Calculate the total size of the bitmap required for a class expression.
     *
     * Scans src[index..end) and stores in target.bmsize the largest
     * character code the class can mention (after case folding when
     * JSREG_FOLD is set in state.flags), or 65535 as soon as one of the
     * \D \s \S \w \W escapes is seen.  Returns false after calling
     * reportError("msg.bad.range", "") when a range is malformed, and true
     * otherwise.
     */
    private static boolean
    calculateBitmapSize(CompilerState state, RENode target, char[] src,
                        int index, int end)
    {
        char rangeStart = 0;
        boolean inRange = false;
        int max = 0;

        target.bmsize = 0;

        if (index == end) {
            return true;
        }

        /* A leading '^' takes no part in the size computation. */
        if (src[index] == '^') {
            ++index;
        }

        while (index != end) {
            int localMax = 0;
            int nDigits = 2;

            if (src[index] != '\\') {
                localMax = src[index++];
            } else {
                ++index;
                final char c = src[index++];
                switch (c) {
                case 'b':
                    localMax = 0x8;
                    break;
                case 'f':
                    localMax = 0xC;
                    break;
                case 'n':
                    localMax = 0xA;
                    break;
                case 'r':
                    localMax = 0xD;
                    break;
                case 't':
                    localMax = 0x9;
                    break;
                case 'v':
                    localMax = 0xB;
                    break;
                case 'c':
                    /*
                     * NOTE(review): the letter test looks at src[index + 1]
                     * while the value is taken from src[index].  Left
                     * unchanged: the code that later fills the bitmap is not
                     * visible here and presumably has to agree with this
                     * sizing - confirm before altering.
                     */
                    if (((index + 1) < end) && Character.isLetter(src[index + 1]))
                        localMax = (char)(src[index++] & 0x1F);
                    else
                        localMax = '\\';
                    break;
                case 'u':
                    nDigits += 2;
                    // fall thru...
                case 'x': {
                    int n = 0;
                    for (int i = 0; (i < nDigits) && (index < end); i++) {
                        n = Kit.xDigitToInt(src[index++], n);
                        if (n < 0) {
                            // Back off to accepting the original
                            // '\' as a literal
                            index -= (i + 1);
                            n = '\\';
                            break;
                        }
                    }
                    localMax = n;
                    break;
                }
                case 'd':
                    if (inRange) {
                        reportError("msg.bad.range", "");
                        return false;
                    }
                    localMax = '9';
                    break;
                case 'D':
                case 's':
                case 'S':
                case 'w':
                case 'W':
                    if (inRange) {
                        reportError("msg.bad.range", "");
                        return false;
                    }
                    target.bmsize = 65535;
                    return true;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    /*
                     *  This is a non-ECMA extension - decimal escapes (in this
                     *  case, octal!) are supposed to be an error inside class
                     *  ranges, but supported here for backwards compatibility.
                     */
                    int n = (c - '0');
                    char digit = src[index];
                    if ('0' <= digit && digit <= '7') {
                        index++;
                        n = 8 * n + (digit - '0');
                        digit = src[index];
                        if ('0' <= digit && digit <= '7') {
                            index++;
                            final int wider = 8 * n + (digit - '0');
                            if (wider <= 0377) {
                                n = wider;
                            } else {
                                /* Third digit would exceed 0377: give it back. */
                                index--;
                            }
                        }
                    }
                    localMax = n;
                    break;
                }
                default:
                    localMax = c;
                    break;
                }
            }

            if (inRange) {
                if (rangeStart > localMax) {
                    reportError("msg.bad.range", "");
                    return false;
                }
                inRange = false;
            } else if (index < (end - 1) && src[index] == '-') {
                /* Start of a range: remember the low end, then read the high end. */
                ++index;
                inRange = true;
                rangeStart = (char)localMax;
                continue;
            }

            if ((state.flags & JSREG_FOLD) != 0) {
                /* Case-insensitive: size for whichever case variant is larger. */
                final char cu = upcase((char)localMax);
                final char cd = downcase((char)localMax);
                localMax = (cu >= cd) ? cu : cd;
            }
            if (localMax > max) {
                max = localMax;
            }
        }

        target.bmsize = max;
        return true;
    }

    /*
     *  item:       assertion               An item is either an assertion or
     *              quantatom               a quantified atom.
     *
     *  assertion:  '^'                     Assertions match beginning of string
     *                                      (or line if the class static property
     *                                      RegExp.multiline is true).
     *              '$'                     End of string (or line if the class
     *                                      static property RegExp.multiline is
     *                                      true).
     *              '\b'                    Word boundary (between \w and \W).
     *              '\B'                    Word non-boundary.
     *
     *  quantatom:  atom                    An unquantified atom.
     *              quantatom '{' n ',' m '}'
     *                                      Atom must occur between n and m times.
     *              quantatom '{' n ',' '}' Atom must occur at least n times.
     *              quantatom '{' n '}'     Atom must occur exactly n times.
     *              quantatom '*'           Zero or more times (same as {0,}).
     *              quantatom '+'           One or more times (same as {1,}).
     *              quantatom '?'           Zero or one time (same as {0,1}).
     *
     *              any of which can be optionally followed by '?' for ungreedy
     *
     *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
     *                                      can be addressed using a backreference,
     *                                      see '\' n below).
     *              '.'                     Matches any char except '\n'.
     *              '[' classlist ']'       A character class.
     *              '[' '^' classlist ']'   A negated character class.
*              '\f'                    Form Feed.     *              '\n'                    Newline (Line Feed).     *              '\r'                    Carriage Return.     *              '\t'                    Horizontal Tab.     *              '\v'                    Vertical Tab.     *              '\d'                    A digit (same as [0-9]).     *              '\D'                    A non-digit.     *              '\w'                    A word character, [0-9a-z_A-Z].     *              '\W'                    A non-word character.     *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].     *              '\S'                    A non-whitespace character.     *              '\' n                   A backreference to the nth (n decimal     *                                      and positive) parenthesized expression.     *              '\' octal               An octal escape sequence (octal must be     *                                      two or three digits long, unless it is     *                                      0 for the null character).     *              '\x' hex                A hex escape (hex must be two digits).     *              '\c' ctrl               A control character, ctrl is a letter.     *              '\' literalatomchar     Any character except one of the above     *                                      that follow '\' in an atom.     *              otheratomchar           Any character not first among the other     *                                      atom right-hand sides.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -