📄 regexparser.java

📁 java1.6众多例子参考
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
              case 'g': return this.processBacksolidus_g();              case 'X': return this.processBacksolidus_X();              case '1':  case '2':  case '3':  case '4':              case '5':  case '6':  case '7':  case '8':  case '9':                return this.processBackreference();              case 'P':              case 'p':                int pstart = this.offset;                tok = processBacksolidus_pP(this.chardata);                if (tok == null)  throw this.ex("parser.atom.5", pstart);                break;              default:                tok = Token.createChar(this.chardata);            }            this.next();            break;          case T_CHAR:            if (this.chardata == ']' || this.chardata == '{' || this.chardata == '}')                throw this.ex("parser.atom.4", this.offset-1);            tok = Token.createChar(this.chardata);            int high = this.chardata;            this.next();            if (REUtil.isHighSurrogate(high)                && this.read() == T_CHAR && REUtil.isLowSurrogate(this.chardata)) {                char[] sur = new char[2];                sur[0] = (char)high;                sur[1] = (char)this.chardata;                tok = Token.createParen(Token.createString(new String(sur)), 0);                this.next();            }            break;          default:            throw this.ex("parser.atom.4", this.offset-1);        }        return tok;    }    protected RangeToken processBacksolidus_pP(int c) throws ParseException {        this.next();        if (this.read() != T_CHAR || this.chardata != '{')            throw this.ex("parser.atom.2", this.offset-1);        // handle category escape        boolean positive = c == 'p';        int namestart = this.offset;        int nameend = this.regex.indexOf('}', namestart);        if (nameend < 0)            throw this.ex("parser.atom.3", this.offset);        String pname = this.regex.substring(namestart, nameend);        this.offset = nameend+1;        return Token.getRange(pname, positive, this.isSet(RegularExpression.XMLSCHEMA_MODE));    }    int processCIinCharacterClass(RangeToken tok, int c) {        return this.decodeEscaped();    }    /**     * char-class ::= '[' ( '^'? range ','?)+ ']'     * range ::= '\d' | '\w' | '\s' | category-block | range-char     *           | range-char '-' range-char     * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2     * bmp-code ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]     */    protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {        this.setContext(S_INBRACKETS);        this.next();                            // '['        boolean nrange = false;        RangeToken base = null;        RangeToken tok;        if (this.read() == T_CHAR && this.chardata == '^') {            nrange = true;            this.next();                        // '^'            if (useNrange) {                tok = Token.createNRange();            } else {                base = Token.createRange();                base.addRange(0, Token.UTF16_MAX);                tok = Token.createRange();            }        } else {            tok = Token.createRange();        }        int type;        boolean firstloop = true;        while ((type = this.read()) != T_EOF) {            if (type == T_CHAR && this.chardata == ']' && !firstloop)                break;            firstloop = false;            int c = this.chardata;            boolean end = false;            if (type == T_BACKSOLIDUS) {                switch (c) {                  case 'd':  case 'D':                  case 'w':  case 'W':                  case 's':  case 'S':                    tok.mergeRanges(this.getTokenForShorthand(c));                    end = true;                    break;                  case 'i':  case 'I':                  case 'c':  case 'C':                    c = this.processCIinCharacterClass(tok, c);                    if (c < 0)  end = true;                    break;                                      case 'p':                  case 'P':                    int pstart = this.offset;                    RangeToken tok2 = this.processBacksolidus_pP(c);                    if (tok2 == null)  throw this.ex("parser.atom.5", pstart);                    tok.mergeRanges(tok2);                    end = true;                    break;                  default:                    c = this.decodeEscaped();                } // \ + c            } // backsolidus                                                // POSIX Character class such as [:alnum:]            else if (type == T_POSIX_CHARCLASS_START) {                int nameend = this.regex.indexOf(':', this.offset);                if (nameend < 0) throw this.ex("parser.cc.1", this.offset);                boolean positive = true;                if (this.regex.charAt(this.offset) == '^') {                    this.offset ++;                    positive = false;                }                String name = this.regex.substring(this.offset, nameend);                RangeToken range = Token.getRange(name, positive,                                                  this.isSet(RegularExpression.XMLSCHEMA_MODE));                if (range == null)  throw this.ex("parser.cc.3", this.offset);                tok.mergeRanges(range);                end = true;                if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']')                    throw this.ex("parser.cc.1", nameend);                this.offset = nameend+2;            }            this.next();            if (!end) {                         // if not shorthands...                if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.                    tok.addRange(c, c);                } else {                    this.next(); // Skips '-'                    if ((type = this.read()) == T_EOF)  throw this.ex("parser.cc.2", this.offset);                    if (type == T_CHAR && this.chardata == ']') {                        tok.addRange(c, c);                        tok.addRange('-', '-');                    } else {                        int rangeend = this.chardata;                        if (type == T_BACKSOLIDUS)                            rangeend = this.decodeEscaped();                        this.next();                        tok.addRange(c, rangeend);                    }                }            }            if (this.isSet(RegularExpression.SPECIAL_COMMA)                && this.read() == T_CHAR && this.chardata == ',')                this.next();        }        if (this.read() == T_EOF)            throw this.ex("parser.cc.2", this.offset);        if (!useNrange && nrange) {            base.subtractRanges(tok);            tok = base;        }        tok.sortRanges();        tok.compactRanges();        //tok.dumpRanges();        /*        if (this.isSet(RegularExpression.IGNORE_CASE))            tok = RangeToken.createCaseInsensitiveToken(tok);        */        this.setContext(S_NORMAL);        this.next();                    // Skips ']'        return tok;    }    /**     * '(?[' ... ']' (('-' | '+' | '&') '[' ... ']')? ')'     */    protected RangeToken parseSetOperations() throws ParseException {        RangeToken tok = this.parseCharacterClass(false);        int type;        while ((type = this.read()) != T_RPAREN) {            int ch = this.chardata;            if (type == T_CHAR && (ch == '-' || ch == '&')                || type == T_PLUS) {                this.next();                if (this.read() != T_LBRACKET) throw ex("parser.ope.1", this.offset-1);                RangeToken t2 = this.parseCharacterClass(false);                if (type == T_PLUS)                    tok.mergeRanges(t2);                else if (ch == '-')                    tok.subtractRanges(t2);                else if (ch == '&')                    tok.intersectRanges(t2);                else                    throw new RuntimeException("ASSERT");            } else {                throw ex("parser.ope.2", this.offset-1);            }        }        this.next();        return tok;    }    Token getTokenForShorthand(int ch) {        Token tok;        switch (ch) {          case 'd':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("Nd", true) : Token.token_0to9;            break;          case 'D':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("Nd", false) : Token.token_not_0to9;            break;          case 'w':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("IsWord", true) : Token.token_wordchars;            break;          case 'W':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("IsWord", false) : Token.token_not_wordchars;            break;          case 's':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("IsSpace", true) : Token.token_spaces;            break;          case 'S':            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)                ? Token.getRange("IsSpace", false) : Token.token_not_spaces;            break;          default:            throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));        }        return tok;    }    /**     */    int decodeEscaped() throws ParseException {        if (this.read() != T_BACKSOLIDUS)  throw ex("parser.next.1", this.offset-1);        int c = this.chardata;        switch (c) {          case 'e':  c = 0x1b;  break; // ESCAPE U+001B          case 'f':  c = '\f';  break; // FORM FEED U+000C          case 'n':  c = '\n';  break; // LINE FEED U+000A          case 'r':  c = '\r';  break; // CRRIAGE RETURN U+000D          case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009          //case 'v':  c = 0x0b;  break; // VERTICAL TABULATION U+000B          case 'x':            this.next();            if (this.read() != T_CHAR)  throw ex("parser.descape.1", this.offset-1);            if (this.chardata == '{') {                int v1 = 0;                int uv = 0;                do {                    this.next();                    if (this.read() != T_CHAR)  throw ex("parser.descape.1", this.offset-1);                    if ((v1 = hexChar(this.chardata)) < 0)                        break;                    if (uv > uv*16) throw ex("parser.descape.2", this.offset-1);                    uv = uv*16+v1;                } while (true);                if (this.chardata != '}')  throw ex("parser.descape.3", this.offset-1);                if (uv > Token.UTF16_MAX)  throw ex("parser.descape.4", this.offset-1);                c = uv;            } else {                int v1 = 0;                if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                    throw ex("parser.descape.1", this.offset-1);                int uv = v1;                this.next();                if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                    throw ex("parser.descape.1", this.offset-1);                uv = uv*16+v1;                c = uv;            }            break;          case 'u':            int v1 = 0;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            int uv = v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            c = uv;            break;          case 'v':            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            this.next();            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0)                throw ex("parser.descape.1", this.offset-1);            uv = uv*16+v1;            if (uv > Token.UTF16_MAX)  throw ex("parser.descappe.4", this.offset-1);            c = uv;            break;          case 'A':          case 'Z':          case 'z':            throw ex("parser.descape.5", this.offset-2);          default:        }        return c;    }    static private final int hexChar(int ch) {        if (ch < '0')  return -1;        if (ch > 'f')  return -1;        if (ch <= '9')  return ch-'0';        if (ch < 'A')  return -1;        if (ch <= 'F')  return ch-'A'+10;        if (ch < 'a')  return -1;        return ch-'a'+10;    }}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -