📄 regexparser.java
字号:
case 'g': return this.processBacksolidus_g(); case 'X': return this.processBacksolidus_X(); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return this.processBackreference(); case 'P': case 'p': int pstart = this.offset; tok = processBacksolidus_pP(this.chardata); if (tok == null) throw this.ex("parser.atom.5", pstart); break; default: tok = Token.createChar(this.chardata); } this.next(); break; case T_CHAR: if (this.chardata == ']' || this.chardata == '{' || this.chardata == '}') throw this.ex("parser.atom.4", this.offset-1); tok = Token.createChar(this.chardata); int high = this.chardata; this.next(); if (REUtil.isHighSurrogate(high) && this.read() == T_CHAR && REUtil.isLowSurrogate(this.chardata)) { char[] sur = new char[2]; sur[0] = (char)high; sur[1] = (char)this.chardata; tok = Token.createParen(Token.createString(new String(sur)), 0); this.next(); } break; default: throw this.ex("parser.atom.4", this.offset-1); } return tok; }

    /**
     * Parses the <code>{name}</code> part of a category escape
     * (<code>\p{name}</code> or <code>\P{name}</code>).
     *
     * <p>On entry the current token is the 'p' / 'P' escape itself. The name is
     * read directly out of the pattern string and <code>offset</code> is moved
     * just past the closing '}'; this method does not call <code>next()</code>
     * afterwards, so the caller is expected to do so.</p>
     *
     * @param c the escape letter; <code>'p'</code> selects the positive range,
     *          anything else the negated one
     * @return the result of <code>Token.getRange</code> for the name; the callers
     *         in this file treat <code>null</code> as an unknown name
     * @throws ParseException "parser.atom.2" if the escape is not followed by '{',
     *         "parser.atom.3" if no closing '}' is found
     */
    protected RangeToken processBacksolidus_pP(int c) throws ParseException {
        this.next();
        if (this.read() != T_CHAR || this.chardata != '{') {
            throw this.ex("parser.atom.2", this.offset-1);
        }

        // handle category escape
        boolean positive = c == 'p';
        int namestart = this.offset;
        int nameend = this.regex.indexOf('}', namestart);
        if (nameend < 0) {
            throw this.ex("parser.atom.3", this.offset);
        }

        String pname = this.regex.substring(namestart, nameend);
        this.offset = nameend+1;

        return Token.getRange(pname, positive, this.isSet(RegularExpression.XMLSCHEMA_MODE));
    }

    /**
     * Handles <code>\i</code>, <code>\I</code>, <code>\c</code> and <code>\C</code>
     * inside a character class.
     *
     * <p>This implementation ignores both arguments and simply decodes the
     * escape as an ordinary escaped character. The caller
     * ({@link #parseCharacterClass(boolean)}) interprets a negative return value
     * as "nothing more to add for this item".</p>
     * NOTE(review): the unused parameters suggest this is a hook for subclasses
     * that merge ranges into <code>tok</code> - confirm against the subclasses.
     *
     * @param tok the range token being built by the caller (unused here)
     * @param c   the escape letter (unused here)
     * @return the value of {@link #decodeEscaped()}
     */
    int processCIinCharacterClass(RangeToken tok, int c) {
        return this.decodeEscaped();
    }

    /**
     * Parses a bracketed character class. The current token must be the
     * opening '['; on return the closing ']' has been consumed.
     *
     * <pre>
     * char-class ::= '[' ( '^'? range ','?)+ ']'
     * range ::= '\d' | '\w' | '\s' | category-block | range-char
     *           | range-char '-' range-char
     * range-char ::= '\[' | '\]' | '\\' | '\' [,-efnrtv] | bmp-code | character-2
     * bmp-code ::= '\' 'u' [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]
     * </pre>
     *
     * <p>A ']' that appears as the very first item of the class is taken as a
     * literal character rather than as the terminator.</p>
     *
     * @param useNrange when the class is negated with '^': if <code>true</code> the
     *        result is built with <code>Token.createNRange()</code>; if
     *        <code>false</code> the listed ranges are subtracted from the full
     *        range <code>0..Token.UTF16_MAX</code> and that positive range is returned
     * @return the sorted and compacted range token
     * @throws ParseException "parser.cc.2" if the pattern ends inside the class,
     *         "parser.cc.1"/"parser.cc.3" for a malformed or unknown POSIX class,
     *         "parser.atom.5" for an unknown category name, plus anything thrown
     *         while decoding escapes
     */
    protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
        this.setContext(S_INBRACKETS);
        this.next();                            // '['
        boolean nrange = false;
        RangeToken base = null;
        RangeToken tok;
        if (this.read() == T_CHAR && this.chardata == '^') {
            nrange = true;
            this.next();                        // '^'
            if (useNrange) {
                tok = Token.createNRange();
            } else {
                // Negation is computed explicitly later: base minus the listed ranges.
                base = Token.createRange();
                base.addRange(0, Token.UTF16_MAX);
                tok = Token.createRange();
            }
        } else {
            tok = Token.createRange();
        }

        int type;
        boolean firstloop = true;
        while ((type = this.read()) != T_EOF) {
            if (type == T_CHAR && this.chardata == ']' && !firstloop) {
                break;
            }
            firstloop = false;
            int c = this.chardata;
            // 'end' is set when the item was merged as a whole set, so it can
            // neither be added as a single character nor start an 'a-b' range.
            boolean end = false;
            if (type == T_BACKSOLIDUS) {
                switch (c) {
                  case 'd':  case 'D':
                  case 'w':  case 'W':
                  case 's':  case 'S':
                    tok.mergeRanges(this.getTokenForShorthand(c));
                    end = true;
                    break;

                  case 'i':  case 'I':
                  case 'c':  case 'C':
                    c = this.processCIinCharacterClass(tok, c);
                    if (c < 0) {
                        end = true;
                    }
                    break;

                  case 'p':
                  case 'P': {
                    int pstart = this.offset;
                    RangeToken tok2 = this.processBacksolidus_pP(c);
                    if (tok2 == null) {
                        throw this.ex("parser.atom.5", pstart);
                    }
                    tok.mergeRanges(tok2);
                    end = true;
                    break;
                  }

                  default:
                    c = this.decodeEscaped();
                } // \ + c
            } // backsolidus
            // POSIX Character class such as [:alnum:]
            else if (type == T_POSIX_CHARCLASS_START) {
                int nameend = this.regex.indexOf(':', this.offset);
                if (nameend < 0) {
                    throw this.ex("parser.cc.1", this.offset);
                }
                boolean positive = true;
                if (this.regex.charAt(this.offset) == '^') {
                    this.offset ++;
                    positive = false;
                }
                String name = this.regex.substring(this.offset, nameend);
                RangeToken range = Token.getRange(name, positive,
                                                  this.isSet(RegularExpression.XMLSCHEMA_MODE));
                if (range == null) {
                    throw this.ex("parser.cc.3", this.offset);
                }
                tok.mergeRanges(range);
                end = true;
                // The name must be closed by ":]".
                if (nameend+1 >= this.regexlen || this.regex.charAt(nameend+1) != ']') {
                    throw this.ex("parser.cc.1", nameend);
                }
                this.offset = nameend+2;
            }
            this.next();
            if (!end) {                         // if not shorthands...
                if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
                    tok.addRange(c, c);
                } else {
                    this.next();                // Skips '-'
                    if ((type = this.read()) == T_EOF) {
                        throw this.ex("parser.cc.2", this.offset);
                    }
                    if (type == T_CHAR && this.chardata == ']') {
                        // A trailing '-' right before ']' is a literal hyphen.
                        tok.addRange(c, c);
                        tok.addRange('-', '-');
                    } else {
                        int rangeend = this.chardata;
                        if (type == T_BACKSOLIDUS) {
                            rangeend = this.decodeEscaped();
                        }
                        this.next();
                        tok.addRange(c, rangeend);
                    }
                }
            }
            if (this.isSet(RegularExpression.SPECIAL_COMMA)
                && this.read() == T_CHAR && this.chardata == ',') {
                this.next();
            }
        }
        if (this.read() == T_EOF) {
            throw this.ex("parser.cc.2", this.offset);
        }
        if (!useNrange && nrange) {
            base.subtractRanges(tok);
            tok = base;
        }
        tok.sortRanges();
        tok.compactRanges();
        //tok.dumpRanges();
        /*
        if (this.isSet(RegularExpression.IGNORE_CASE))
            tok = RangeToken.createCaseInsensitiveToken(tok);
        */
        this.setContext(S_NORMAL);
        this.next();                            // Skips ']'

        return tok;
    }

    /**
     * Parses a sequence of character classes combined with set operators and
     * consumes the terminating ')'.
     *
     * <pre>
     * '(?[' ... ']' (('-' | '+' | '&amp;') '[' ... ']')? ')'
     * </pre>
     *
     * <p>'+' merges, '-' subtracts and '&amp;' intersects the following class with
     * the result accumulated so far. The loop accepts any number of operator /
     * class pairs until the closing parenthesis is read.</p>
     *
     * @return the combined range token
     * @throws ParseException "parser.ope.1" if an operator is not followed by '[',
     *         "parser.ope.2" if something other than an operator or ')' is found
     */
    protected RangeToken parseSetOperations() throws ParseException {
        RangeToken tok = this.parseCharacterClass(false);
        int type;
        while ((type = this.read()) != T_RPAREN) {
            int ch = this.chardata;
            if (type == T_CHAR && (ch == '-' || ch == '&')
                || type == T_PLUS) {
                this.next();
                if (this.read() != T_LBRACKET) {
                    throw ex("parser.ope.1", this.offset-1);
                }
                RangeToken t2 = this.parseCharacterClass(false);
                if (type == T_PLUS) {
                    tok.mergeRanges(t2);
                } else if (ch == '-') {
                    tok.subtractRanges(t2);
                } else if (ch == '&') {
                    tok.intersectRanges(t2);
                } else {
                    throw new RuntimeException("ASSERT");
                }
            } else {
                throw ex("parser.ope.2", this.offset-1);
            }
        }
        this.next();
        return tok;
    }

    /**
     * Returns the token for one of the shorthand escapes
     * <code>\d \D \w \W \s \S</code>.
     *
     * <p>When the <code>USE_UNICODE_CATEGORY</code> option is set the token is
     * looked up by name through <code>Token.getRange</code>; otherwise one of the
     * predefined <code>Token.token_*</code> constants is returned.</p>
     *
     * @param ch the shorthand letter
     * @return the corresponding token
     * @throws RuntimeException if <code>ch</code> is not one of the six letters
     */
    Token getTokenForShorthand(int ch) {
        Token tok;
        switch (ch) {
          case 'd':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("Nd", true) : Token.token_0to9;
            break;
          case 'D':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("Nd", false) : Token.token_not_0to9;
            break;
          case 'w':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("IsWord", true) : Token.token_wordchars;
            break;
          case 'W':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("IsWord", false) : Token.token_not_wordchars;
            break;
          case 's':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("IsSpace", true) : Token.token_spaces;
            break;
          case 'S':
            tok = this.isSet(RegularExpression.USE_UNICODE_CATEGORY)
                ? Token.getRange("IsSpace", false) : Token.token_not_spaces;
            break;

          default:
            throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
        }
        return tok;
    }

    /**
     * Decodes the escape sequence at the current token into a code point.
     *
     * <p>Recognized forms (each preceded by a backslash):</p>
     * <ul>
     *   <li><code>e f n r t</code> - the corresponding control character;</li>
     *   <li><code>x</code> + two hex digits, or <code>x{</code> hex digits <code>}</code>;</li>
     *   <li><code>u</code> + exactly four hex digits;</li>
     *   <li><code>v</code> + exactly six hex digits (not vertical tabulation);</li>
     *   <li><code>A</code>, <code>Z</code>, <code>z</code> - rejected;</li>
     *   <li>any other character - returned unchanged.</li>
     * </ul>
     *
     * <p>This method does not advance past the last character of the escape
     * (the final hex digit or the closing '}'); the caller calls
     * <code>next()</code> itself.</p>
     *
     * @return the decoded character value
     * @throws ParseException "parser.next.1" if the current token is not an escape,
     *         "parser.descape.1" for a missing or invalid hex digit,
     *         "parser.descape.2" on overflow inside <code>x{...}</code>,
     *         "parser.descape.3" if <code>x{...}</code> is not closed by '}',
     *         "parser.descape.4" if the value exceeds <code>Token.UTF16_MAX</code>,
     *         "parser.descape.5" for <code>A</code>, <code>Z</code> or <code>z</code>
     */
    int decodeEscaped() throws ParseException {
        if (this.read() != T_BACKSOLIDUS) {
            throw ex("parser.next.1", this.offset-1);
        }
        int c = this.chardata;
        switch (c) {
          case 'e':  c = 0x1b;  break; // ESCAPE U+001B
          case 'f':  c = '\f';  break; // FORM FEED U+000C
          case 'n':  c = '\n';  break; // LINE FEED U+000A
          case 'r':  c = '\r';  break; // CARRIAGE RETURN U+000D
          case 't':  c = '\t';  break; // HORIZONTAL TABULATION U+0009
          //case 'v':  c = 0x0b;  break; // VERTICAL TABULATION U+000B
          case 'x':
            this.next();
            if (this.read() != T_CHAR) {
                throw ex("parser.descape.1", this.offset-1);
            }
            if (this.chardata == '{') {
                // Variable-length form: hex digits up to the first non-hex character.
                int v1 = 0;
                int uv = 0;
                do {
                    this.next();
                    if (this.read() != T_CHAR) {
                        throw ex("parser.descape.1", this.offset-1);
                    }
                    if ((v1 = hexChar(this.chardata)) < 0) {
                        break;
                    }
                    // Wrap-around guard for the multiplication below.
                    if (uv > uv*16) {
                        throw ex("parser.descape.2", this.offset-1);
                    }
                    uv = uv*16+v1;
                } while (true);
                if (this.chardata != '}') {
                    throw ex("parser.descape.3", this.offset-1);
                }
                if (uv > Token.UTF16_MAX) {
                    throw ex("parser.descape.4", this.offset-1);
                }
                c = uv;
            } else {
                // Fixed form: exactly two hex digits, the first is the current token.
                int v1 = 0;
                if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                    throw ex("parser.descape.1", this.offset-1);
                }
                int uv = v1;
                this.next();
                if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                    throw ex("parser.descape.1", this.offset-1);
                }
                uv = uv*16+v1;
                c = uv;
            }
            break;

          case 'u': {
            // Exactly four hex digits.
            int v1 = 0;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            int uv = v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            c = uv;
            break;
          }

          case 'v': {
            // Exactly six hex digits.
            int v1 = 0;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            int uv = v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            this.next();
            if (this.read() != T_CHAR || (v1 = hexChar(this.chardata)) < 0) {
                throw ex("parser.descape.1", this.offset-1);
            }
            uv = uv*16+v1;
            // Same message key as the x{...} branch uses for an out-of-range value.
            if (uv > Token.UTF16_MAX) {
                throw ex("parser.descape.4", this.offset-1);
            }
            c = uv;
            break;
          }

          case 'A':
          case 'Z':
          case 'z':
            throw ex("parser.descape.5", this.offset-2);

          default:
            // Any other escaped character stands for itself.
        }
        return c;
    }

    /**
     * Converts a hexadecimal digit character to its numeric value.
     *
     * @param ch a character code
     * @return 0-15 for <code>0-9</code>, <code>A-F</code> and <code>a-f</code>;
     *         -1 for any other character
     */
    static private final int hexChar(int ch) {
        if (ch < '0') {
            return -1;
        }
        if (ch > 'f') {
            return -1;
        }
        if (ch <= '9') {
            return ch-'0';
        }
        if (ch < 'A') {
            return -1;
        }
        if (ch <= 'F') {
            return ch-'A'+10;
        }
        if (ch < 'a') {
            return -1;
        }
        return ch-'a'+10;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -