📄 regcomp.java
字号:
if ('0' <= ch && ch <= '9') hex = hex * 16 + ch - '0'; else if ('a' <= ch && ch <= 'f') hex = hex * 16 + ch - 'a' + 10; else if ('A' <= ch && ch <= 'F') hex = hex * 16 + ch - 'A' + 10; else throw new IllegalRegexpException("expected hex digit at " + badChar(ch)); } return hex; } private RegexpNode parseBackReference(int ch, PeekStream pattern) throws IllegalRegexpException { int value = ch - '0'; int ch2 = pattern.peek(); if ('0' <= ch2 && ch2 <= '9') { pattern.read(); value = value * 10 + ch2 - '0'; } int ch3 = pattern.peek(); if (value < 10 || value <= _nGroup && ! ('0' <= ch3 && ch3 <= '7')) { return new RegexpNode.GroupRef(value); } else if (! ('0' <= ch2 && ch2 <= '7') && ! ('0' <= ch3 && ch3 <= '7')) throw new IllegalRegexpException("back referencing to a non-existent group: " + value); if (value > 10) pattern.ungetc(ch2); if (ch == '8' || ch == '9' || '0' <= ch3 && ch3 <= '9' && value * 10 + ch3 - '0' > 0xFF) { //out of byte range or not an octal, //need to parse backslash as the NULL character pattern.ungetc(ch); return parseString('\u0000', pattern); } int oct = parseOctal(ch, pattern); return parseString(oct, pattern, true); } private RegexpNode parseString(int ch, PeekStream pattern) throws IllegalRegexpException { return parseString(ch, pattern, false); } /** * parseString */ private RegexpNode parseString(int ch, PeekStream pattern, boolean isEscaped) throws IllegalRegexpException { CharBuffer cb = new CharBuffer(); cb.append((char) ch); for (ch = pattern.read(); ch >= 0; ch = pattern.read()) { switch (ch) { case ' ': case '\t': case '\n': case '\r': if (! isIgnoreWs() || isEscaped) cb.append((char) ch); break; case '#': if (! isIgnoreWs() || isEscaped) cb.append((char) ch); else { while ((ch = pattern.read()) != '\n' && ch >= 0) { } } break; case '(': case ')': case '[': case '+': case '?': case '*': case '.': case '$': case '^': case '|': pattern.ungetc(ch); return createString(cb); case '{': if ('0' <= pattern.peek() && pattern.peek() <= '9') { pattern.ungetc(ch); return createString(cb); } cb.append('{'); break; case '\\': ch = pattern.read(); switch (ch) { case -1: cb.append('\\'); return createString(cb); case 's': case 'S': case 'd': case 'D': case 'w': case 'W': case 'b': case 'B': case 'A': case 'z': case 'Z': case 'G': case 'p': case 'P': pattern.ungetc(ch); pattern.ungetc('\\'); return createString(cb); case 'a': cb.append('\u0007'); break; case 'c': ch = pattern.read(); ch = Character.toUpperCase(ch); ch ^= 0x40; cb.append((char) ch); break; case 'e': cb.append('\u001b'); break; case 't': cb.append('\t'); break; case 'f': cb.append('\f'); break; case 'n': cb.append('\n'); break; case 'r': cb.append('\r'); break; case 'x': int hex = parseHex(pattern); cb.append((char) hex); break; case 'Q': while ((ch = pattern.read()) >= 0) { if (ch == '\\' && pattern.peek() == 'E') { pattern.read(); break; } cb.append((char) ch); } break; case '0': int oct = parseOctal(ch, pattern); cb.append((char) oct); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (ch - '0' <= _nGroup) { pattern.ungetc(ch); pattern.ungetc('\\'); return createString(cb); } else { oct = parseOctal(ch, pattern); cb.append((char) oct); } break; case '#': cb.append('#'); break; default: if ((_flags & STRICT) != 0) throw error(L.l("unrecognized escape at " + badChar(ch))); cb.append((char) ch); break; } break; default: cb.append((char) ch); } } return createString(cb); } private RegexpNode createString(CharBuffer cb) { if (isIgnoreCase()) return new RegexpNode.StringIgnoreCase(cb); else return new RegexpNode.StringNode(cb); } private int parseOctal(int ch, PeekStream pattern) throws IllegalRegexpException { if ('0' > ch || ch > '7') throw new IllegalRegexpException("expected octal digit at " + badChar(ch)); int oct = ch - '0'; int ch2 = pattern.peek(); if ('0' <= ch2 && ch2 <= '7') { pattern.read(); oct = oct * 8 + ch2 - '0'; ch = pattern.peek(); if ('0' <= ch && ch <= '7') { pattern.read(); oct = oct * 8 + ch - '0'; } } return oct; } private RegexpNode parseUnicodeProperty(PeekStream pattern, boolean isNegated) throws IllegalRegexpException { int ch = pattern.read(); boolean isBraced = false; if (ch == '{') { isBraced = true; ch = pattern.read(); if (ch == '^') { isNegated = ! isNegated; ch = pattern.read(); } } RegexpNode node; if (isBraced) node = parseBracedUnicodeProperty(ch, pattern, isNegated); else node = parseUnbracedUnicodeProperty(ch, pattern, isNegated); return node; } private RegexpNode parseBracedUnicodeProperty(int ch, PeekStream pattern, boolean isNegated) throws IllegalRegexpException { byte category = 0; int ch2 = pattern.read(); switch (ch) { case 'C': switch (ch2) { case 'c': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Cc : RegexpNode.PROP_Cc; case 'f': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Cf : RegexpNode.PROP_Cf; case 'n': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Cn : RegexpNode.PROP_Cn; case 'o': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Co : RegexpNode.PROP_Co; case 's': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Cs : RegexpNode.PROP_Cs; case '}': return isNegated ? RegexpNode.PROP_NOT_C : RegexpNode.PROP_C; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'L': switch (ch2) { case 'l': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Ll : RegexpNode.PROP_Ll; case 'm': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Lm : RegexpNode.PROP_Lm; case 'o': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Lo : RegexpNode.PROP_Lo; case 't': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Lt : RegexpNode.PROP_Lt; case 'u': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Lu : RegexpNode.PROP_Lu; case '}': return isNegated ? RegexpNode.PROP_NOT_L : RegexpNode.PROP_L; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'M': switch (ch2) { case 'c': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Mc : RegexpNode.PROP_Mc; case 'e': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Me : RegexpNode.PROP_Me; case 'n': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Mn : RegexpNode.PROP_Mn; case '}': return isNegated ? RegexpNode.PROP_NOT_M : RegexpNode.PROP_M; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'N': switch (ch2) { case 'd': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Nd : RegexpNode.PROP_Nd; case 'l': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Nl : RegexpNode.PROP_Nl; case 'o': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_No : RegexpNode.PROP_No; case '}': return isNegated ? RegexpNode.PROP_NOT_N : RegexpNode.PROP_N; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'P': switch (ch2) { case 'c': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Pc : RegexpNode.PROP_Pc; case 'd': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Pd : RegexpNode.PROP_Pd; case 'e': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Pe : RegexpNode.PROP_Pe; case 'f': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Pf : RegexpNode.PROP_Pf; case 'i': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Pi : RegexpNode.PROP_Pi; case 'o': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Po : RegexpNode.PROP_Po; case 's': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Ps : RegexpNode.PROP_Ps; case '}': return isNegated ? RegexpNode.PROP_NOT_P : RegexpNode.PROP_P; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'S': switch (ch2) { case 'c': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Sc : RegexpNode.PROP_Sc; case 'k': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Sk : RegexpNode.PROP_Sk; case 'm': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Sm : RegexpNode.PROP_Sm; case 'o': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_So : RegexpNode.PROP_So; case '}': return isNegated ? RegexpNode.PROP_NOT_S : RegexpNode.PROP_S; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } case 'Z': switch (ch2) { case 'l': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Zl : RegexpNode.PROP_Zl; case 'p': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Zp : RegexpNode.PROP_Zp; case 's': expect('}', pattern.read()); return isNegated ? RegexpNode.PROP_NOT_Zs : RegexpNode.PROP_Zs; case '}': return isNegated ? RegexpNode.PROP_NOT_Z : RegexpNode.PROP_Z; default: throw error(L.l("invalid Unicode category " + badChar(ch) + "" + badChar(ch2))); } } if ((ch = pattern.read()) != '}') throw error(L.l("expected '}' at " + badChar(ch))); throw new UnsupportedOperationException(); } private RegexpNode parseUnbracedUnicodeProperty(int ch, PeekStream pattern, boolean isNegated) throws IllegalRegexpException { switch (ch) { case 'C': return isNegated ? RegexpNode.PROP_NOT_C : RegexpNode.PROP_C; case 'L': return isNegated ? RegexpNode.PROP_NOT_L : RegexpNode.PROP_L; case 'M': return isNegated ? RegexpNode.PROP_NOT_M : RegexpNode.PROP_M; case 'N': return isNegated ? RegexpNode.PROP_NOT_N : RegexpNode.PROP_N; case 'P': return isNegated ? RegexpNode.PROP_NOT_P : RegexpNode.PROP_P; case 'S': return isNegated ? RegexpNode.PROP_NOT_S : RegexpNode.PROP_S; case 'Z': return isNegated ? RegexpNode.PROP_NOT_Z : RegexpNode.PROP_Z; default: throw new IllegalRegexpException("invalid Unicode property " + badChar(ch)); } } /* static { _characterClassMap.put("alnum", RegexpNode.RC_ALNUM); _characterClassMap.put("alpha", RegexpNode.RC_ALPHA); _characterClassMap.put("blank", RegexpNode.RC_BLANK); _characterClassMap.put("cntrl", RegexpNode.RC_CNTRL); _characterClassMap.put("digit", RegexpNode.RC_DIGIT); _characterClassMap.put("graph", RegexpNode.RC_GRAPH); _characterClassMap.put("lower", RegexpNode.RC_LOWER); _characterClassMap.put("print", RegexpNode.RC_PRINT); _characterClassMap.put("punct", RegexpNode.RC_PUNCT); _characterClassMap.put("space", RegexpNode.RC_SPACE); _characterClassMap.put("upper", RegexpNode.RC_UPPER); _characterClassMap.put("xdigit", RegexpNode.RC_XDIGIT); } */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -