📄 regcomp.java
字号:
groupTail = groupHead.getTail(); } else throw error(L.l("conditional requires number")); RegexpNode oldTail = _groupTail; _groupTail = groupTail; RegexpNode first = parseRec(pattern, null); RegexpNode second = null; if ((ch = pattern.read()) == '|') { second = parseRec(pattern, null); ch = pattern.read(); } if (ch != ')') throw error(L.l("expected ')' at '{0}'", String.valueOf((char) ch))); _groupTail = oldTail; groupHead.setFirst(first); groupHead.setSecond(second); return concat(tail, parseRec(pattern, groupHead)); } private RegexpNode parseGroup(PeekStream pattern, RegexpNode tail, int group, int oldFlags) throws IllegalRegexpException { RegexpNode.GroupHead groupHead = new RegexpNode.GroupHead(group); RegexpNode groupTail = groupHead.getTail(); RegexpNode oldTail = _groupTail; _groupTail = groupTail; RegexpNode body = parseRec(pattern, null); int ch; while ((ch = pattern.read()) == '|') { RegexpNode nextBody = parseRec(pattern, null); body = body.createOr(nextBody); } if (ch != ')') throw error(L.l("expected ')'")); _flags = oldFlags; _groupTail = oldTail; groupHead.setNode(body.getHead()); return concat(tail, parseRec(pattern, groupTail).getHead()); } private void expect(char test, int value) throws IllegalRegexpException { if (test != value) throw error(L.l("expected '{0}'", test)); } private IllegalRegexpException error(String msg) { return new IllegalRegexpException(msg); } /** * Parse the repetition construct. * * {n} -- exactly n * {n,} -- at least n * {n,m} -- from n to m * {,m} -- at most m */ private RegexpNode parseBrace(PeekStream pattern, RegexpNode node) throws IllegalRegexpException { int ch; int min = 0; int max = INTEGER_MAX; while ((ch = pattern.read()) >= '0' && ch <= '9') { min = 10 * min + ch - '0'; } if (ch == ',') { while ('0' <= (ch = pattern.read()) && ch <= '9') { if (max == INTEGER_MAX) max = 0; max = 10 * max + ch - '0'; } } else max = min; if (ch != '}') throw error(L.l("Expected '}'")); return createLoop(pattern, node, min, max); } private RegexpNode createLoop(PeekStream pattern, RegexpNode node, int min, int max) { if (pattern.peek() == '+') { pattern.read(); return node.createPossessiveLoop(min, max); } else if (pattern.peek() == '?') { pattern.read(); if (isGreedy()) return node.createLoopUngreedy(this, min, max); else return node.createLoop(this, min, max); } else { if (isGreedy()) return node.createLoop(this, min, max); else return node.createLoopUngreedy(this, min, max); } } static RegexpNode concat(RegexpNode prev, RegexpNode next) { if (prev != null) { return prev.concat(next).getHead(); } else return next; } private String hex(int value) { CharBuffer cb = new CharBuffer(); for (int b = 3; b >= 0; b--) { int v = (value >> (4 * b)) & 0xf; if (v < 10) cb.append((char) (v + '0')); else cb.append((char) (v - 10 + 'a')); } return cb.toString(); } private String badChar(int ch) { if (0x20 <= ch && ch <= 0x7f) return "'" + (char) ch + "'"; else if ((ch & 0xffff) == 0xffff) return "end of expression"; else return "'" + (char) ch + "' (\\u" + hex(ch) + ")"; } /** * Collect the characters in a set, e.g. [a-z@@^!"] * * Variables: * * last -- Contains last read character. * lastdash -- Contains character before dash or -1 if not after dash. */ private RegexpNode parseSet(PeekStream pattern) throws IllegalRegexpException { int first = pattern.peek(); boolean isNot = false; if (first == '^') { pattern.read(); isNot = true; } RegexpSet set = new RegexpSet(); int last = -1; int lastdash = -1; int ch; int charRead = 0; while ((ch = pattern.read()) >= 0) { charRead++; // php/4e3o // first literal closing bracket need not be escaped if (ch == ']') { if (charRead == 1 && first == '^') { pattern.ungetc(ch); ch = '\\'; } else break; } boolean isChar = true; boolean isDash = ch == '-'; if (ch == '\\') { isChar = false; switch ((ch = pattern.read())) { case 's': set.mergeOr(RegexpSet.SPACE); break; case 'S': set.mergeOrInv(RegexpSet.SPACE); break; case 'd': set.mergeOr(RegexpSet.DIGIT); break; case 'D': set.mergeOrInv(RegexpSet.DIGIT); break; case 'w': set.mergeOr(RegexpSet.WORD); break; case 'W': set.mergeOrInv(RegexpSet.WORD); break; case 'b': ch = '\b'; isChar = true; break; case 'n': ch = '\n'; isChar = true; break; case 't': ch = '\t'; isChar = true; break; case 'r': ch = '\r'; isChar = true; break; case 'f': ch = '\f'; isChar = true; break; case 'x': ch = parseHex(pattern); isChar = true; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': ch = parseOctal(ch, pattern); isChar = true; break; default: isChar = true; } } else if (ch == '[') { if (pattern.peek() == ':') { isChar = false; pattern.read(); set.mergeOr(parseCharacterClass(pattern)); } } if (isDash && last != -1 && lastdash == -1) { lastdash = last; } // c1-c2 else if (isChar && lastdash != -1) { if (lastdash > ch) throw new IllegalRegexpException("expected increasing range at " + badChar(ch)); setRange(set, lastdash, ch); last = -1; lastdash = -1; } else if (lastdash != -1) { setRange(set, lastdash, lastdash); setRange(set, '-', '-'); last = -1; lastdash = -1; } else if (last != -1) { setRange(set, last, last); if (isChar) last = ch; } else if (isChar) last = ch; } // Dash at end of set: [a-z1-] if (lastdash != -1) { setRange(set, lastdash, lastdash); setRange(set, '-', '-'); } else if (last != -1) setRange(set, last, last); if (ch != ']') throw error(L.l("Expected ']'")); if (isNot) return set.createNotNode(); else return set.createNode(); } private void setRange(RegexpSet set, int a, int b) { set.setRange(a, b); if (isIgnoreCase()) { if (Character.isLowerCase(a) && Character.isLowerCase(b)) { set.setRange(Character.toUpperCase(a), Character.toUpperCase(b)); } if (Character.isUpperCase(a) && Character.isUpperCase(b)) { set.setRange(Character.toLowerCase(a), Character.toLowerCase(b)); } } } /** * Returns a node for sequences starting with a backslash. */ private RegexpNode parseSlash(PeekStream pattern) throws IllegalRegexpException { int ch; switch (ch = pattern.read()) { case 's': return RegexpNode.SPACE; case 'S': return RegexpNode.NOT_SPACE; case 'd': return RegexpNode.DIGIT; case 'D': return RegexpNode.NOT_DIGIT; case 'w': return RegexpNode.S_WORD; case 'W': return RegexpNode.NOT_S_WORD; case 'b': return RegexpNode.WORD; case 'B': return RegexpNode.NOT_WORD; case 'A': return RegexpNode.STRING_BEGIN; case 'z': return RegexpNode.STRING_END; case 'Z': return RegexpNode.STRING_NEWLINE; case 'G': return RegexpNode.STRING_FIRST; case 'a': return parseString('\u0007', pattern); case 'c': ch = pattern.read(); ch = Character.toUpperCase(ch); ch ^= 0x40; return parseString(ch, pattern); case 'e': return parseString('\u001B', pattern, true); case 'n': return parseString('\n', pattern, true); case 'r': return parseString('\r', pattern, true); case 'f': return parseString('\f', pattern, true); case 't': return parseString('\t', pattern, true); case 'x': int hex = parseHex(pattern); return parseString(hex, pattern, true); case '0': int oct = parseOctal(ch, pattern); return parseString(oct, pattern, true); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return parseBackReference(ch, pattern); case 'p': return parseUnicodeProperty(pattern, false); case 'P': return parseUnicodeProperty(pattern, true); case 'Q': throw new UnsupportedOperationException(); /* while ((ch = pattern.read()) >= 0) { if (ch == '\\' && pattern.peek() == 'E') { pattern.read(); break; } last = parseString(ch, pattern); } return last; */ case '#': return parseString('#', pattern, true); default: if ((_flags & STRICT) != 0) throw new IllegalRegexpException("unrecognized escape at " + badChar(ch)); return parseString(ch, pattern); } } /** * Returns a node for sequences starting with a '[:'. */ private RegexpSet parseCharacterClass(PeekStream pattern) throws IllegalRegexpException { StringBuilder sb = new StringBuilder(); int ch; while ((ch = pattern.read()) != ':' && ch >= 0) { sb.append((char)ch); } if (ch != ':') { throw new IllegalRegexpException("expected character class closing colon ':' at " + badChar(ch)); } if ((ch = pattern.read()) != ']') { throw new IllegalRegexpException("expected character class closing bracket ']' at " + badChar(ch)); } String name = sb.toString(); RegexpSet set = RegexpSet.CLASS_MAP.get(name); if (set == null) { throw new IllegalRegexpException("unrecognized POSIX character class " + name); } return set; } private int parseHex(PeekStream pattern) throws IllegalRegexpException { int ch = pattern.read(); int hex = 0; StringBuilder sb = new StringBuilder(); if (ch == '{') { while ((ch = pattern.read()) != '}') { if (ch < 0) throw new IllegalRegexpException("no more input; expected '}'"); sb.append((char)ch); } } else { if (ch < 0) throw new IllegalRegexpException("expected hex digit at " + badChar(ch)); sb.append((char)ch); ch = pattern.read(); if (ch < 0) { throw new IllegalRegexpException("expected hex digit at " + badChar(ch)); } sb.append((char)ch); } int len = sb.length(); for (int i = 0; i < len; i++) { ch = sb.charAt(i);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -