cauchoregexpmodule.java
来自「RESIN 3.2 最新源码」· Java 代码 · 共 1,953 行 · 第 1/4 页
JAVA
1,953 行
if (digit != '}') { env.warning(L.l("bad regexp {0}", replacement)); throw new QuercusException("bad regexp"); } if (text.length() > 0) program.add(new TextReplacement(text)); if (isEval) program.add(new GroupEscapeReplacement(group)); else program.add(new GroupReplacement(group)); text.setLength(0); } else text.append(ch); } else text.append(ch); } if (text.length() > 0) program.add(new TextReplacement(text)); return program; }

  /**
   * Cleans the regexp from valid values that the Java regexps can't handle.
   * Ereg has a different syntax so need to handle it differently from preg.
   *
   * <p>The pattern is copied character by character into a new builder
   * obtained from {@code regexp.createStringBuilder()}, rewriting escapes,
   * bracket expressions, braces, '#' and repeated '|' along the way.
   *
   * @param env the calling environment (not read by this method)
   * @param regexp the ereg pattern to rewrite
   * @param isComments when true, a '#' outside a bracket expression starts
   *   a comment that is copied verbatim up to and including the next
   *   '\n' or '\r'
   * @return the rewritten pattern
   * @throws QuercusRuntimeException if a <code>\x{...}</code> escape does
   *   not contain 1 to 4 characters
   */
  private static StringValue cleanEregRegexp(Env env,
                                             StringValue regexp,
                                             boolean isComments)
  {
    int len = regexp.length();

    StringValue sb = regexp.createStringBuilder();

    // '[' while inside a bracket expression, 0 otherwise
    char quote = 0;

    // true after a '|' was emitted and only whitespace, '#' or '|' followed
    boolean sawVerticalBar = false;

    for (int i = 0; i < len; i++) {
      char ch = regexp.charAt(i);

      if (sawVerticalBar) {
        if ((! Character.isWhitespace(ch)) && ch != '#' && ch != '|')
          sawVerticalBar = false;
      }

      switch (ch) {
      case '\\':
        if (quote == '[') {
          // inside a bracket expression the backslash is emitted doubled
          sb = sb.appendByte('\\');
          sb = sb.appendByte('\\');
          continue;
        }

        if (i + 1 < len) {
          i++;

          ch = regexp.charAt(i);

          // \0 is always octal; \1..\3 is octal only when the FOLLOWING
          // character is an octal digit.  The upper bound used to be tested
          // against ch itself, which made it always true.
          boolean isOctal = ch == '0';

          if (! isOctal && '1' <= ch && ch <= '3' && i + 1 < len) {
            char next = regexp.charAt(i + 1);

            isOctal = '0' <= next && next <= '7';
          }

          if (isOctal) {
            // Java's regexp requires \0 for octal

            sb = sb.appendByte('\\');
            sb = sb.appendByte('0');
            sb = sb.appendByte(ch);
          }
          else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') {
            sb = sb.appendByte('\\');

            int tail = regexp.indexOf('}', i + 1);

            if (tail > 0) {
              StringValue hex = regexp.substring(i + 2, tail);

              int length = hex.length();

              // pad to the fixed-width \xhh or \uhhhh forms
              if (length == 1)
                sb = sb.appendBytes("x0" + hex);
              else if (length == 2)
                sb = sb.appendBytes("x" + hex);
              else if (length == 3)
                sb = sb.appendBytes("u0" + hex);
              else if (length == 4)
                sb = sb.appendBytes("u" + hex);
              else
                throw new QuercusRuntimeException(L.l("illegal hex escape"));

              i = tail;
            }
            else {
              // NOTE(review): a backslash was already appended above, so the
              // output here is two backslashes followed by 'x' -- confirm
              // that a literal backslash is what is intended.
              sb = sb.appendByte('\\');
              sb = sb.appendByte('x');
            }
          }
          else if (Character.isLetter(ch)) {
            switch (ch) {
            case 'a':
            case 'c':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'x':
            case 'd':
            case 'D':
            case 's':
            case 'S':
            case 'w':
            case 'W':
            case 'b':
            case 'B':
            case 'A':
            case 'Z':
            case 'z':
            case 'G':
            case 'p': //XXX: need to translate PHP properties to Java ones
            case 'P': //XXX: need to translate PHP properties to Java ones
            case 'X':
            //case 'C': byte matching, not supported
              sb = sb.appendByte('\\');
              sb = sb.appendByte(ch);
              break;

            default:
              // any other escaped letter is emitted without the backslash
              sb = sb.appendByte(ch);
            }
          }
          else {
            sb = sb.appendByte('\\');
            sb = sb.appendByte(ch);
          }
        }
        else
          sb = sb.appendByte('\\');

        break;

      case '[':
        if (quote == '[') {
          if (i + 1 < len && regexp.charAt(i + 1) == ':') {
            // "[:" inside a bracket expression is passed through
            sb = sb.appendByte('[');
          }
          else {
            sb = sb.appendByte('\\');
            sb = sb.appendByte('[');
          }
        }
        else if (i + 1 < len && regexp.charAt(i + 1) == '['
                 && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) {
          // XXX: check regexp grammar
          // php/151n
          sb = sb.appendByte('[');
          sb = sb.appendByte('\\');
          sb = sb.appendByte('[');
          i += 1;
        }
        /*
        else if (i + 2 < len &&
                 regexp.charAt(i + 1) == '^' &&
                 regexp.charAt(i + 2) == ']') {
          sb.append("[^\\]");
          i += 2;
        }
        */
        else
          sb = sb.appendByte('[');

        if (quote == 0)
          quote = '[';
        break;

      case '#':
        if (quote == '[') {
          sb = sb.appendByte('\\');
          sb = sb.appendByte('#');
        }
        else if (isComments) {
          sb = sb.appendByte(ch);

          // copy the comment through the end-of-line character
          for (i++; i < len; i++) {
            ch = regexp.charAt(i);

            sb = sb.appendByte(ch);

            if (ch == '\n' || ch == '\r')
              break;
          }
        }
        else {
          sb = sb.appendByte(ch);
        }

        break;

      case ']':
        sb = sb.appendByte(ch);

        if (quote == '[')
          quote = 0;
        break;

      case '{':
        if (i + 1 < len
            && ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9'
                || ch == ',')) {
          // '{' followed by a digit or ',' is copied as a quantifier
          sb = sb.appendByte('{');

          for (i++;
               i < len
               && ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ',');
               i++) {
            sb = sb.appendByte(ch);
          }

          // the character that ended the digit/comma run is copied as-is
          if (i < len)
            sb = sb.appendByte(regexp.charAt(i));
        }
        else {
          sb = sb.appendByte('\\');
          sb = sb.appendByte('{');
        }
        break;

      case '}':
        sb = sb.appendByte('\\');
        sb = sb.appendByte('}');
        break;

      case '|':
        // php/152o
        // php ignores subsequent vertical bars
        //
        // to accommodate drupal bug http://drupal.org/node/123750
        if (! sawVerticalBar) {
          sb = sb.appendByte('|');
          sawVerticalBar = true;
        }
        break;

      default:
        sb = sb.appendByte(ch);
      }
    }

    return sb;
  }

  /**
   * One element of a replacement program.
   */
  abstract static class Replacement {
    /**
     * Appends this element's contribution to the output.
     *
     * @param env the calling environment
     * @param sb the output built so far
     * @param regexpState the state of the current match
     * @return the builder to use for subsequent appends
     */
    abstract StringValue eval(Env env,
                              StringValue sb,
                              RegexpState regexpState);

    @Override
    public String toString()
    {
      return getClass().getSimpleName() + "[]";
    }
  }

  /**
   * Replacement element that appends a fixed run of characters.
   */
  static class TextReplacement extends Replacement {
    private final char []_text;

    /**
     * Copies the current contents of {@code text}; later changes to the
     * builder do not affect this element.
     */
    TextReplacement(StringBuilder text)
    {
      int length = text.length();

      _text = new char[length];

      text.getChars(0, length, _text, 0);
    }

    @Override
    StringValue eval(Env env,
                     StringValue sb,
                     RegexpState regexpState)
    {
      return sb.appendBytes(_text, 0, _text.length);
    }

    @Override
    public String toString()
    {
      StringBuilder sb = new StringBuilder();

      sb.append(getClass().getSimpleName());
      sb.append('[');
      sb.append(_text);
      sb.append(']');

      return sb.toString();
    }
  }

  /**
   * Replacement element that appends the text of a match group.
   */
  static class GroupReplacement extends Replacement {
    private final int _group;

    GroupReplacement(int group)
    {
      _group = group;
    }

    /**
     * Appends the group's value when the group index is below
     * {@code regexpState.groupCount()}; otherwise appends nothing.
     */
    @Override
    StringValue eval(Env env,
                     StringValue sb,
                     RegexpState regexpState)
    {
      if (_group < regexpState.groupCount())
        sb = sb.append(regexpState.group(env, _group));

      return sb;
    }

    @Override
    public String toString()
    {
      return getClass().getSimpleName() + "[" + _group + "]";
    }
  }

  /**
   * Replacement element that appends the text of a match group with
   * single and double quotes preceded by a backslash.
   */
  static class GroupEscapeReplacement extends Replacement {
    private final int _group;

    GroupEscapeReplacement(int group)
    {
      _group = group;
    }

    /**
     * Appends the escaped group value when the group index is below
     * {@code regexpState.groupCount()}; otherwise appends nothing.
     */
    @Override
    StringValue eval(Env env,
                     StringValue sb,
                     RegexpState regexpState)
    {
      if (_group < regexpState.groupCount()) {
        StringValue group = regexpState.group(env, _group);

        int len = group.length();

        for (int i = 0; i < len; i++) {
          char ch = group.charAt(i);

          if (ch == '\'') {
            sb = sb.appendByte('\\');
            sb = sb.appendByte('\'');
          }
          else if (ch == '\"') {
            sb = sb.appendByte('\\');
            sb = sb.appendByte('\"');
          }
          else
            sb = sb.appendByte(ch);
        }
      }

      return sb;
    }

    @Override
    public String toString()
    {
      return getClass().getSimpleName() + "[" + _group + "]";
    }
  }

  /**
   * Holds information about the left neighbor of a
particular group.
   */
  static class GroupNeighborMap {
    /** Value stored for a group that has no recorded neighbor. */
    private static final int UNSET = -1;

    /** Indexed by group number; slot 0 is never assigned by the scan. */
    private int []_neighborMap;

    /**
     * Scans the pattern once and records a neighbor for each capturing
     * group it encounters.
     *
     * <p>A '(' that is not followed by '?' opens the next numbered group.
     * If a '|' was seen since the last capturing group was opened or
     * closed, the new group's neighbor is the previous group number;
     * otherwise its neighbor is the most recent group opened without a
     * pending '|' (or {@code UNSET} if there is none).  Escaped characters
     * and characters inside brackets are not treated as syntax.
     *
     * @param regexp the pattern text to scan
     * @param groups the number of capturing groups in the pattern
     */
    public GroupNeighborMap(CharSequence regexp, int groups)
    {
      _neighborMap = new int[groups + 1];

      for (int i = 1; i <= groups; i++) {
        _neighborMap[i] = UNSET;
      }

      boolean sawEscape = false;
      boolean sawVerticalBar = false;
      boolean isLiteral = false;

      int group = 0;
      int parent = UNSET;
      int length = regexp.length();

      // one entry per open '(': true for "(?", false for a capturing group
      ArrayList<Boolean> openParenStack = new ArrayList<Boolean>(groups);

      for (int i = 0; i < length; i++) {
        char ch = regexp.charAt(i);

        // whitespace is skipped; this used to compare against the letter
        // 'r' instead of the carriage return '\r'
        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f') {
          continue;
        }
        else if (ch == '\\') {
          sawEscape = ! sawEscape;
          continue;
        }
        else if (ch == '[' && ! sawEscape) {
          isLiteral = true;
        }
        else if (ch == ']' && ! sawEscape) {
          isLiteral = false;
        }
        else if (isLiteral || sawEscape) {
          // escaped or bracketed character: consume the escape, no syntax
          sawEscape = false;
        }
        else if (ch == '(') {
          if (i + 1 < length && regexp.charAt(i + 1) == '?') {
            openParenStack.add(true);
            continue;
          }

          openParenStack.add(false);
          group++;

          if (sawVerticalBar) {
            sawVerticalBar = false;
            _neighborMap[group] = group - 1;
          }
          else {
            _neighborMap[group] = parent;
            parent = group;
          }
        }
        else if (ch == ')') {
          // A ')' with no recorded '(' can reach here because bracket
          // tracking is simplified (e.g. "[])]"); ignore it instead of
          // throwing from remove().
          if (openParenStack.isEmpty())
            continue;

          if (openParenStack.remove(openParenStack.size() - 1))
            continue;

          sawVerticalBar = false;
        }
        else if (ch == '|') {
          sawVerticalBar = true;
        }
        else {
        }
      }
    }

    /**
     * Returns true if the stored value for the group is not {@code UNSET}.
     */
    public boolean hasNeighbor(int group)
    {
      return _neighborMap[group] != UNSET;
    }

    /**
     * Returns the stored neighbor for the group, or {@code UNSET} (-1).
     */
    public int getNeighbor(int group)
    {
      return _neighborMap[group];
    }
  }

  // marks the characters flagged in the PREG_QUOTE table
  static {
    PREG_QUOTE['\\'] = true;
    PREG_QUOTE['+'] = true;
    PREG_QUOTE['*'] = true;
    PREG_QUOTE['?'] = true;
    PREG_QUOTE['['] = true;
    PREG_QUOTE['^'] = true;
    PREG_QUOTE[']'] = true;
    PREG_QUOTE['$'] = true;
    PREG_QUOTE['('] = true;
    PREG_QUOTE[')'] = true;
    PREG_QUOTE['{'] = true;
    PREG_QUOTE['}'] = true;
    PREG_QUOTE['='] = true;
    PREG_QUOTE['!'] = true;
    PREG_QUOTE['<'] = true;
    PREG_QUOTE['>'] = true;
    PREG_QUOTE['|'] = true;
    PREG_QUOTE[':'] = true;
    PREG_QUOTE['.'] = true;
  }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?