📄 javaregexpmodule.java
字号:
@Optional("0") int flag) { if (input == null) return NullValue.NULL; Pattern pattern = compileRegexp(patternString); Matcher matcher = null; ArrayValue matchArray = new ArrayValueImpl(); for (Map.Entry<Value, Value> entry : input.entrySet()) { Value entryValue = entry.getValue(); Value entryKey = entry.getKey(); matcher = pattern.matcher(entryValue.toString()); boolean found = matcher.find(); if (!found && (flag == PREG_GREP_INVERT)) matchArray.append(entryKey, entryValue); else if (found && (flag != PREG_GREP_INVERT)) matchArray.append(entryKey, entryValue); } return matchArray; } /** * Returns an array of strings produces from splitting the passed string * around the provided pattern. The pattern is case insensitive. * * @param patternString the pattern * @param string the string to split * @param limit if specified, the maximum number of elements in the array * @return an array of strings split around the pattern string */ public static ArrayValue spliti(Env env, StringValue patternString, StringValue string, @Optional("-1") long limit) { if (limit < 0) limit = Long.MAX_VALUE; // php/151c String cleanRegexp = cleanRegexp(patternString, false); Pattern pattern = Pattern.compile(cleanRegexp, Pattern.CASE_INSENSITIVE); ArrayValue result = new ArrayValueImpl(); Matcher matcher = pattern.matcher(string); long count = 0; int head = 0; while ((matcher.find()) && (count < limit)) { StringValue value; if (count == limit - 1) { value = string.substring(head); head = string.length(); } else { value = string.substring(head, matcher.start()); head = matcher.end(); } result.put(value); count++; } if ((head <= string.length()) && (count != limit)) { result.put(string.substring(head)); } return result; } private static Pattern compileRegexp(StringValue rawRegexp) { return compileRegexp(rawRegexp, 0); } private static Pattern compileRegexp(StringValue rawRegexp, int groupCount) { Pattern pattern = _patternCache.get(rawRegexp); if (pattern != null) return pattern; if (rawRegexp.length() < 2) { throw new IllegalStateException(L.l( "Can't find delimiters in regexp '{0}'.", rawRegexp)); } char delim = rawRegexp.charAt(0); if (delim == '{') delim = '}'; else if (delim == '[') delim = ']'; else if (delim == '(') delim = ')'; else if (delim == '<') delim = '>'; int tail = rawRegexp.lastIndexOf(delim); if (tail <= 0) throw new IllegalStateException(L.l( "Can't find second {0} in regexp '{1}'.", String.valueOf((char) delim), rawRegexp)); int len = rawRegexp.length(); int flags = 0; boolean isExt = false; boolean isGreedy = true; for (int i = tail + 1; i < len; i++) { char ch = rawRegexp.charAt(i); switch (ch) { case 'i': flags |= Pattern.CASE_INSENSITIVE; break; case 's': flags |= Pattern.DOTALL; break; case 'x': flags |= Pattern.COMMENTS; break; case 'm': flags |= Pattern.MULTILINE; break; case 'U': isGreedy = false; break; } } StringValue regexp = rawRegexp.substring(1, tail); String cleanRegexp = cleanRegexp(regexp, (flags & Pattern.COMMENTS) != 0, groupCount); if (! isGreedy) cleanRegexp = toNonGreedy(cleanRegexp); pattern = Pattern.compile(cleanRegexp, flags); _patternCache.put(rawRegexp, pattern); return pattern; } private static int regexpFlags(StringValue rawRegexp) { char delim = rawRegexp.charAt(0); if (delim == '{') delim = '}'; else if (delim == '[') delim = ']'; else if (delim == '(') delim = ')'; else if (delim == '<') delim = '>'; int len = rawRegexp.length(); int flags = 0; int tail = len - 1; for (; tail >= 0; tail--) { char ch = rawRegexp.charAt(tail); if (ch == delim) break; else if (ch == 'e') flags |= PREG_REPLACE_EVAL; else if (ch == 'u') flags |= PCRE_UTF8; } if (tail <= 0) throw new IllegalStateException(L.l( "Can't find second {0} in regexp '{1}'.", String.valueOf((char) delim), rawRegexp)); return flags; } private static ArrayList<Replacement> compileReplacement(Env env, StringValue replacement, boolean isEval) { ArrayList<Replacement> program = new ArrayList<Replacement>(); StringBuilder text = new StringBuilder(); for (int i = 0; i < replacement.length(); i++) { char ch = replacement.charAt(i); if ((ch == '\\' || ch == '$') && i + 1 < replacement.length()) { char digit; if ('0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') { int group = digit - '0'; i++; if (i + 1 < replacement.length() && '0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') { group = 10 * group + digit - '0'; i++; } if (text.length() > 0) program.add(new TextReplacement(text)); if (isEval) program.add(new GroupEscapeReplacement(group)); else program.add(new GroupReplacement(group)); text.setLength(0); } else if (ch == '\\') { i++; if (digit != '\\') { text.append('\\'); } text.append(digit); // took out test for ch == '$' because must be true //} else if (ch == '$' && digit == '{') { } else if (digit == '{') { i += 2; int group = 0; while (i < replacement.length() && '0' <= (digit = replacement.charAt(i)) && digit <= '9') { group = 10 * group + digit - '0'; i++; } if (digit != '}') { env.warning(L.l("bad regexp {0}", replacement)); throw new QuercusException("bad regexp"); } if (text.length() > 0) program.add(new TextReplacement(text)); if (isEval) program.add(new GroupEscapeReplacement(group)); else program.add(new GroupReplacement(group)); text.setLength(0); } else text.append(ch); } else text.append(ch); } if (text.length() > 0) program.add(new TextReplacement(text)); return program; } private static final String [] POSIX_CLASSES = { "[:alnum:]", "[:alpha:]", "[:blank:]", "[:cntrl:]", "[:digit:]", "[:graph:]", "[:lower:]", "[:print:]", "[:punct:]", "[:space:]", "[:upper:]", "[:xdigit:]" }; private static final String [] REGEXP_CLASSES = { "\\p{Alnum}", "\\p{Alpha}", "\\p{Blank}", "\\p{Cntrl}", "\\p{Digit}", "\\p{Graph}", "\\p{Lower}", "\\p{Print}", "\\p{Punct}", "\\p{Space}", "\\p{Upper}", "\\p{XDigit}" }; /** * Cleans the regexp from valid values that the Java regexps can't handle. */ private static String cleanRegexp(StringValue regexp, boolean isComments) { return cleanRegexp(regexp, isComments, 0); } private static String cleanRegexp(StringValue regexp, boolean isComments, int groupCount) { int len = regexp.length(); StringBuilder sb = new StringBuilder(); char quote = 0; boolean sawVerticalBar = false; for (int i = 0; i < len; i++) { char ch = regexp.charAt(i); if (sawVerticalBar) { if ((! Character.isWhitespace(ch)) && ch != '#' && ch != '|') sawVerticalBar = false; } switch (ch) { case '\\': //sb.append(ch); if (i + 1 < len) { i++; ch = regexp.charAt(i); if (ch == '0') { // Java's regexp requires \0 for octal sb.append('\\'); sb.append('0'); // php/151l sb.append(ch); } else if ('1' <= ch && ch <= '9') { // parse as int, if is backreference then use it for that, otherwise octal // php/151r, php/1530 int backref = 0; for (int j = i; j < len && backref <= groupCount; j++) { int digit = regexp.charAt(j); if ('0' <= digit && digit <= '9') backref = (backref * 10) + (digit - '0'); else break; } if (backref <= groupCount) sb.append('\\'); else sb.append("\\0"); sb.append(ch); } else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') { sb.append('\\'); int tail = regexp.indexOf('}', i + 1); if (tail > 0) { StringValue hex = regexp.substring(i + 2, tail); int length = hex.length(); if (length == 1) sb.append("x0" + hex); else if (length == 2) sb.append("x" + hex); else if (length == 3) sb.append("u0" + hex); else if (length == 4) sb.append("u" + hex); else throw new QuercusRuntimeException(L.l("illegal hex escape")); i = tail; } else { sb.append("\\x"); } } else if (Character.isLetter(ch)) { switch (ch) { case 'a': case 'c': case 'e': case 'f': case 'n': case 'r': case 't': case 'x': case 'd': case 'D': case 's': case 'S': case 'w': case 'W': case 'b': case 'B': case 'A': case 'Z': case 'z': case 'G': case 'p': //XXX: need to translate PHP properties to Java ones case 'P': //XXX: need to translate PHP properties to Java ones case 'X': //case 'C': byte matching, not supported sb.append('\\'); sb.append(ch); break; default: sb.append(ch); } } else { sb.append('\\'); sb.append(ch); } } else sb.append('\\'); break; case '[': if (quote == '[') { if (i + 1 < len && regexp.charAt(i + 1) == ':') { String test = regexp.substring(i).toString(); boolean hasMatch = false; for (int j = 0; j < POSIX_CLASSES.length; j++) { if (test.startsWith(POSIX_CLASSES[j])) { hasMatch = true; sb.append(REGEXP_CLASSES[j]); i += POSIX_CLASSES[j].length() - 1; } } if (! hasMatch) sb.append("\\["); } else sb.append("\\["); } else if (i + 1 < len && regexp.charAt(i + 1) == '[' && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) { // XXX: check regexp grammar // php/151n sb.append("[\\["); i += 1; } else if (i + 2 < len && regexp.charAt(i + 1) == '^' && regexp.charAt(i + 2) == ']') { sb.append("[^\\]"); i += 2; } else sb.append('['); if (quote == 0) quote = '['; break; case '#': if (quote == '[') { sb.append("\\#"); } else if (isComments) { sb.append(ch); for (i++; i < len; i++) { ch = regexp.charAt(i); sb.append(ch); if (ch == '\n' || ch == '\r') break; } } else { sb.append(ch); } break; case ']': sb.append(ch); if (quote == '[') quote = 0; break; /* commented out - braces also used for character properties (i.e. \p{L} * php/1534 case '{': if (i + 1 < len && ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9' || ch == ',')) { sb.append("{"); for (i++; i < len && ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ','); i++) { sb.append(ch); } if (i < len) sb.append(regexp.charAt(i)); } else { sb.append("\\{"); } break; case '}': sb.append("\\}"); break; */ case '|': // php/152o // php ignores subsequent vertical bars // // to accomodate drupal bug http://drupal.org/node/123750 if (! sawVerticalBar) { sb.append('|'); sawVerticalBar = true; } break; case ' ': // php/1533 if (quote == '[' && isComments) sb.append("\\ "); else sb.append(' '); break; default: sb.append(ch); } } return sb.toString(); } /** * Cleans the regexp from valid values that the Java regexps can't handle. * Ereg has a different syntax so need to handle it differently from preg. * XXX: find out how ereg is different from preg. */ private static String cleanEregRegexp(StringValue regexp, boolean isComments) { int len = regexp.length(); StringBuilder sb = new StringBuilder(); char quote = 0; boolean sawVerticalBar = false; for (int i = 0; i < len; i++) { char ch = regexp.charAt(i); if (sawVerticalBar) { if ((! Character.isWhitespace(ch)) && ch != '#' && ch != '|') sawVerticalBar = false; } switch (ch) { case '\\': if (quote == '[') { sb.append('\\'); sb.append('\\'); continue; } if (i + 1 < len) { i++; ch = regexp.charAt(i); if (ch == '0' || '1' <= ch && ch <= '3' && i + 1 < len && '0' <= regexp.charAt(i + 1) && ch <= '7') { // Java's regexp requires \0 for octal sb.append('\\'); sb.append('0'); sb.append(ch); } else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') { sb.append('\\'); int tail = regexp.indexOf('}', i + 1); if (tail > 0) { StringValue hex = regexp.substring(i + 2, tail); int length = hex.length(); if (length == 1) sb.append("x0" + hex); else if (length == 2) sb.append("x" + hex); else if (length == 3) sb.append("u0" + hex); else if (length == 4)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -