⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 javaregexpmodule.java

📁 RESIN 3.2 最新源码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
          @Optional("0") int flag)  {    if (input == null)      return NullValue.NULL;    Pattern pattern = compileRegexp(patternString);    Matcher matcher = null;    ArrayValue matchArray = new ArrayValueImpl();    for (Map.Entry<Value, Value> entry : input.entrySet()) {      Value entryValue = entry.getValue();      Value entryKey = entry.getKey();      matcher = pattern.matcher(entryValue.toString());      boolean found = matcher.find();      if (!found && (flag == PREG_GREP_INVERT))        matchArray.append(entryKey, entryValue);      else if (found && (flag != PREG_GREP_INVERT))        matchArray.append(entryKey, entryValue);    }    return matchArray;  }  /**   * Returns an array of strings produces from splitting the passed string   * around the provided pattern.  The pattern is case insensitive.   *   * @param patternString the pattern   * @param string the string to split   * @param limit if specified, the maximum number of elements in the array   * @return an array of strings split around the pattern string   */  public static ArrayValue spliti(Env env,          StringValue patternString,          StringValue string,          @Optional("-1") long limit)  {    if (limit < 0)      limit = Long.MAX_VALUE;    // php/151c    String cleanRegexp = cleanRegexp(patternString, false);    Pattern pattern = Pattern.compile(cleanRegexp, Pattern.CASE_INSENSITIVE);    ArrayValue result = new ArrayValueImpl();    Matcher matcher = pattern.matcher(string);    long count = 0;    int head = 0;    while ((matcher.find()) && (count < limit)) {      StringValue value;      if (count == limit - 1) {        value = string.substring(head);        head = string.length();      } else {        value = string.substring(head, matcher.start());        head = matcher.end();      }      result.put(value);      count++;    }    if ((head <= string.length()) && (count != limit)) {      result.put(string.substring(head));    }    return result;  }  private static Pattern 
compileRegexp(StringValue rawRegexp)  {    return compileRegexp(rawRegexp, 0);  }  private static Pattern compileRegexp(StringValue rawRegexp, int groupCount)  {    Pattern pattern = _patternCache.get(rawRegexp);    if (pattern != null)      return pattern;    if (rawRegexp.length() < 2) {      throw new IllegalStateException(L.l(              "Can't find delimiters in regexp '{0}'.",              rawRegexp));    }    char delim = rawRegexp.charAt(0);    if (delim == '{')      delim = '}';    else if (delim == '[')      delim = ']';    else if (delim == '(')      delim = ')';    else if (delim == '<')      delim = '>';    int tail = rawRegexp.lastIndexOf(delim);    if (tail <= 0)      throw new IllegalStateException(L.l(              "Can't find second {0} in regexp '{1}'.",              String.valueOf((char) delim),              rawRegexp));    int len = rawRegexp.length();    int flags = 0;    boolean isExt = false;    boolean isGreedy = true;    for (int i = tail + 1; i < len; i++) {      char ch = rawRegexp.charAt(i);      switch (ch) {      case 'i':        flags |= Pattern.CASE_INSENSITIVE;        break;      case 's':        flags |= Pattern.DOTALL;        break;      case 'x':        flags |= Pattern.COMMENTS;        break;      case 'm':        flags |= Pattern.MULTILINE;        break;      case 'U':        isGreedy = false;        break;      }    }    StringValue regexp = rawRegexp.substring(1, tail);    String cleanRegexp = cleanRegexp(regexp, (flags & Pattern.COMMENTS) != 0, groupCount);    if (! 
isGreedy)      cleanRegexp = toNonGreedy(cleanRegexp);    pattern = Pattern.compile(cleanRegexp, flags);    _patternCache.put(rawRegexp, pattern);    return pattern;  }  private static int regexpFlags(StringValue rawRegexp)  {    char delim = rawRegexp.charAt(0);    if (delim == '{')      delim = '}';    else if (delim == '[')      delim = ']';    else if (delim == '(')      delim = ')';    else if (delim == '<')      delim = '>';    int len = rawRegexp.length();    int flags = 0;    int tail = len - 1;    for (; tail >= 0; tail--) {      char ch = rawRegexp.charAt(tail);      if (ch == delim)        break;      else if (ch == 'e')        flags |= PREG_REPLACE_EVAL;      else if (ch == 'u')        flags |= PCRE_UTF8;    }    if (tail <= 0)      throw new IllegalStateException(L.l(              "Can't find second {0} in regexp '{1}'.",              String.valueOf((char) delim),              rawRegexp));    return flags;  }  private static ArrayList<Replacement>  compileReplacement(Env env, StringValue replacement, boolean isEval)  {    ArrayList<Replacement> program = new ArrayList<Replacement>();    StringBuilder text = new StringBuilder();    for (int i = 0; i < replacement.length(); i++) {      char ch = replacement.charAt(i);      if ((ch == '\\' || ch == '$') && i + 1 < replacement.length()) {        char digit;        if ('0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') {          int group = digit - '0';          i++;          if (i + 1 < replacement.length() &&                  '0' <= (digit = replacement.charAt(i + 1)) && digit <= '9') {            group = 10 * group + digit - '0';            i++;          }          if (text.length() > 0)            program.add(new TextReplacement(text));          if (isEval)            program.add(new GroupEscapeReplacement(group));          else            program.add(new GroupReplacement(group));          text.setLength(0);        }        else if (ch == '\\') {          i++;          if (digit != '\\') {     
       text.append('\\');          }          text.append(digit);          // took out test for ch == '$' because must be true          //} else if (ch == '$' && digit == '{') {        } else if (digit == '{') {          i += 2;          int group = 0;          while (i < replacement.length() &&                  '0' <= (digit = replacement.charAt(i)) && digit <= '9') {            group = 10 * group + digit - '0';            i++;          }          if (digit != '}') {            env.warning(L.l("bad regexp {0}", replacement));            throw new QuercusException("bad regexp");          }          if (text.length() > 0)            program.add(new TextReplacement(text));          if (isEval)            program.add(new GroupEscapeReplacement(group));          else            program.add(new GroupReplacement(group));          text.setLength(0);        }        else          text.append(ch);      }      else        text.append(ch);    }    if (text.length() > 0)      program.add(new TextReplacement(text));    return program;  }  private static final String [] POSIX_CLASSES = {    "[:alnum:]", "[:alpha:]", "[:blank:]", "[:cntrl:]",    "[:digit:]", "[:graph:]", "[:lower:]", "[:print:]",    "[:punct:]", "[:space:]", "[:upper:]", "[:xdigit:]"  };  private static final String [] REGEXP_CLASSES = {    "\\p{Alnum}", "\\p{Alpha}", "\\p{Blank}", "\\p{Cntrl}",    "\\p{Digit}", "\\p{Graph}", "\\p{Lower}", "\\p{Print}",    "\\p{Punct}", "\\p{Space}", "\\p{Upper}", "\\p{XDigit}"  };  /**   * Cleans the regexp from valid values that the Java regexps can't handle.   
*/
  private static String cleanRegexp(StringValue regexp,
          boolean isComments)
  {
    // Convenience overload: cleans with a capture-group count of 0.
    return cleanRegexp(regexp, isComments, 0);
  }

  /**
   * Rewrites a regexp body (delimiters and modifiers already removed) into
   * a form accepted by java.util.regex, scanning it character by character.
   *
   * <p>Constructs that are rewritten:
   * <ul>
   * <li>backslash-0 gets an extra leading 0 (Java's octal escape form)
   * <li>backslash followed by 1-9: kept as a back-reference when the decimal
   *     number is at most {@code groupCount}, otherwise turned into an
   *     octal escape
   * <li>backslash-x followed by a braced value of 1 to 4 characters: turned
   *     into a two-character x escape or a four-character u escape
   * <li>backslash followed by a letter outside the recognized list: the
   *     backslash is dropped
   * <li>POSIX classes such as [:alpha:] inside a character class: replaced
   *     by the matching entry of REGEXP_CLASSES
   * <li>a literal '[' or '#' inside a character class: escaped
   * <li>runs of '|' (optionally separated by whitespace or '#'): collapsed
   *     into a single '|'
   * <li>a space inside a character class in comments mode: escaped
   * </ul>
   *
   * @param regexp the regexp body to translate
   * @param isComments true if the pattern is compiled in comments mode
   * @param groupCount upper bound for a number to count as a back-reference
   * @return the translated regexp source
   * @throws QuercusRuntimeException if a braced hex escape holds no
   *         characters or more than four
   */
  private static String cleanRegexp(StringValue regexp,
          boolean isComments,
          int groupCount)
  {
    int len = regexp.length();

    StringBuilder sb = new StringBuilder();
    // '[' while inside a character class, 0 otherwise
    char quote = 0;

    // true right after a '|' was emitted, until another significant char
    boolean sawVerticalBar = false;

    for (int i = 0; i < len; i++) {
      char ch = regexp.charAt(i);

      if (sawVerticalBar) {
        // whitespace, '#' and '|' do not end a run of vertical bars
        if ((! Character.isWhitespace(ch)) &&
                ch != '#' &&
                ch != '|')
          sawVerticalBar = false;
      }

      switch (ch) {
      case '\\':
        //sb.append(ch);

        if (i + 1 < len) {
          // consume the escaped character; ch now holds it
          i++;

          ch = regexp.charAt(i);

          if (ch == '0') {
            // Java's regexp requires \0 for octal

            sb.append('\\');
            sb.append('0'); // php/151l
            sb.append(ch);
          }
          else if ('1' <= ch && ch <= '9') {
            // parse as int, if is backreference then use it for that, otherwise octal
            // php/151r, php/1530

            int backref = 0;

            // look ahead only: i is not advanced, so the remaining digits
            // are copied by the default case on later iterations
            for (int j = i; j < len && backref <= groupCount; j++)
            {
              int digit = regexp.charAt(j);

              if ('0' <= digit && digit <= '9')
                backref = (backref * 10) + (digit - '0');
              else
                break;
            }

            if (backref <= groupCount)
              sb.append('\\');
            else
              sb.append("\\0");

            sb.append(ch);
          }
          else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') {
            sb.append('\\');

            int tail = regexp.indexOf('}', i + 1);

            if (tail > 0) {
              // text between the braces
              StringValue hex = regexp.substring(i + 2, tail);

              int length = hex.length();

              // pad to two characters for an x escape or four for a u escape
              if (length == 1)
                sb.append("x0" + hex);
              else if (length == 2)
                sb.append("x" + hex);
              else if (length == 3)
                sb.append("u0" + hex);
              else if (length == 4)
                sb.append("u" + hex);
              else
                throw new QuercusRuntimeException(L.l("illegal hex escape"));

              // resume after the closing brace
              i = tail;
            }
            else {
              // NOTE(review): a backslash was already appended above, so this
              // emits two backslashes followed by x -- confirm that an
              // escaped literal backslash is what is wanted here.
              sb.append("\\x");
            }
          }
          else if (Character.isLetter(ch)) {
            switch (ch) {
            // letter escapes that are passed through with their backslash
            case 'a':
            case 'c':
            case 'e':
            case 'f':
            case 'n':
            case 'r':
            case 't':
            case 'x':
            case 'd':
            case 'D':
            case 's':
            case 'S':
            case 'w':
            case 'W':
            case 'b':
            case 'B':
            case 'A':
            case 'Z':
            case 'z':
            case 'G':
            case 'p': //XXX: need to translate PHP properties to Java ones
            case 'P': //XXX: need to translate PHP properties to Java ones
            case 'X':
              //case 'C': byte matching, not supported
              sb.append('\\');
              sb.append(ch);
              break;
            default:
              // any other letter: the backslash is dropped
              sb.append(ch);
            }
          }
          else {
            // escaped non-letter, non-digit: kept as written
            sb.append('\\');
            sb.append(ch);
          }
        }
        else
          // lone backslash at the very end of the regexp
          sb.append('\\');
        break;

      case '[':
        if (quote == '[') {
          // already inside a character class
          if (i + 1 < len && regexp.charAt(i + 1) == ':') {
            String test = regexp.substring(i).toString();
            boolean hasMatch = false;

            for (int j = 0; j < POSIX_CLASSES.length; j++) {
              if (test.startsWith(POSIX_CLASSES[j])) {
                hasMatch = true;

                sb.append(REGEXP_CLASSES[j]);

                // skip the class name; the loop increment steps over its
                // last character
                i += POSIX_CLASSES[j].length() - 1;
              }
            }

            if (! hasMatch)
              sb.append("\\[");
          }
          else
            sb.append("\\[");
        }
        else if (i + 1 < len && regexp.charAt(i + 1) == '['
          && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) {
          // XXX: check regexp grammar
          // php/151n
          sb.append("[\\[");
          i += 1;
        }
        else if (i + 2 < len &&
                regexp.charAt(i + 1) == '^' &&
                regexp.charAt(i + 2) == ']') {
          // a ']' directly after "[^" is emitted escaped
          sb.append("[^\\]");
          i += 2;
        }
        else
          sb.append('[');

        if (quote == 0)
          quote = '[';
        break;

      case '#':
        if (quote == '[') {
          sb.append("\\#");
        }
        else if (isComments) {
          // comments mode: copy through the end of the line unchanged
          sb.append(ch);

          for (i++; i < len; i++) {
            ch = regexp.charAt(i);

            sb.append(ch);

            if (ch == '\n' || ch == '\r')
              break;
          }
        }
        else {
          sb.append(ch);
        }

        break;

      case ']':
        sb.append(ch);

        // leaves the character class
        if (quote == '[')
          quote = 0;
        break;

      /* commented out - braces also used for character properties (i.e. \p{L}
       * php/1534
      case '{':
        if (i + 1 < len &&
                ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9' || ch == ',')) {
          sb.append("{");
          for (i++;
          i < len &&
          ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ',');
          i++) {
            sb.append(ch);
          }

          if (i < len)
            sb.append(regexp.charAt(i));
        }
        else {
          sb.append("\\{");
        }

        break;

      case '}':
        sb.append("\\}");
        break;
      */

      case '|':
        // php/152o
        // php ignores subsequent vertical bars
        //
        // to accomodate drupal bug http://drupal.org/node/123750
        if (! sawVerticalBar) {
          sb.append('|');
          sawVerticalBar = true;
         }
        break;

      case ' ':
        // php/1533
        if (quote == '[' && isComments)
          sb.append("\\ ");
        else
          sb.append(' ');
        break;

      default:
        sb.append(ch);
      }
    }

    return sb.toString();
  }

  /**
   * Cleans the regexp from valid values that the Java regexps can't handle.
   * Ereg has a different syntax so need to handle it differently from preg.
   * XXX: find out how ereg is different from preg.
   */
  private static String cleanEregRegexp(StringValue regexp,
          boolean isComments)
  {
    int len = regexp.length();

    StringBuilder sb = new StringBuilder();
    char quote = 0;

    boolean sawVerticalBar = false;

    for (int i = 0; i < len; i++) {
      char ch = regexp.charAt(i);

      if (sawVerticalBar) {
        if ((! Character.isWhitespace(ch)) &&
                ch != '#' &&
                ch != '|')
          sawVerticalBar = false;
      }

      switch (ch) {
      case '\\':
        if (quote == '[') {
          sb.append('\\');
          sb.append('\\');
          continue;
        }

        if (i + 1 < len) {
          i++;

          ch = regexp.charAt(i);

          if (ch == '0' ||
                  '1' <= ch && ch <= '3' && i + 1 < len && '0' <= regexp.charAt(i + 1) && ch <= '7') {
            // Java's regexp requires \0 for octal

            sb.append('\\');
            sb.append('0');
            sb.append(ch);
          }
          else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') {
            sb.append('\\');

            int tail = regexp.indexOf('}', i + 1);

            if (tail > 0) {
              StringValue hex = regexp.substring(i + 2, tail);

              int length = hex.length();

              if (length == 1)
                sb.append("x0" + hex);
              else if (length == 2)
                sb.append("x" + hex);
              else if (length == 3)
                sb.append("u0" + hex);
else if (length == 4)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -