cauchoregexpmodule.java

来自「RESIN 3.2 最新源码」· Java 代码 · 共 1,953 行 · 第 1/4 页

JAVA
1,953
字号
          if (digit != '}') {            env.warning(L.l("bad regexp {0}", replacement));            throw new QuercusException("bad regexp");          }          if (text.length() > 0)            program.add(new TextReplacement(text));          if (isEval)            program.add(new GroupEscapeReplacement(group));          else            program.add(new GroupReplacement(group));          text.setLength(0);        }        else          text.append(ch);      }      else        text.append(ch);    }    if (text.length() > 0)      program.add(new TextReplacement(text));    return program;  }  /**   * Cleans the regexp from valid values that the Java regexps can't handle.   * Ereg has a different syntax so need to handle it differently from preg.   */  private static StringValue cleanEregRegexp(Env env,                                             StringValue regexp,                                             boolean isComments)  {    int len = regexp.length();    StringValue sb = regexp.createStringBuilder();    char quote = 0;    boolean sawVerticalBar = false;    for (int i = 0; i < len; i++) {      char ch = regexp.charAt(i);      if (sawVerticalBar) {        if ((! Character.isWhitespace(ch)) &&                ch != '#' &&                ch != '|')          sawVerticalBar = false;      }      switch (ch) {      case '\\':        if (quote == '[') {          sb = sb.appendByte('\\');          sb = sb.appendByte('\\');          continue;        }        if (i + 1 < len) {          i++;          ch = regexp.charAt(i);          if (ch == '0' ||                  '1' <= ch && ch <= '3' && i + 1 < len && '0' <= regexp.charAt(i + 1) && ch <= '7') {            // Java's regexp requires \0 for octal            sb = sb.appendByte('\\');            sb = sb.appendByte('0');            sb = sb.appendByte(ch);          }          else if (ch == 'x' && i + 1 < len && regexp.charAt(i + 1) == '{') {            sb = sb.appendByte('\\');            int tail = regexp.indexOf('}', i + 1);            if (tail > 0) {              StringValue hex = regexp.substring(i + 2, tail);              int length = hex.length();              if (length == 1)                sb = sb.appendBytes("x0" + hex);              else if (length == 2)                sb = sb.appendBytes("x" + hex);              else if (length == 3)                sb = sb.appendBytes("u0" + hex);              else if (length == 4)                sb = sb.appendBytes("u" + hex);              else                throw new QuercusRuntimeException(L.l("illegal hex escape"));              i = tail;            }            else {              sb = sb.appendByte('\\');              sb = sb.appendByte('x');            }          }          else if (Character.isLetter(ch)) {            switch (ch) {            case 'a':            case 'c':            case 'e':            case 'f':            case 'n':            case 'r':            case 't':            case 'x':            case 'd':            case 'D':            case 's':            case 'S':            case 'w':            case 'W':            case 'b':            case 'B':            case 'A':            case 'Z':            case 'z':            case 'G':            case 'p': //XXX: need to translate PHP properties to Java ones            case 'P': //XXX: need to translate PHP properties to Java ones            case 'X':              //case 'C': byte matching, not supported              sb = sb.appendByte('\\');              sb = sb.appendByte(ch);              break;            default:              sb = sb.appendByte(ch);            }          }          else {            sb = sb.appendByte('\\');            sb = sb.appendByte(ch);          }        }        else          sb = sb.appendByte('\\');        break;      case '[':        if (quote == '[') {          if (i + 1 < len && regexp.charAt(i + 1) == ':') {            sb = sb.appendByte('[');          }          else {            sb = sb.appendByte('\\');            sb = sb.appendByte('[');          }        }        else if (i + 1 < len && regexp.charAt(i + 1) == '['          && ! (i + 2 < len && regexp.charAt(i + 2) == ':')) {          // XXX: check regexp grammar          // php/151n          sb = sb.appendByte('[');          sb = sb.appendByte('\\');          sb = sb.appendByte('[');          i += 1;        }        /*        else if (i + 2 < len &&                regexp.charAt(i + 1) == '^' &&                regexp.charAt(i + 2) == ']') {          sb.append("[^\\]");          i += 2;        }        */        else          sb = sb.appendByte('[');        if (quote == 0)          quote = '[';        break;      case '#':        if (quote == '[') {          sb = sb.appendByte('\\');          sb = sb.appendByte('#');        }        else if (isComments) {          sb = sb.appendByte(ch);          for (i++; i < len; i++) {            ch = regexp.charAt(i);            sb = sb.appendByte(ch);            if (ch == '\n' || ch == '\r')              break;          }        }        else {          sb = sb.appendByte(ch);        }        break;      case ']':        sb = sb.appendByte(ch);        if (quote == '[')          quote = 0;        break;      case '{':        if (i + 1 < len &&                ('0' <= (ch = regexp.charAt(i + 1)) && ch <= '9' || ch == ',')) {          sb = sb.appendByte('{');          for (i++;          i < len &&          ('0' <= (ch = regexp.charAt(i)) && ch <= '9' || ch == ',');          i++) {            sb = sb.appendByte(ch);          }          if (i < len)            sb = sb.appendByte(regexp.charAt(i));        }        else {          sb = sb.appendByte('\\');          sb = sb.appendByte('{');        }        break;      case '}':        sb = sb.appendByte('\\');        sb = sb.appendByte('}');        break;      case '|':        // php/152o        // php ignores subsequent vertical bars        //        // to accomodate drupal bug http://drupal.org/node/123750        if (! sawVerticalBar) {          sb = sb.appendByte('|');          sawVerticalBar = true;         }        break;      default:        sb = sb.appendByte(ch);      }    }    return sb;  }  abstract static class Replacement {    abstract StringValue eval(Env env,			      StringValue sb,			      RegexpState regexpState);    public String toString()    {      return getClass().getSimpleName() + "[]";    }  }  static class TextReplacement  extends Replacement  {    private char []_text;    TextReplacement(StringBuilder text)    {      int length = text.length();      _text = new char[length];      text.getChars(0, length, _text, 0);    }    @Override    StringValue eval(Env env,                     StringValue sb,                     RegexpState regexpState)    {      return sb.appendBytes(_text, 0, _text.length);    }    public String toString()    {      StringBuilder sb = new StringBuilder();      sb.append(getClass().getSimpleName());      sb.append('[');      for (char ch : _text)        sb.append(ch);      sb.append(']');      return sb.toString();    }  }  static class GroupReplacement  extends Replacement  {    private int _group;    GroupReplacement(int group)    {      _group = group;    }    @Override    StringValue eval(Env env,                     StringValue sb,                     RegexpState regexpState)    {      if (_group < regexpState.groupCount())        sb = sb.append(regexpState.group(env, _group));            return sb;    }    public String toString()    {      return getClass().getSimpleName() + "[" + _group + "]";    }  }  static class GroupEscapeReplacement    extends Replacement  {    private int _group;    GroupEscapeReplacement(int group)    {      _group = group;    }    @Override    StringValue eval(Env env,                     StringValue sb,                     RegexpState regexpState)    {      if (_group < regexpState.groupCount()) {        StringValue group = regexpState.group(env, _group);        int len = group.length();        for (int i = 0; i < len; i++) {          char ch = group.charAt(i);          if (ch == '\'') {            sb = sb.appendByte('\\');            sb = sb.appendByte('\'');          }          else if (ch == '\"') {            sb = sb.appendByte('\\');            sb = sb.appendByte('\"');          }          else            sb = sb.appendByte(ch);        }      }            return sb;    }    public String toString()    {      return getClass().getSimpleName() + "[" + _group + "]";    }  }  /**   * Holds information about the left neighbor of a particular group.   */  static class GroupNeighborMap  {    private int []_neighborMap;    private static int UNSET = -1;    public GroupNeighborMap(CharSequence regexp, int groups)    {       _neighborMap = new int[groups + 1];      for (int i = 1; i <= groups; i++) {        _neighborMap[i] = UNSET;      }      boolean sawEscape = false;      boolean sawVerticalBar = false;      boolean isLiteral = false;      int group = 0;      int parent = UNSET;      int length = regexp.length();      ArrayList<Boolean> openParenStack = new ArrayList<Boolean>(groups);      for (int i = 0; i < length; i++) {        char ch = regexp.charAt(i);        if (ch == ' ' || ch == '\t' || ch == '\n' || ch == 'r' || ch == '\f') {          continue;        }        else if (ch == '\\') {          sawEscape = ! sawEscape;          continue;        }        else if (ch == '[' && ! sawEscape) {          isLiteral = true;        }        else if (ch == ']' && ! sawEscape) {          isLiteral = false;        }        else if (isLiteral || sawEscape) {          sawEscape = false;        }        else if (ch == '(') {          if (i + 1 < length && regexp.charAt(i + 1) == '?') {            openParenStack.add(true);            continue;          }          openParenStack.add(false);          group++;          if (sawVerticalBar) {            sawVerticalBar = false;            _neighborMap[group] = group - 1;          }          else {            _neighborMap[group] = parent;            parent = group;          }        }        else if (ch == ')') {          if (openParenStack.remove(openParenStack.size() - 1))            continue;          sawVerticalBar = false;        }        else if (ch == '|') {          sawVerticalBar = true;        }        else {        }      }    }    public boolean hasNeighbor(int group)    {      return _neighborMap[group] != UNSET;    }    public int getNeighbor(int group)    {      return _neighborMap[group];    }  }    static {    PREG_QUOTE['\\'] = true;    PREG_QUOTE['+'] = true;    PREG_QUOTE['*'] = true;    PREG_QUOTE['?'] = true;    PREG_QUOTE['['] = true;    PREG_QUOTE['^'] = true;    PREG_QUOTE[']'] = true;    PREG_QUOTE['$'] = true;    PREG_QUOTE['('] = true;    PREG_QUOTE[')'] = true;    PREG_QUOTE['{'] = true;    PREG_QUOTE['}'] = true;    PREG_QUOTE['='] = true;    PREG_QUOTE['!'] = true;    PREG_QUOTE['<'] = true;    PREG_QUOTE['>'] = true;    PREG_QUOTE['|'] = true;    PREG_QUOTE[':'] = true;    PREG_QUOTE['.'] = true;  }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?