regcomp.java

来自「RESIN 3.2 最新源码」· Java 代码 · 共 1,575 行 · 第 1/3 页
JAVA
1,575 行
/* * Copyright (c) 1998-2008 Caucho Technology -- all rights reserved * * This file is part of Resin(R) Open Source * * Each copy or derived work must preserve the copyright notice and this * notice unmodified. * * Resin Open Source is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Resin Open Source is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty * of NON-INFRINGEMENT.  See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with Resin Open Source; if not, write to the * *   Free Software Foundation, Inc. *   59 Temple Place, Suite 330 *   Boston, MA 02111-1307  USA * * @author Scott Ferguson *//* * XXX: anchored expressions should have flags for quick matching. */package com.caucho.quercus.lib.regexp;import java.util.*;import java.util.logging.*;import com.caucho.quercus.env.StringValue;import com.caucho.quercus.env.StringBuilderValue;import com.caucho.util.*;/** * Regular expression compilation. 
*/class Regcomp {  private static final Logger log    = Logger.getLogger(Regcomp.class.getName());  private static final L10N L = new L10N(RegexpNode.class);  // #2526, JIT issues with Integer.MAX_VALUE  private static final int INTEGER_MAX = Integer.MAX_VALUE - 1;    static final int MULTILINE = 0x1;  static final int SINGLE_LINE = 0x2;  static final int IGNORE_CASE = 0x4;  static final int IGNORE_WS = 0x8;  static final int GLOBAL = 0x10;  static final int ANCHORED = 0x20;  static final int END_ONLY = 0x40;  static final int UNGREEDY = 0x80;  static final int STRICT = 0x100;    static final HashMap<String,Integer> _characterClassMap    = new HashMap<String,Integer>();    int _nGroup;  int _nLoop;  int _maxGroup;  int _flags;  HashMap<Integer,StringValue> _groupNameMap    = new HashMap<Integer,StringValue>();  HashMap<StringValue,Integer> _groupNameReverseMap    = new HashMap<StringValue,Integer>();  RegexpNode _groupTail;    boolean _isLookbehind;  boolean _isOr;    Regcomp(int flags)  {    _flags = flags;  }  boolean isGreedy()  {    return (_flags & UNGREEDY) != UNGREEDY;  }  boolean isIgnoreCase()  {    return (_flags & IGNORE_CASE) == IGNORE_CASE;  }  boolean isIgnoreWs()  {    return (_flags & IGNORE_WS) == IGNORE_WS;  }  boolean isMultiline()  {    return (_flags & MULTILINE) == MULTILINE;  }  boolean isDollarEndOnly()  {    return (_flags & END_ONLY) == END_ONLY;  }  int nextLoopIndex()  {    return _nLoop++;  }  RegexpNode parse(PeekStream pattern) throws IllegalRegexpException  {    _nGroup = 1;    RegexpNode begin = null;    if ((_flags & ANCHORED) != 0)      begin = RegexpNode.ANCHOR_BEGIN;        RegexpNode value = parseRec(pattern, begin);    int ch;    while ((ch = pattern.read()) == '|') {      value = RegexpNode.Or.create(value, parseRec(pattern, begin));    }        value = value != null ? 
value.getHead() : RegexpNode.N_END;    if (_maxGroup < _nGroup)      _maxGroup = _nGroup;    if (log.isLoggable(Level.FINEST))      log.finest("regexp[] " + value);    return value;  }    /**   *   Recursively compile a RegexpNode.   *   * first      -- The first node of this sub-RegexpNode   * prev       -- The previous node of this sub-RegexpNode   * last_begin -- When the last grouping began   * last_end   -- When the last grouping ended   *   * head       ->  node   *                 v -- rest   *                ...   *                 v -- rest   *                node   *   * last       ->  node   *                 v -- rest   *                ...   *                 v -- rest   *                node   */  private RegexpNode parseRec(PeekStream pattern, RegexpNode tail)    throws IllegalRegexpException  {    int ch = pattern.read();    RegexpNode next;    RegexpNode groupTail;    switch (ch) {    case -1:      return tail != null ? tail.getHead() : null;    case '?':      if (tail == null)	throw error(L.l("'?' requires a preceeding regexp"));      tail = createLoop(pattern, tail, 0, 1);            return parseRec(pattern, tail.getTail());    case '*':      if (tail == null)	throw error(L.l("'*' requires a preceeding regexp"));      tail = createLoop(pattern, tail, 0, INTEGER_MAX);            return parseRec(pattern, tail.getTail());    case '+':      if (tail == null)	throw error(L.l("'+' requires a preceeding regexp"));      tail = createLoop(pattern, tail, 1, INTEGER_MAX);            return parseRec(pattern, tail.getTail());    case '{':      if (tail == null || ! 
('0' <= pattern.peek() && pattern.peek() <= '9')) {	next = parseString('{', pattern);      	return concat(tail, parseRec(pattern, next));      }      return parseRec(pattern, parseBrace(pattern, tail).getTail());    case '.':      if ((_flags & SINGLE_LINE) == 0)	next = RegexpNode.DOT;      else	next = RegexpNode.ANY_CHAR;	      return concat(tail, parseRec(pattern, next));    case '|':      pattern.ungetc(ch);      if (_groupTail != null)	return concat(tail, _groupTail);      else	return tail.getHead();    case '(':      {	switch (pattern.peek()) {	case '?':	  pattern.read();	  switch (pattern.peek()) {	  case ':':	    pattern.read();	    return parseGroup(pattern, tail, 0, _flags);	    	  case '#':	    parseCommentGroup(pattern);	    	    return parseRec(pattern, tail);	    	  case '(':	    return parseConditional(pattern, tail);	    	  case '=':	  case '!':	    ch = pattern.read();	    boolean isPositive = (ch == '=');	    groupTail = _groupTail;	    _groupTail = null;	    next = parseRec(pattern, null);	    	    while ((ch = pattern.read()) == '|') {	      RegexpNode nextHead = parseRec(pattern, null);	      next = next.createOr(nextHead);	    }	    if (isPositive)	      next = new RegexpNode.Lookahead(next);	    else	      next = new RegexpNode.NotLookahead(next);	    if (ch != ')')	      throw error(L.l("expected ')' at '{0}'",			      String.valueOf((char) ch)));	    _groupTail = groupTail;	    return concat(tail, parseRec(pattern, next));	    	  case '<':	    pattern.read();	    switch (pattern.read()) {	    case '=':	      isPositive = true;	      break;	    case '!':	      isPositive = false;	      break;	    default:	      throw error(L.l("expected '=' or '!'"));	    }	    groupTail = _groupTail;	    _groupTail = null;	    next = parseRec(pattern, null);	    if (next == null) {	    }	    else if (isPositive)	      next = new RegexpNode.Lookbehind(next);	    else	      next = new RegexpNode.NotLookbehind(next);	    	    while ((ch = pattern.read()) == 
'|') {	      RegexpNode second = parseRec(pattern, null);	      if (second == null) {	      }	      else if (isPositive)		second = new RegexpNode.Lookbehind(second);	      else		second = new RegexpNode.NotLookbehind(second);	      if (second != null)		next = next.createOr(second);	    }	    if (ch != ')')	      throw error(L.l("expected ')' at '{0}'",			      String.valueOf((char) ch)));	    _groupTail = groupTail;	    return concat(tail, parseRec(pattern, next));	  	  // XXX: once-only subpatterns (mostly an optimization feature)	  case '>':	    pattern.read();	    return parseGroup(pattern, tail, 0, _flags);	  case 'P':	    pattern.read();	    return parseNamedGroup(pattern, tail);	  case 'm': case 's': case 'i': case 'x': case 'g':	  case 'U': case 'X':	    {	      int flags = _flags;	      	      while ((ch = pattern.read()) > 0 && ch != ')') {		switch (ch) {		case 'm': _flags |= MULTILINE; break;		case 's': _flags |= SINGLE_LINE; break;		case 'i': _flags |= IGNORE_CASE; break;		case 'x': _flags |= IGNORE_WS; break;		case 'g': _flags |= GLOBAL; break;		case 'U': _flags |= UNGREEDY; break;		case 'X': _flags |= STRICT; break;		case ':':		  {		    return parseGroup(pattern, tail, 0, flags);		  }		default:		  throw error(L.l("'{0}' is an unknown (? code", String.valueOf((char) ch)));		}	      }	      if (ch != ')')		throw error(L.l("expected ')' at '{0}'",				String.valueOf((char) ch)));	      	      RegexpNode node = parseRec(pattern, tail);	      _flags = flags;	      return node;	    }	    	  default:	    throw error(L.l("'{0}' is an unknown (? 
code", String.valueOf((char) pattern.peek())));	  }	  	default:	  return parseGroup(pattern, tail, _nGroup++, _flags);	}      }    case ')':      pattern.ungetc(ch);      if (_groupTail != null)	return concat(tail, _groupTail);      else	return tail;    case '[':      next = parseSet(pattern);      return concat(tail, parseRec(pattern, next));          case '\\':      next = parseSlash(pattern);            return concat(tail, parseRec(pattern, next));          case '^':      if (isMultiline())	next = RegexpNode.ANCHOR_BEGIN_OR_NEWLINE;      else	next = RegexpNode.ANCHOR_BEGIN;            return concat(tail, parseRec(pattern, next));          case '$':      if (isMultiline())	next = RegexpNode.ANCHOR_END_OR_NEWLINE;      else if (isDollarEndOnly())	next = RegexpNode.ANCHOR_END_ONLY;      else	next = RegexpNode.ANCHOR_END;            return concat(tail, parseRec(pattern, next));    case ' ': case '\n': case '\t': case '\r':      if (isIgnoreWs()) {	while (Character.isSpace((char) pattern.peek()))	  pattern.read();	return parseRec(pattern, tail);      }      else {	next = parseString(ch, pattern);      	return concat(tail, parseRec(pattern, next));      }    case '#':      if (isIgnoreWs()) {	while ((ch = pattern.read()) > 0 && ch!= '\n') {	}	return parseRec(pattern, tail);      }      else {	next = parseString(ch, pattern);      	return concat(tail, parseRec(pattern, next));      }          default:      next = parseString(ch, pattern);            return concat(tail, parseRec(pattern, next));    }  }  private void parseCommentGroup(PeekStream pattern)  {    int ch;        // (?#...) 
Comment    while ((ch = pattern.read()) >= 0 && ch != ')') {    }  }    private RegexpNode parseNamedGroup(PeekStream pattern, RegexpNode tail)    throws IllegalRegexpException  {    int ch = pattern.read();    if (ch == '=') {      StringBuilder sb = new StringBuilder();      while ((ch = pattern.read()) != ')' && ch >= 0) {	sb.append((char) ch);      }      if (ch != ')')	throw error(L.l("expected ')'"));      String name = sb.toString();            Integer v = _groupNameReverseMap.get(new StringBuilderValue(name));      if (v != null) {	RegexpNode next = new RegexpNode.GroupRef(v);      	return concat(tail, parseRec(pattern, next));      }      else	throw error(L.l("'{0}' is an unknown regexp group", name));    }    else if (ch == '<') {      StringBuilder sb = new StringBuilder();      while ((ch = pattern.read()) != '>' && ch >= 0) {	sb.append((char) ch);      }      if (ch != '>')	throw error(L.l("expected '>'"));      String name = sb.toString();      int group = _nGroup++;      _groupNameMap.put(group, new StringBuilderValue(name));      _groupNameReverseMap.put(new StringBuilderValue(name), group);      return parseGroup(pattern, tail, group, _flags);    }    else      throw error(L.l("Expected '(?:P=name' or '(?:P<name' for named group"));  }  private RegexpNode parseConditional(PeekStream pattern, RegexpNode tail)    throws IllegalRegexpException  {    int ch = pattern.read();    if (ch != '(')      throw error(L.l("expected '('"));        RegexpNode.ConditionalHead groupHead = null;;    RegexpNode groupTail = null;    if ('1' <= (ch = pattern.peek()) && ch <= '9') {      int value = 0;      while ('0' <= (ch = pattern.read()) && ch <= '9') {	value = 10 * value + ch - '0';      }      if (ch != ')')	throw error(L.l("expected ')'"));      if (_nGroup <= value)	throw error(L.l("conditional value less than number of groups"));      groupHead = new RegexpNode.ConditionalHead(value);
regcomp.java - 源码说明

本页面展示了「RESIN 3.2 最新源码」中的 regcomp.java 源码文件，采用 Java 编程语言编写，共 1,575 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与RESIN相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?