📄 re.java

📁 Mac OS X 10.4.9 for x86 Source Code gcc 实现源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
      if (quot)      	unit.bk = false;      // ALTERNATION OPERATOR      //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)      //  not available if RE_LIMITED_OPS is set      // TODO: the '\n' literal here should be a test against REToken.newline,      // which unfortunately may be more than a single character.      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))	     || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )	   && !syntax.get(RESyntax.RE_LIMITED_OPS)) {	// make everything up to here be a branch. create vector if nec.	addToken(currentToken);	RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);	minimumLength = 0;	if (branches == null) {	    branches = new Vector();	}	branches.addElement(theBranch);	firstToken = lastToken = currentToken = null;      }            // INTERVAL OPERATOR:      //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)      //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)      //      // OPEN QUESTION:       //  what is proper interpretation of '{' at start of string?      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {	int newIndex = getMinMax(pattern,index,minMax,syntax);        if (newIndex > index) {          if (minMax.first > minMax.second)            throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);          if (currentToken == null)            throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);          if (currentToken instanceof RETokenRepeated)             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);          if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)            throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);          if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))            throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);          index = newIndex;          currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);         }        else {          addToken(currentToken);          currentToken = new RETokenChar(subIndex,unit.ch,insens);        }       }            // LIST OPERATOR:      //  [...] | [^...]      else if ((unit.ch == '[') && !(unit.bk || quot)) {	Vector options = new Vector();	boolean negative = false;	char lastChar = 0;	if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);		// Check for initial caret, negation	if ((ch = pattern[index]) == '^') {	  negative = true;	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	  ch = pattern[index];	}	// Check for leading right bracket literal	if (ch == ']') {	  lastChar = ch;	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	}	while ((ch = pattern[index++]) != ']') {	  if ((ch == '-') && (lastChar != 0)) {	    if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	    if ((ch = pattern[index]) == ']') {	      options.addElement(new RETokenChar(subIndex,lastChar,insens));	      lastChar = '-';	    } else {	      options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));	      lastChar = 0;	      index++;	    }          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	    int posixID = -1;	    boolean negate = false;            char asciiEsc = 0;	    if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {	      switch (pattern[index]) {	      case 'D':		negate = true;	      case 'd':		posixID = RETokenPOSIX.DIGIT;		break;	      case 'S':		negate = true;	      case 's':		posixID = RETokenPOSIX.SPACE;		break;	      case 'W':		negate = true;	      case 'w':		posixID = RETokenPOSIX.ALNUM;		break;	      }	    }            else if ("nrt".indexOf(pattern[index]) != -1) {              switch (pattern[index]) {                case 'n':                  asciiEsc = '\n';                  break;                case 't':                  asciiEsc = '\t';                  break;                case 'r':                  asciiEsc = '\r';                  break;              }            }	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));	    	    if (posixID != -1) {	      options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));	    } else if (asciiEsc != 0) {	      lastChar = asciiEsc;	    } else {	      lastChar = pattern[index];	    }	    ++index;	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {	    StringBuffer posixSet = new StringBuffer();	    index = getPosixSet(pattern,index+1,posixSet);	    int posixId = RETokenPOSIX.intValue(posixSet.toString());	    if (posixId != -1)	      options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));	  } else {	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));	    lastChar = ch;	  }	  if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	} // while in list	// Out of list, index is one past ']'	    	if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));	    	// Create a new RETokenOneOf	addToken(currentToken);	options.trimToSize();	currentToken = new RETokenOneOf(subIndex,options,negative);      }      // SUBEXPRESSIONS      //  (...) | \(...\) depending on RE_NO_BK_PARENS      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {	boolean pure = false;	boolean comment = false;        boolean lookAhead = false;        boolean negativelh = false;	if ((index+1 < pLength) && (pattern[index] == '?')) {	  switch (pattern[index+1]) {          case '!':            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {              pure = true;              negativelh = true;              lookAhead = true;              index += 2;            }            break;          case '=':            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {              pure = true;              lookAhead = true;              index += 2;            }            break;	  case ':':	    if (syntax.get(RESyntax.RE_PURE_GROUPING)) {	      pure = true;	      index += 2;	    }	    break;	  case '#':	    if (syntax.get(RESyntax.RE_COMMENTS)) {	      comment = true;	    }	    break;          default:            throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);	  }	}	if (index >= pLength) {	    throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);	}	// find end of subexpression	int endIndex = index;	int nextIndex = index;	int nested = 0;	while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)		&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )	  if ((endIndex = nextIndex) >= pLength)	    throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);	  else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))	    nested++;	  else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))	    nested--;	// endIndex is now position at a ')','\)' 	// nextIndex is end of string or position after ')' or '\)'	if (comment) index = nextIndex;	else { // not a comment	  // create RE subexpression as token.	  addToken(currentToken);	  if (!pure) {	    numSubs++;	  }	  int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;	  currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);	  numSubs += ((RE) currentToken).getNumSubs();          if (lookAhead) {	      currentToken = new RETokenLookAhead(currentToken,negativelh);	  }	  index = nextIndex;	} // not a comment      } // subexpression          // UNMATCHED RIGHT PAREN      // ) or \) throw exception if      // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {	throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);      }      // START OF LINE OPERATOR      //  ^      else if ((unit.ch == '^') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = null;	addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));      }      // END OF LINE OPERATOR      //  $      else if ((unit.ch == '$') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = null;	addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));      }      // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)      //  .      else if ((unit.ch == '.') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));      }      // ZERO-OR-MORE REPEAT OPERATOR      //  *      else if ((unit.ch == '*') && !(unit.bk || quot)) {	if (currentToken == null)          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenRepeated)          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	if (currentToken.getMinimumLength() == 0)	  throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);	currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);      }      // ONE-OR-MORE REPEAT OPERATOR      //  + | \+ depending on RE_BK_PLUS_QM      //  not available if RE_LIMITED_OPS is set      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {	if (currentToken == null)          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenRepeated)          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	if (currentToken.getMinimumLength() == 0)	  throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);	currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);      }      // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR      //  ? | \? depending on RE_BK_PLUS_QM      //  not available if RE_LIMITED_OPS is set      //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {	if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);	// Check for stingy matching on RETokenRepeated	if (currentToken instanceof RETokenRepeated) {          if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())            ((RETokenRepeated)currentToken).makeStingy();          else            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);        }        else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	else	  currentToken = setRepeated(currentToken,0,1,index);      }	      // BACKREFERENCE OPERATOR      //  \1 \2 ... \9      // not available if RE_NO_BK_REFS is set      else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {	addToken(currentToken);	currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);      }      // START OF STRING OPERATOR      //  \A if RE_STRING_ANCHORS is set            else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	addToken(currentToken);	currentToken = new RETokenStart(subIndex,null);      }      // WORD BREAK OPERATOR      //  \b if ????      else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	  addToken(currentToken);	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);      }       // WORD BEGIN OPERATOR
💿 文件大小 38649 K
👤 上传用户 liuchuyuan
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#Source #Code #Mac #for
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -