📄 re.java

📁 linux下建立JAVA虚拟机的源码KAFFE
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
      // ALTERNATION OPERATOR      //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)      //  not available if RE_LIMITED_OPS is set      // TODO: the '\n' literal here should be a test against REToken.newline,      // which unfortunately may be more than a single character.      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))	     || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )	   && !syntax.get(RESyntax.RE_LIMITED_OPS)) {	// make everything up to here be a branch. create vector if nec.	addToken(currentToken);	RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength, maximumLength);	minimumLength = 0;	maximumLength = 0;	if (branches == null) {	    branches = new Vector();	}	branches.addElement(theBranch);	firstToken = lastToken = currentToken = null;      }            // INTERVAL OPERATOR:      //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)      //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)      //      // OPEN QUESTION:       //  what is proper interpretation of '{' at start of string?      //      // This method used to check "repeat.empty.token" to avoid such regexp      // as "(a*){2,}", but now "repeat.empty.token" is allowed.      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {	int newIndex = getMinMax(pattern,index,minMax,syntax);        if (newIndex > index) {          if (minMax.first > minMax.second)            throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);          if (currentToken == null)            throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);          if (currentToken instanceof RETokenRepeated)             throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);          if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)            throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);          index = newIndex;          currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);         }        else {          addToken(currentToken);          currentToken = new RETokenChar(subIndex,unit.ch,insens);        }       }            // LIST OPERATOR:      //  [...] | [^...]      else if ((unit.ch == '[') && !(unit.bk || quot)) {	// Create a new RETokenOneOf	ParseCharClassResult result = parseCharClass(		subIndex, pattern, index, pLength, cflags, syntax, 0);	addToken(currentToken);	currentToken = result.token;	index = result.index;      }      // SUBEXPRESSIONS      //  (...) | \(...\) depending on RE_NO_BK_PARENS      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {	boolean pure = false;	boolean comment = false;        boolean lookAhead = false;        boolean lookBehind = false;        boolean independent = false;        boolean negativelh = false;        boolean negativelb = false;	if ((index+1 < pLength) && (pattern[index] == '?')) {	  switch (pattern[index+1]) {          case '!':            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {              pure = true;              negativelh = true;              lookAhead = true;              index += 2;            }            break;          case '=':            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {              pure = true;              lookAhead = true;              index += 2;            }            break;	  case '<':	    // We assume that if the syntax supports look-ahead,	    // it also supports look-behind.	    if (syntax.get(RESyntax.RE_LOOKAHEAD)) {		index++;		switch (pattern[index +1]) {		case '!':		  pure = true;		  negativelb = true;		  lookBehind = true;		  index += 2;		  break;		case '=':		  pure = true;		  lookBehind = true;		  index += 2;		}	    }	    break;	  case '>':	    // We assume that if the syntax supports look-ahead,	    // it also supports independent group.            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {              pure = true;              independent = true;              index += 2;            }            break;	  case 'i':	  case 'd':	  case 'm':	  case 's':	  // case 'u':  not supported	  // case 'x':  not supported	  case '-':            if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;	    // Set or reset syntax flags.	    int flagIndex = index + 1;	    int endFlag = -1;	    RESyntax newSyntax = new RESyntax(syntax);	    int newCflags = cflags;	    boolean negate = false;	    while (flagIndex < pLength && endFlag < 0) {	        switch(pattern[flagIndex]) {	  	case 'i':		  if (negate)		    newCflags &= ~REG_ICASE;		  else		    newCflags |= REG_ICASE;		  flagIndex++;		  break;	  	case 'd':		  if (negate)		    newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);		  else		    newSyntax.setLineSeparator("\n");		  flagIndex++;		  break;	  	case 'm':		  if (negate)		    newCflags &= ~REG_MULTILINE;		  else		    newCflags |= REG_MULTILINE;		  flagIndex++;		  break;	  	case 's':		  if (negate)		    newCflags &= ~REG_DOT_NEWLINE;		  else		    newCflags |= REG_DOT_NEWLINE;		  flagIndex++;		  break;	  	// case 'u': not supported	  	// case 'x': not supported	  	case '-':		  negate = true;		  flagIndex++;		  break;		case ':':		case ')':		  endFlag = pattern[flagIndex];		  break;		default:            	  throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);		}	    }	    if (endFlag == ')') {		syntax = newSyntax;		cflags = newCflags;		insens = ((cflags & REG_ICASE) > 0);		// This can be treated as though it were a comment.		comment = true;		index = flagIndex - 1;		break;	    }	    if (endFlag == ':') {		savedSyntax = syntax;		savedCflags = cflags;		flagsSaved = true;		syntax = newSyntax;		cflags = newCflags;		insens = ((cflags & REG_ICASE) > 0);		index = flagIndex -1;		// Fall through to the next case.	    }	    else {	        throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);	    }	  case ':':	    if (syntax.get(RESyntax.RE_PURE_GROUPING)) {	      pure = true;	      index += 2;	    }	    break;	  case '#':	    if (syntax.get(RESyntax.RE_COMMENTS)) {	      comment = true;	    }	    break;          default:            throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);	  }	}	if (index >= pLength) {	    throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);	}	// find end of subexpression	int endIndex = index;	int nextIndex = index;	int nested = 0;	while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)		&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) {	  if ((endIndex = nextIndex) >= pLength)	    throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);	  else if ((unit.ch == '[') && !(unit.bk || quot)) {	    // I hate to do something similar to the LIST OPERATOR matters	    // above, but ...	    int listIndex = nextIndex;	    if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;	    if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;	    int listEndIndex = -1;	    int listNest = 0;	    while (listIndex < pLength && listEndIndex < 0) {	      switch(pattern[listIndex++]) {		case '\\':		  listIndex++;		  break;		case '[':		  // Sun's API document says that regexp like "[a-d[m-p]]"		  // is legal. Even something like "[[[^]]]]" is accepted.		  listNest++;		  if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;		  if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;		  break;		case ']':		  if (listNest == 0)		    listEndIndex = listIndex;		  listNest--;		  break;	      }	    }	    if (listEndIndex >= 0) {	      nextIndex = listEndIndex;	      if ((endIndex = nextIndex) >= pLength)	        throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);	      else	        continue;	    }	    throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);	  }	  else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))	    nested++;	  else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))	    nested--;	}	// endIndex is now position at a ')','\)' 	// nextIndex is end of string or position after ')' or '\)'	if (comment) index = nextIndex;	else { // not a comment	  // create RE subexpression as token.	  addToken(currentToken);	  if (!pure) {	    numSubs++;	  }	  int useIndex = (pure || lookAhead || lookBehind || independent) ?			 0 : nextSub + numSubs;	  currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);	  numSubs += ((RE) currentToken).getNumSubs();          if (lookAhead) {	      currentToken = new RETokenLookAhead(currentToken,negativelh);	  }          else if (lookBehind) {	      currentToken = new RETokenLookBehind(currentToken,negativelb);	  }          else if (independent) {	      currentToken = new RETokenIndependent(currentToken);	  }	  index = nextIndex;	  if (flagsSaved) {	      syntax = savedSyntax;	      cflags = savedCflags;	      insens = ((cflags & REG_ICASE) > 0);	      flagsSaved = false;	  }	} // not a comment      } // subexpression          // UNMATCHED RIGHT PAREN      // ) or \) throw exception if      // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {	throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);      }      // START OF LINE OPERATOR      //  ^      else if ((unit.ch == '^') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = null;	addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));      }      // END OF LINE OPERATOR      //  $      else if ((unit.ch == '$') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = null;	addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));      }      // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)      //  .      else if ((unit.ch == '.') && !(unit.bk || quot)) {	addToken(currentToken);	currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));      }      // ZERO-OR-MORE REPEAT OPERATOR      //  *      //      // This method used to check "repeat.empty.token" to avoid such regexp      // as "(a*)*", but now "repeat.empty.token" is allowed.      else if ((unit.ch == '*') && !(unit.bk || quot)) {	if (currentToken == null)          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenRepeated)          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);      }      // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR      //  + | \+ depending on RE_BK_PLUS_QM      //  not available if RE_LIMITED_OPS is set      //      // This method used to check "repeat.empty.token" to avoid such regexp      // as "(a*)+", but now "repeat.empty.token" is allowed.      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {	if (currentToken == null)          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);		// Check for possessive matching on RETokenRepeated	if (currentToken instanceof RETokenRepeated) {	  RETokenRepeated tokenRep = (RETokenRepeated)currentToken;	  if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())	    tokenRep.makePossessive();	  else	    throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
💿 文件大小 14155 K
👤 上传用户 zcmm_321
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #KAFFE #JAVA #虚拟机
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -