📄 re.java

📁 linux下建立JAVA虚拟机的源码KAFFE
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
	}	else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	else	  currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);      }      // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR      //  ? | \? depending on RE_BK_PLUS_QM      //  not available if RE_LIMITED_OPS is set      //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {	if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);	// Check for stingy matching on RETokenRepeated	if (currentToken instanceof RETokenRepeated) {	  RETokenRepeated tokenRep = (RETokenRepeated)currentToken;	  if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())	    tokenRep.makeStingy();	  else	    throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);	}	else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);	else	  currentToken = setRepeated(currentToken,0,1,index);      }      // OCTAL CHARACTER      //  \0377	      else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) {	CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);	if (ce == null)	  throw new REException("invalid octal character", REException.REG_ESCAPE, index);	index = index - 2 + ce.len;	addToken(currentToken);	currentToken = new RETokenChar(subIndex,ce.ch,insens);      }      // BACKREFERENCE OPERATOR      //  \1 \2 ... \9 and \10 \11 \12 ...      // not available if RE_NO_BK_REFS is set      // Perl recognizes \10, \11, and so on only if enough number of      // parentheses have opened before it, otherwise they are treated      // as aliases of \010, \011, ... (octal characters).  In case of      // Sun's JDK, octal character expression must always begin with \0.      // We will do as JDK does. But FIXME, take a look at "(a)(b)\29".      // JDK treats \2 as a back reference to the 2nd group because      // there are only two groups. But in our poor implementation,      // we cannot help but treat \29 as a back reference to the 29th group.      else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {	addToken(currentToken);	int numBegin = index - 1;	int numEnd = pLength;	for (int i = index; i < pLength; i++) {	    if (! Character.isDigit(pattern[i])) {		numEnd = i;		break;	    }	}	int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);	currentToken = new RETokenBackRef(subIndex,num,insens);	index = numEnd;      }      // START OF STRING OPERATOR      //  \A if RE_STRING_ANCHORS is set            else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	addToken(currentToken);	currentToken = new RETokenStart(subIndex,null);      }      // WORD BREAK OPERATOR      //  \b if ????      else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	  addToken(currentToken);	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);      }       // WORD BEGIN OPERATOR       //  \< if ????      else if (unit.bk && (unit.ch == '<')) {	  addToken(currentToken);	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);      }       // WORD END OPERATOR       //  \> if ????      else if (unit.bk && (unit.ch == '>')) {	  addToken(currentToken);	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);      }       // NON-WORD BREAK OPERATOR      // \B if ????      else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	  addToken(currentToken);	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);      }             // DIGIT OPERATOR      //  \d if RE_CHAR_CLASS_ESCAPES is set            else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	addToken(currentToken);	currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);      }      // NON-DIGIT OPERATOR      //  \D	else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	  addToken(currentToken);	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);	}	// NEWLINE ESCAPE        //  \n	else if (unit.bk && (unit.ch == 'n')) {	  addToken(currentToken);	  currentToken = new RETokenChar(subIndex,'\n',false);	}	// RETURN ESCAPE        //  \r	else if (unit.bk && (unit.ch == 'r')) {	  addToken(currentToken);	  currentToken = new RETokenChar(subIndex,'\r',false);	}	// WHITESPACE OPERATOR        //  \s if RE_CHAR_CLASS_ESCAPES is set	else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	  addToken(currentToken);	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);	}	// NON-WHITESPACE OPERATOR        //  \S	else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	  addToken(currentToken);	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);	}	// TAB ESCAPE        //  \t	else if (unit.bk && (unit.ch == 't')) {	  addToken(currentToken);	  currentToken = new RETokenChar(subIndex,'\t',false);	}	// ALPHANUMERIC OPERATOR        //  \w	else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	  addToken(currentToken);	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);	}	// NON-ALPHANUMERIC OPERATOR        //  \W	else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {	  addToken(currentToken);	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);	}	// END OF STRING OPERATOR        //  \Z	else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {	  addToken(currentToken);	  currentToken = new RETokenEnd(subIndex,null);	}        // HEX CHARACTER, UNICODE CHARACTER        //  \x1B, \u1234		else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) ||		 (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) {	  CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);	  if (ce == null)	    throw new REException("invalid hex character", REException.REG_ESCAPE, index);	  index = index - 2 + ce.len;	  addToken(currentToken);	  currentToken = new RETokenChar(subIndex,ce.ch,insens);	}	// NAMED PROPERTY	// \p{prop}, \P{prop}	else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) ||	         (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) {	  NamedProperty np = getNamedProperty(pattern, index - 2, pLength);	  if (np == null)	      throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);	  index = index - 2 + np.len;	  addToken(currentToken);	  currentToken = getRETokenNamedProperty(subIndex,np,insens,index);	}	// NON-SPECIAL CHARACTER (or escape to make literal)        //  c | \* for example	else {  // not a special character	  addToken(currentToken);	  currentToken = new RETokenChar(subIndex,unit.ch,insens);	}       } // end while    // Add final buffered token and an EndSub marker    addToken(currentToken);          if (branches != null) {	branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength));	branches.trimToSize(); // compact the Vector	minimumLength = 0;	maximumLength = 0;	firstToken = lastToken = null;	addToken(new RETokenOneOf(subIndex,branches,false));    }     else addToken(new RETokenEndSub(subIndex));  }  private static class ParseCharClassResult {      RETokenOneOf token;      int index;      boolean returnAtAndOperator = false;  }  /**   * Parse [...] or [^...] and make an RETokenOneOf instance.   * @param subIndex subIndex to be given to the created RETokenOneOf instance.   * @param pattern Input array of characters to be parsed.   * @param index Index pointing to the character next to the beginning '['.   * @param pLength Limit of the input array.   * @param cflags Compilation flags used to parse the pattern.   * @param pflags Flags that affect the behavior of this method.   * @param syntax Syntax used to parse the pattern.   */  private static ParseCharClassResult parseCharClass(int subIndex,		char[] pattern, int index,		int pLength, int cflags, RESyntax syntax, int pflags)		throws REException {	boolean insens = ((cflags & REG_ICASE) > 0);	Vector options = new Vector();	Vector addition = new Vector();	boolean additionAndAppeared = false;	final int RETURN_AT_AND = 0x01;	boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0);	boolean negative = false;	char ch;	char lastChar = 0;	boolean lastCharIsSet = false;	if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);		// Check for initial caret, negation	if ((ch = pattern[index]) == '^') {	  negative = true;	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	  ch = pattern[index];	}	// Check for leading right bracket literal	if (ch == ']') {	  lastChar = ch; lastCharIsSet = true;	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	}	while ((ch = pattern[index++]) != ']') {	  if ((ch == '-') && (lastCharIsSet)) {	    if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	    if ((ch = pattern[index]) == ']') {	      options.addElement(new RETokenChar(subIndex,lastChar,insens));	      lastChar = '-';	    } else {	      if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {	        CharExpression ce = getCharExpression(pattern, index, pLength, syntax);	        if (ce == null)		  throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);		ch = ce.ch;		index = index + ce.len - 1;	      }	      options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));	      lastChar = 0; lastCharIsSet = false;	      index++;	    }          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);	    int posixID = -1;	    boolean negate = false;            char asciiEsc = 0;	    boolean asciiEscIsSet = false;	    NamedProperty np = null;	    if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {	      switch (pattern[index]) {	      case 'D':		negate = true;	      case 'd':		posixID = RETokenPOSIX.DIGIT;		break;	      case 'S':		negate = true;	      case 's':		posixID = RETokenPOSIX.SPACE;		break;	      case 'W':		negate = true;	      case 'w':		posixID = RETokenPOSIX.ALNUM;		break;	      }	    }	    if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) {	      np = getNamedProperty(pattern, index - 1, pLength);	      if (np == null)		throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);	      index = index - 1 + np.len - 1;	    }	    else {	      CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax);	      if (ce == null)		throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);	      asciiEsc = ce.ch; asciiEscIsSet = true;	      index = index - 1 + ce.len - 1;	    }	    if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));	    	    if (posixID != -1) {	      options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));	    } else if (np != null) {	      options.addElement(getRETokenNamedProperty(subIndex,np,insens,index));	    } else if (asciiEscIsSet) {	      lastChar = asciiEsc; lastCharIsSet = true;	    } else {	      lastChar = pattern[index]; lastCharIsSet = true;	    }	    ++index;	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {	    StringBuffer posixSet = new StringBuffer();	    index = getPosixSet(pattern,index+1,posixSet);	    int posixId = RETokenPOSIX.intValue(posixSet.toString());	    if (posixId != -1)	      options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {		ParseCharClassResult result = parseCharClass(		    subIndex, pattern, index, pLength, cflags, syntax, 0);		addition.addElement(result.token);		addition.addElement("|");		index = result.index;	  } else if ((ch == '&') &&		     (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) &&		     (index < pLength) && (pattern[index] == '&')) {		if (returnAtAndOperator) {		    ParseCharClassResult result = new ParseCharClassResult(); 		    options.trimToSize();		    if (additionAndAppeared) addition.addElement("&");
💿 文件大小 14155 K
👤 上传用户 zcmm_321
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #KAFFE #JAVA #虚拟机
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -