⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 perl5compiler.java

📁 java实现正则表达式的代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    if(__input._getOffset() == 0) {      __input._setOffset(-1);      __getNextChar();    } else {      __input._decrement();      __getNextChar();    }    value = __input._getValue();    while(value != CharStringPointer._END_OF_STRING &&	  value != '|' && value != ')') {      flags &= ~__TRYAGAIN;      latest = __parseBranch(retFlags);      if(latest == OpCode._NULL_OFFSET) {	if((flags & __TRYAGAIN) != 0){	  value = __input._getValue();	  continue;	}	return OpCode._NULL_OFFSET;      }      retFlags[0] |= (flags & __NONNULL);      if(chain == OpCode._NULL_OFFSET)	retFlags[0] |= (flags & __SPSTART);      else {	++__cost;	__programAddTail(chain, latest);      }      chain = latest;      value = __input._getValue();    }    // If loop was never entered.    if(chain == OpCode._NULL_OFFSET)      __emitNode(OpCode._NOTHING);    return offset;  }  private int __parseAtom(int[] retFlags) throws MalformedPatternException {    boolean doDefault;    char value;    int offset, flags[] = { 0 };            retFlags[0] = __WORSTCASE;    doDefault = false;    offset = OpCode._NULL_OFFSET;  tryAgain:    while(true) {      value = __input._getValue();      switch(value) {      case '^' :	__getNextChar();	// The order here is important in order to support /ms.	// /m takes precedence over /s for ^ and $, but not for .	if((__modifierFlags[0] & __MULTILINE) != 0)	  offset = __emitNode(OpCode._MBOL);	else if((__modifierFlags[0] & __SINGLELINE) != 0)	  offset = __emitNode(OpCode._SBOL);	else	  offset = __emitNode(OpCode._BOL);	break tryAgain;      case '$':	__getNextChar();	// The order here is important in order to support /ms.	// /m takes precedence over /s for ^ and $, but not for .	if((__modifierFlags[0] & __MULTILINE) != 0)	  offset = __emitNode(OpCode._MEOL);	else if((__modifierFlags[0] & __SINGLELINE) != 0)	  offset = __emitNode(OpCode._SEOL);	else	  offset = __emitNode(OpCode._EOL);	break tryAgain;      case '.':	__getNextChar();	// The order here is important in order to support /ms.	// /m takes precedence over /s for ^ and $, but not for .	if((__modifierFlags[0] & __SINGLELINE) != 0)	  offset = __emitNode(OpCode._SANY);	else	  offset = __emitNode(OpCode._ANY);	++__cost;	retFlags[0] |= (__NONNULL | __SIMPLE);	break tryAgain;      case '[':	__input._increment();	offset = __parseUnicodeClass();	retFlags[0] |= (__NONNULL | __SIMPLE);	break tryAgain;      case '(':	__getNextChar();	offset = __parseExpression(true, flags);	if(offset == OpCode._NULL_OFFSET) {	  if((flags[0] & __TRYAGAIN) != 0)	    continue tryAgain;	  return OpCode._NULL_OFFSET;	}	retFlags[0] |= (flags[0] & (__NONNULL | __SPSTART));	break tryAgain;      case '|':      case ')':	if((flags[0] & __TRYAGAIN) != 0) {	  retFlags[0] |= __TRYAGAIN;	  return OpCode._NULL_OFFSET;	}	throw new MalformedPatternException("Error in expression at " +				   __input._toString(__input._getOffset()));	//break tryAgain;      case '?':      case '+':      case '*':	throw new MalformedPatternException(                 "?+* follows nothing in expression");	//break tryAgain;      case '\\':	value = __input._increment();	switch(value) {	case 'A' :	  offset = __emitNode(OpCode._SBOL);	  retFlags[0] |= __SIMPLE;	  __getNextChar();	  break;	case 'G':	  offset = __emitNode(OpCode._GBOL);	  retFlags[0] |= __SIMPLE;	  __getNextChar();	  break;	case 'Z':	  offset = __emitNode(OpCode._SEOL);	  retFlags[0] |= __SIMPLE;	  __getNextChar();	  break;	case 'w':	  offset = __emitNode(OpCode._ALNUM);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'W':	  offset = __emitNode(OpCode._NALNUM);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'b':	  offset = __emitNode(OpCode._BOUND);	  retFlags[0] |= __SIMPLE;	  __getNextChar();	  break;	case 'B':	  offset = __emitNode(OpCode._NBOUND);	  retFlags[0] |= __SIMPLE;	  __getNextChar();	  break;	case 's':	  offset = __emitNode(OpCode._SPACE);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'S':	  offset = __emitNode(OpCode._NSPACE);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'd':	  offset = __emitNode(OpCode._DIGIT);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'D':	  offset = __emitNode(OpCode._NDIGIT);	  retFlags[0] |= (__NONNULL | __SIMPLE);	  __getNextChar();	  break;	case 'n': case 'r': case 't': case 'f': case 'e': case 'a': case 'x':	case 'c': case '0':	  doDefault = true;	  break tryAgain;	case '1': case '2': case '3': case '4': case '5': case '6': case '7':	case '8': case '9':	  int num;	  StringBuffer buffer = new StringBuffer(10);	  num = 0;	  value = __input._getValueRelative(num);	  while(Character.isDigit(value)) {	    buffer.append(value);	    ++num;	    value = __input._getValueRelative(num);	  }	  try {	    num = Integer.parseInt(buffer.toString());	  } catch(NumberFormatException e) {	    throw new MalformedPatternException(	   "Unexpected number format exception.  Please report this bug." +	   "NumberFormatException message: " + e.getMessage());	  }	  if(num > 9 && num >= __numParentheses) {	    doDefault = true;	    break tryAgain;	  } else {	    // A backreference may only occur AFTER its group	    if(num >= __numParentheses)	      throw new MalformedPatternException("Invalid backreference: \\" +						  num);	    __sawBackreference = true;	    offset = __emitArgNode(OpCode._REF, (char)num);	    retFlags[0] |= __NONNULL;	    value = __input._getValue();	    while(Character.isDigit(value))	      value = __input._increment();	    __input._decrement();	    __getNextChar();	  }	  break;	case '\0':	case CharStringPointer._END_OF_STRING:	  if(__input._isAtEnd())	    throw new	      MalformedPatternException("Trailing \\ in expression.");	// fall through to default	default:	  doDefault = true;	  break tryAgain;	}	break tryAgain;      case '#':	// skip over comments	if((__modifierFlags[0] & __EXTENDED) != 0) {	  while(!__input._isAtEnd() && __input._getValue() != '\n')	    __input._increment();	  if(!__input._isAtEnd())	    continue tryAgain;	}	// fall through to default      default:	__input._increment();	doDefault = true;	break tryAgain;      }// end master switch    } // end tryAgain    if(doDefault) {      char ender;      int length, pOffset, maxOffset, lastOffset, numLength[];      offset = __emitNode(OpCode._EXACTLY);      // Not sure that it's ok to use 0 to mark end.      //__emitCode((char)0);      __emitCode((char)CharStringPointer._END_OF_STRING);    forLoop:      for(length = 0, pOffset = __input._getOffset() - 1,	    maxOffset = __input._getLength();	  length < 127 && pOffset < maxOffset; ++length) {	lastOffset = pOffset;	value = __input._getValue(pOffset);	switch(value) {	case '^': case '$': case '.': case '[': case '(': case ')':	case '|':	  break forLoop;	case '\\':	  value = __input._getValue(++pOffset);	  switch(value) {	  case 'A': case 'G': case 'Z': case 'w': case 'W': case 'b':	  case 'B': case 's': case 'S': case 'd': case 'D':	    --pOffset;	    break forLoop;	  case 'n':	    ender = '\n';	    ++pOffset;	    break;	  case 'r':	    ender = '\r';	    ++pOffset;	    break;	  case 't':	    ender = '\t';	    ++pOffset;	    break;	  case 'f':	    ender = '\f';	    ++pOffset;	    break;	  case 'e':	    ender = '\033';	    ++pOffset;	    break;	  case 'a':	    ender = '\007';	    ++pOffset;	    break;	  case 'x':	    numLength = new int[1];	    ender = (char)__parseHex(__input._array, ++pOffset, 2, numLength);	    pOffset+=numLength[0];	    break;	  case 'c':	    ++pOffset;	    ender = __input._getValue(pOffset++);	    if(Character.isLowerCase(ender))	      ender = Character.toUpperCase(ender);	    ender ^= 64;	    break;	  case '0': case '1': case '2': case'3': case '4': case '5':	  case '6': case '7': case '8': case '9':	    boolean doOctal = false;	    value = __input._getValue(pOffset);	    if(value == '0')	      doOctal = true;	    value = __input._getValue(pOffset + 1);	    if(Character.isDigit(value)) {	      int num;	      StringBuffer buffer = new StringBuffer(10);	      num = pOffset;	      value = __input._getValue(num);	      while(Character.isDigit(value)){		buffer.append(value);		++num;		value = __input._getValue(num);	      }	      try {		num = Integer.parseInt(buffer.toString());	      } catch(NumberFormatException e) {		throw new MalformedPatternException(	     "Unexpected number format exception.  Please report this bug." +	     "NumberFormatException message: " + e.getMessage());	      }	      if(!doOctal)		doOctal = (num >= __numParentheses);	    }	    if(doOctal) {	      numLength = new int[1];	      ender = (char)__parseOctal(__input._array, pOffset, 3, numLength);	      pOffset+=numLength[0];	    } else {	      --pOffset;	      break forLoop;	    }	    break;	  case CharStringPointer._END_OF_STRING:	  case '\0':	    if(pOffset >= maxOffset)	      throw new		MalformedPatternException("Trailing \\ in expression.");	    // fall through to default	  default:	    ender = __input._getValue(pOffset++);	    break;	  } // end backslash switch	  break;	case '#':	  if((__modifierFlags[0] & __EXTENDED) != 0) {	    while(pOffset < maxOffset && __input._getValue(pOffset) != '\n')	      ++pOffset;	  }	  // fall through to whitespace handling	case ' ': case '\t': case '\n': case '\r': case '\f': case '\013':	  if((__modifierFlags[0] & __EXTENDED) != 0) {	    ++pOffset;	    --length;	    continue;	  }	  // fall through to default	default:	  ender = __input._getValue(pOffset++);	  break;	}   // end master switch	if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&	   Character.isUpperCase(ender))	  ender = Character.toLowerCase(ender);	if(pOffset < maxOffset && __isComplexRepetitionOp(__input._array, pOffset)) {	  if(length > 0)	    pOffset = lastOffset;	  else {	    ++length;	    __emitCode(ender);	  }	  break;	}	__emitCode(ender);      } // end for loop      __input._setOffset(pOffset - 1);      __getNextChar();      if(length < 0)	throw new MalformedPatternException(         "Unexpected compilation failure.  Please report this bug!");      if(length > 0)	retFlags[0] |= __NONNULL;      if(length == 1)	retFlags[0] |= __SIMPLE;      if(__program!= null)	__program[OpCode._getOperand(offset)] = (char)length;      //__emitCode('\0'); // debug      __emitCode(CharStringPointer._END_OF_STRING);    }    return offset;  }  // These are the original 8-bit character class handling methods.  // We don't want to delete them just yet only to have to dig it out  // of revision control later.  /*  // Set the bits in a character class.  Only recognizes ascii.  private void __setCharacterClassBits(char[] bits, int offset, char deflt,				       char ch)  {    if(__program== null || ch >= 256)      return;    ch &= 0xffff;    if(deflt == 0) {      bits[offset + (ch >> 4)] |= (1 << (ch & 0xf));    } else {      bits[offset + (ch >> 4)] &= ~(1 << (ch & 0xf));    }  }  private int __parseCharacterClass() throws MalformedPatternException {    boolean range = false, skipTest;    char clss, deflt, lastclss = Character.MAX_VALUE;    int offset, bits, numLength[] = { 0 };    offset = __emitNode(OpCode._ANYOF);    if(__input._getValue() == '^') {      ++__cost;      __input._increment();      deflt = 0;    } else {      deflt = 0xffff;    }    bits = __programSize;    for(clss = 0; clss < 16; clss++)      __emitCode(deflt);    clss = __input._getValue();    if(clss == ']' || clss == '-')      skipTest = true;    else      skipTest = false;    while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')	  || skipTest) {      // It sucks, but we have to make this assignment every time      skipTest = false;      __input._increment();      if(clss == '\\') {	clss = __input._postIncrement();	switch(clss){	case 'w':	  for(clss = 0; clss < 256; clss++)	    if(OpCode._isWordCharacter(clss))	      __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -