⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 perl5compiler.java

📁 java实现正则表达式的代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
	case 'W':	  for(clss = 0; clss < 256; clss++)	    if(!OpCode._isWordCharacter(clss))	      __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;	case 's':	  for(clss = 0; clss < 256; clss++)	    if(Character.isWhitespace(clss))	      __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;	case 'S':	  for(clss = 0; clss < 256; clss++)	    if(!Character.isWhitespace(clss))	      __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;	case 'd':	  for(clss = '0'; clss <= '9'; clss++)	    __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;	case 'D':	  for(clss = 0; clss < '0'; clss++)	    __setCharacterClassBits(__program, bits, deflt, clss);	  for(clss = (char)('9' + 1); clss < 256; clss++)	    __setCharacterClassBits(__program, bits, deflt, clss);	  lastclss = Character.MAX_VALUE;	  continue;	case 'n':	  clss = '\n';	  break;	case 'r':	  clss = '\r';	  break;	case 't':	  clss = '\t';	  break;	case 'f':	  clss = '\f';	  break;	case 'b':	  clss = '\b';	  break;	case 'e':	  clss = '\033';	  break;	case 'a':	  clss = '\007';	  break;	case 'x':	  clss = (char)__parseHex(__input._array, __input._getOffset(), 2,				  numLength);	  __input._increment(numLength[0]);	  break;	case 'c':	  clss = __input._postIncrement();	  if(Character.isLowerCase(clss))	    clss = Character.toUpperCase(clss);	  clss ^= 64;	  break;	case '0': case '1': case '2': case '3': case '4':	case '5': case '6': case '7': case '8': case '9':	  clss = (char)__parseOctal(__input._array, __input._getOffset() - 1,				    3, numLength);	  __input._increment(numLength[0] - 1);	  break;	}      }      if(range) {	if(lastclss > clss)	  throw new MalformedPatternException(			 "Invalid [] range in expression.");	range = false;      } else {	lastclss = clss;	if(__input._getValue() == '-' &&	   __input._getOffset() + 1 < 
__input._getLength() &&	   __input._getValueRelative(1) != ']') {	  __input._increment();	  range = true;	  continue;	}      }      while(lastclss <= clss) {	__setCharacterClassBits(__program, bits, deflt, lastclss);	if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&	   Character.isUpperCase(lastclss))	  __setCharacterClassBits(__program, bits, deflt,				 Character.toLowerCase(lastclss));	++lastclss;      }      lastclss = clss;    }    if(__input._getValue() != ']')      throw new MalformedPatternException("Unmatched [] in expression.");    __getNextChar();    return offset;  }  */

  /**
   * Parses the members of a bracketed character class and emits them as
   * an _ANYOFUN node (or _NANYOFUN when the class starts with '^').
   * <p>
   * After the node header the following items are emitted, in the order
   * the members appear in the pattern:
   * <ul>
   * <li>_OPCODE or _NOPCODE followed by a class opcode, for \w \W \s \S
   *     \d \D and for POSIX expressions such as [:alpha:] / [:^alpha:]</li>
   * <li>_ONECHAR followed by a single character</li>
   * <li>_RANGE followed by the low and high characters of a range</li>
   * </ul>
   * The list is terminated with _END.
   * <p>
   * NOTE(review): presumably called with the input positioned just after
   * the opening '[' -- confirm against the caller.
   *
   * @return The offset returned by __emitNode for the class node.
   * @exception MalformedPatternException If a range has its end points
   *            reversed or the closing ']' is missing.
   */
  private int __parseUnicodeClass() throws MalformedPatternException {
    boolean range = false, skipTest;
    char clss, lastclss = Character.MAX_VALUE;
    int offset, numLength[] = { 0 };
    boolean negFlag[] = { false };
    boolean opcodeFlag; /* clss isn't character when this flag true. */

    if(__input._getValue() == '^') {
      offset = __emitNode(OpCode._NANYOFUN);
      __input._increment();
    } else {
      offset = __emitNode(OpCode._ANYOFUN);
    }

    clss = __input._getValue();

    // Force one pass through the loop body when the class starts with
    // ']' or '-', so that a leading ']' is consumed as a member instead
    // of terminating the class.
    skipTest = (clss == ']' || clss == '-');

    while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
	  || skipTest)
      {
	// These flags describe only the member being parsed right now and
	// must be reset on every pass.
	skipTest = false;
	opcodeFlag = false;
	// Without this reset a negated POSIX expression such as [:^alpha:]
	// would leave the flag set and wrongly negate any later \w, \d, \s
	// etc. in the same class.
	negFlag[0] = false;
	__input._increment();

	if(clss == '\\' || clss == '[') {
	  if(clss == '\\') {
	    /* character is escaped */
	    clss = __input._postIncrement();
	  } else {
	    /* try POSIX expression */
	    char posixOpCode = __parsePOSIX(negFlag);
	    if(posixOpCode != 0) {
	      opcodeFlag = true;
	      clss = posixOpCode;
	    }
	  }

	  if(!opcodeFlag) {
	    switch(clss) {
	    case 'w':
	      opcodeFlag = true;
	      clss = OpCode._ALNUM;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 'W':
	      opcodeFlag = true;
	      clss = OpCode._NALNUM;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 's':
	      opcodeFlag = true;
	      clss = OpCode._SPACE;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 'S':
	      opcodeFlag = true;
	      clss = OpCode._NSPACE;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 'd':
	      opcodeFlag = true;
	      clss = OpCode._DIGIT;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 'D':
	      opcodeFlag = true;
	      clss = OpCode._NDIGIT;
	      lastclss = Character.MAX_VALUE;
	      break;
	    case 'n':
	      clss = '\n';
	      break;
	    case 'r':
	      clss = '\r';
	      break;
	    case 't':
	      clss = '\t';
	      break;
	    case 'f':
	      clss = '\f';
	      break;
	    case 'b':
	      clss = '\b';
	      break;
	    case 'e':
	      clss = '\033';
	      break;
	    case 'a':
	      clss = '\007';
	      break;
	    case 'x':
	      clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
				      numLength);
	      __input._increment(numLength[0]);
	      break;
	    case 'c':
	      clss = __input._postIncrement();
	      if(Character.isLowerCase(clss))
		clss = Character.toUpperCase(clss);
	      clss ^= 64;
	      break;
	    case '0': case '1': case '2': case '3': case '4':
	    case '5': case '6': case '7': case '8': case '9':
	      // The first digit has already been consumed, hence the -1
	      // adjustments on the start offset and on the increment.
	      clss =
		(char)__parseOctal(__input._array,
				   __input._getOffset() - 1, 3, numLength);
	      __input._increment(numLength[0] - 1);
	      break;
	    default:
	      break;
	    }
	  }
	}

	if(range) {
	  // clss is the upper end point of a range opened on the
	  // previous pass; lastclss still holds the lower end point.
	  if(lastclss > clss)
	    throw new MalformedPatternException(
					"Invalid [] range in expression.");
	  range = false;
	} else {
	  lastclss = clss;
	  // A '-' that is followed by something other than ']' opens a
	  // range.  Class opcodes cannot be the start of a range.
	  if(!opcodeFlag &&
	     __input._getValue() == '-' &&
	     __input._getOffset() + 1 < __input._getLength() &&
	     __input._getValueRelative(1) != ']') {
	    __input._increment();
	    range = true;
	    continue;
	  }
	}

	if(lastclss == clss) {
	  if(opcodeFlag) {
	    // clss is an opcode identifier, not a character, so it is
	    // never case-folded.
	    if(!negFlag[0])
	      __emitCode(OpCode._OPCODE);
	    else
	      __emitCode(OpCode._NOPCODE);
	    __emitCode(clss);
	  } else {
	    __emitCode(OpCode._ONECHAR);
	    __emitCode(clss);
	    if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
	       Character.isUpperCase(clss)) {
	      // Back up over the character just emitted and replace it
	      // with its lower case form.
	      __programSize--;
	      __emitCode(Character.toLowerCase(clss));
	    }
	  }
	}

	if(lastclss < clss) {
	  __emitCode(OpCode._RANGE);
	  __emitCode(lastclss);
	  __emitCode(clss);
	  if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
	     Character.isUpperCase(clss) && Character.isUpperCase(lastclss)) {
	    // Back up over both end points and replace them with their
	    // lower case forms.
	    __programSize -= 2;
	    __emitCode(Character.toLowerCase(lastclss));
	    __emitCode(Character.toLowerCase(clss));
	  }
	}

	lastclss = clss;
      }

    if(__input._getValue() != ']')
      throw new MalformedPatternException("Unmatched [] in expression.");

    __getNextChar();
    __emitCode(OpCode._END);

    return offset;
  }

  /**
   * Parse POSIX expression like [:foo:] or its negated form [:^foo:].
   * The input is expected to be positioned on the ':' that follows the
   * opening '['.
   * <p>
   * On success the input offset is moved past the closing ']' and
   * negFlag[0] tells whether the expression was negated.  On failure
   * the input offset is left unchanged and negFlag[0] is false.
   *
   * @param negFlag A one element array used as an output parameter.
   *        negFlag[0] is set to true only when a negated expression
   *        was parsed successfully, and to false otherwise.
   * @return OpCode. return 0 when fail parsing POSIX expression.
   */
  private char __parsePOSIX(boolean negFlag[])
    throws MalformedPatternException
  {
    int len = __input._getLength();
    int pos = __input._getOffset();
    boolean negated = false;
    char value;
    StringBuffer buf;
    Object opcode;

    // Never report a negation for an expression that fails to parse.
    negFlag[0] = false;

    if(__input._getValue(pos++) != ':')
      return 0;

    if(__input._getValue(pos) == '^') {
      negated = true;
      pos++;
    }

    buf = new StringBuffer();

    try {
      // Collect the class name up to the next ':' or the end of input.
      while((value = __input._getValue(pos++)) != ':' && pos < len)
	buf.append(value);
    } catch(Exception e) {
      // Deliberate best effort: any failure while scanning simply means
      // this is not a POSIX expression.
      return 0;
    }

    if(__input._getValue(pos++) != ']')
      return 0;

    opcode = __hashPOSIX.get(buf.toString());
    if(opcode == null)
      return 0;

    __input._setOffset(pos);
    negFlag[0] = negated;

    return ((Character)opcode).charValue();
  }

  private int __parseBranch(int[] retFlags) throws MalformedPatternException {    boolean nestCheck = false, handleRepetition = false;    int offset, next, min, max, flags[] = { 0 };    char operator, value;    min = 0;    max = Character.MAX_VALUE;    offset = 
__parseAtom(flags);    if(offset == OpCode._NULL_OFFSET) {      if((flags[0] & __TRYAGAIN) != 0)	retFlags[0] |= __TRYAGAIN;      return OpCode._NULL_OFFSET;    }    operator = __input._getValue();    if(operator == '(' && __input._getValueRelative(1) == '?' &&       __input._getValueRelative(2) == '#') {      while(operator != CharStringPointer._END_OF_STRING && operator != ')')	operator = __input._increment();      if(operator != CharStringPointer._END_OF_STRING) {	__getNextChar();	operator = __input._getValue();      }    }    if(operator == '{' &&       __parseRepetition(__input._array, __input._getOffset())) {      int maxOffset, pos;      next = __input._getOffset() + 1;      pos = maxOffset = __input._getLength();      value = __input._getValue(next);      while(Character.isDigit(value) || value == ',') {	if(value == ',') {	  if(pos != maxOffset)	    break;	  else	    pos = next;	}	++next;	value = __input._getValue(next);      }      if(value == '}') {	int num;	StringBuffer buffer = new StringBuffer(10);	if(pos == maxOffset)	  pos = next;	__input._increment();	num = __input._getOffset();	value = __input._getValue(num);	while(Character.isDigit(value)) {	  buffer.append(value);	  ++num;	  value = __input._getValue(num);	}	try {	  min = Integer.parseInt(buffer.toString());	} catch(NumberFormatException e) {	  throw new MalformedPatternException(	 "Unexpected number format exception.  Please report this bug." +	   "NumberFormatException message: " + e.getMessage());	}	value = __input._getValue(pos);	if(value == ',')	  ++pos;	else	  pos = __input._getOffset();	num = pos;	buffer = new StringBuffer(10);	value = __input._getValue(num);	while(Character.isDigit(value)){	  buffer.append(value);	  ++num;	  value = __input._getValue(num);	}	try {	  if(num != pos)	    max = Integer.parseInt(buffer.toString());	} catch(NumberFormatException e) {	  throw new MalformedPatternException(	 "Unexpected number format exception.  Please report this bug." 
+	   "NumberFormatException message: " + e.getMessage());	}	if(max == 0 && __input._getValue(pos) != '0')	  max = Character.MAX_VALUE;	__input._setOffset(next);	__getNextChar();	nestCheck = true;	handleRepetition = true;      }    }    if(!nestCheck) {      handleRepetition = false;      if(!__isSimpleRepetitionOp(operator)) {	retFlags[0] = flags[0];	return offset;      }      __getNextChar();      retFlags[0] = ((operator != '+') ?		  (__WORSTCASE | __SPSTART) : (__WORSTCASE | __NONNULL));      if(operator == '*' && ((flags[0] & __SIMPLE) != 0)) {	__programInsertOperator(OpCode._STAR, offset);	__cost+=4;      } else if(operator == '*') {	min = 0;	handleRepetition = true;      } else if(operator == '+' && (flags[0] & __SIMPLE) != 0) {	__programInsertOperator(OpCode._PLUS, offset);	__cost+=3;      } else if(operator == '+') {	min = 1;	handleRepetition = true;      } else if(operator == '?') {	min = 0;	max = 1;	handleRepetition = true;      }    }    if(handleRepetition) {      // handle repetition      if((flags[0] & __SIMPLE) != 0){	__cost+= ((2 + __cost) / 2);	__programInsertOperator(OpCode._CURLY, offset);      } else {	__cost += (4 + __cost);	__programAddTail(offset, __emitNode(OpCode._WHILEM));	__programInsertOperator(OpCode._CURLYX, offset);	__programAddTail(offset, __emitNode(OpCode._NOTHING));      }      if(min > 0)	retFlags[0] = (__WORSTCASE | __NONNULL);      if(max != 0 && max < min)	throw new MalformedPatternException(       "Invalid interval {" + min + "," + max + "}");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -