⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 perl5compiler.java

📁 java实现正则表达式的代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
      if(__program!= null) {	__program[offset + 2] = (char)min;	__program[offset + 3] = (char)max;      }    }    if(__input._getValue() == '?') {      __getNextChar();      __programInsertOperator(OpCode._MINMOD, offset);      __programAddTail(offset, offset + 2);    }    if(__isComplexRepetitionOp(__input._array, __input._getOffset()))      throw new MalformedPatternException(        "Nested repetitions *?+ in expression");    return offset;  }  private int __parseExpression(boolean isParenthesized, int[] hintFlags)    throws MalformedPatternException {    char value, paren;    char[] modifierFlags, posFlags = { 0 }, negFlags = { 0 };    int nodeOffset = OpCode._NULL_OFFSET, parenthesisNum = 0, br, ender;    int[] flags = { 0 };;    String modifiers = "iogmsx-";    modifierFlags = posFlags;    // Initially we assume expression doesn't match null string.    hintFlags[0] = __NONNULL;    if (isParenthesized) {      paren = 1;      if(__input._getValue() == '?') {	__input._increment();	paren = value = __input._postIncrement();	switch(value) {	case ':' :	case '=' :	case '!' : break;	case '#' :	  value = __input._getValue();	  while(value != CharStringPointer._END_OF_STRING && value != ')')	    value = __input._increment();	  if(value != ')')	    throw new MalformedPatternException(	       "Sequence (?#... not terminated");	  __getNextChar();	  hintFlags[0] = __TRYAGAIN;	  return OpCode._NULL_OFFSET;	default :	  __input._decrement();	  value = __input._getValue();	  while(value != CharStringPointer._END_OF_STRING &&		modifiers.indexOf(value) != -1) {	    if(value == '-')	      modifierFlags = negFlags;	    else	      __setModifierFlag(modifierFlags, value);	    value = __input._increment();	  }	  __modifierFlags[0] |= posFlags[0];	  __modifierFlags[0] &= ~negFlags[0];	  if(value != ')')	    throw new MalformedPatternException(	       "Sequence (?" + value + "...) 
not recognized");	  __getNextChar();	  hintFlags[0] = __TRYAGAIN;	  return OpCode._NULL_OFFSET;	}      } else {	parenthesisNum = __numParentheses;	++__numParentheses;	nodeOffset = __emitArgNode(OpCode._OPEN, (char)parenthesisNum);      }    } else       paren = 0;    br = __parseAlternation(flags);    if(br == OpCode._NULL_OFFSET)      return OpCode._NULL_OFFSET;    if(nodeOffset != OpCode._NULL_OFFSET)      __programAddTail(nodeOffset, br);    else      nodeOffset = br;    if((flags[0] & __NONNULL) == 0)      hintFlags[0] &= ~__NONNULL;    hintFlags[0] |= (flags[0] & __SPSTART);    while(__input._getValue() == '|') {      __getNextChar();      br = __parseAlternation(flags);      if(br == OpCode._NULL_OFFSET)	return OpCode._NULL_OFFSET;      __programAddTail(nodeOffset, br);      if((flags[0] & __NONNULL) == 0)	hintFlags[0] &= ~__NONNULL;      hintFlags[0] |= (flags[0] & __SPSTART);    }    switch(paren) {    case ':' :      ender = __emitNode(OpCode._NOTHING);      break;    case 1:      ender = __emitArgNode(OpCode._CLOSE, (char)parenthesisNum);      break;    case '=':    case '!':      ender = __emitNode(OpCode._SUCCEED);      hintFlags[0] &= ~__NONNULL;      break;    case 0  :    default :      ender = __emitNode(OpCode._END);      break;    }    __programAddTail(nodeOffset, ender);    for(br = nodeOffset; br != OpCode._NULL_OFFSET;	br = OpCode._getNext(__program, br))      __programAddOperatorTail(br, ender);    if(paren == '=') {      __programInsertOperator(OpCode._IFMATCH, nodeOffset);      __programAddTail(nodeOffset, __emitNode(OpCode._NOTHING));    } else if(paren == '!') {      __programInsertOperator(OpCode._UNLESSM, nodeOffset);      __programAddTail(nodeOffset, __emitNode(OpCode._NOTHING));    }    if(paren != 0 && (__input._isAtEnd() || __getNextChar() != ')')) {      throw new MalformedPatternException("Unmatched parentheses.");    } else if(paren == 0 && !__input._isAtEnd()) {       if(__input._getValue() == ')')	throw new 
MalformedPatternException("Unmatched parentheses.");      else	// Should never happen.	throw new MalformedPatternException(       "Unreached characters at end of expression.  Please report this bug!");    }    return nodeOffset;  }  /**   * Compiles a Perl5 regular expression into a Perl5Pattern instance that   * can be used by a Perl5Matcher object to perform pattern matching.   * Please see the user's guide for more information about Perl5 regular   * expressions.   * <p>   * @param pattern  A Perl5 regular expression to compile.   * @param options  A set of flags giving the compiler instructions on   *                 how to treat the regular expression.  The flags   *                 are a logical OR of any number of the five <b>MASK</b>   *                 constants.  For example:   *                 <pre>   * regex =   *   compiler.compile(pattern, Perl5Compiler.   *                    CASE_INSENSITIVE_MASK |   *                    Perl5Compiler.MULTILINE_MASK);   *                 </pre>   *                  This says to compile the pattern so that it treats   *                  input as consisting of multiple lines and to perform   *                  matches in a case insensitive manner.   * @return A Pattern instance constituting the compiled regular expression.   *         This instance will always be a Perl5Pattern and can be reliably   *         casted to a Perl5Pattern.   * @exception MalformedPatternException  If the compiled expression   *  is not a valid Perl5 regular expression.   
*/  public Pattern compile(char[] pattern, int options)       throws MalformedPatternException {    int[] flags = { 0 };    int caseInsensitive, scan;    Perl5Pattern regexp;    String mustString, startString;    int first;    boolean sawOpen = false, sawPlus = false;    StringBuffer lastLongest, longest;    int length, minLength = 0, curBack, back, backmost;    __input = new CharStringPointer(pattern);    caseInsensitive    = options & __CASE_INSENSITIVE;    __modifierFlags[0] = (char)options;    __sawBackreference = false;    __numParentheses   = 1;    __programSize      = 0;    __cost             = 0;    __program= null;    __emitCode((char)0);    if(__parseExpression(false, flags) == OpCode._NULL_OFFSET)      throw new MalformedPatternException("Unknown compilation error.");    if(__programSize >= Character.MAX_VALUE - 1)      throw new MalformedPatternException("Expression is too large.");    __program= new char[__programSize];    regexp = new Perl5Pattern();    regexp._program    = __program;    regexp._expression = new String(pattern);    __input._setOffset(0);    __numParentheses   = 1;    __programSize      = 0;    __cost             = 0;    __emitCode((char)0);    if(__parseExpression(false, flags) == OpCode._NULL_OFFSET)      throw new MalformedPatternException("Unknown compilation error.");    caseInsensitive = __modifierFlags[0] & __CASE_INSENSITIVE;    regexp._isExpensive      = (__cost >= 10);    regexp._startClassOffset = OpCode._NULL_OFFSET;    regexp._anchor           = 0;    regexp._back             = -1;    regexp._options          = options;    regexp._startString      = null;    regexp._mustString       = null;    mustString               = null;    startString              = null;    scan = 1;    if(__program[OpCode._getNext(__program, scan)] == OpCode._END){      boolean doItAgain;  // bad variables names!      
char op;      first = scan = OpCode._getNextOperator(scan);      op = __program[first];      while((op == OpCode._OPEN && (sawOpen = true)) ||	    (op == OpCode._BRANCH &&	     __program[OpCode._getNext(__program, first)] != OpCode._BRANCH) ||	    op == OpCode._PLUS || op == OpCode._MINMOD ||	    (OpCode._opType[op] == OpCode._CURLY && 	     OpCode._getArg1(__program, first) > 0)) {	if(op == OpCode._PLUS)	  sawPlus = true;	else	  first+=OpCode._operandLength[op];	first = OpCode._getNextOperator(first);	op = __program[first];      }      doItAgain = true;      while(doItAgain) {	doItAgain = false;	op = __program[first];	if(op == OpCode._EXACTLY) {	  startString =	    new String(__program, OpCode._getOperand(first + 1),		       __program[OpCode._getOperand(first)]);	} else if(OpCode._isInArray(op, OpCode._opLengthOne, 2))	  regexp._startClassOffset = first;	else if(op == OpCode._BOUND || op == OpCode._NBOUND)	  regexp._startClassOffset = first;	else if(OpCode._opType[op] == OpCode._BOL) {	  if(op == OpCode._BOL)	    regexp._anchor = Perl5Pattern._OPT_ANCH_BOL;	  else if(op == OpCode._MBOL)	    regexp._anchor = Perl5Pattern._OPT_ANCH_MBOL;	  else	    regexp._anchor = Perl5Pattern._OPT_ANCH;	  first = OpCode._getNextOperator(first);	  doItAgain = true;	  continue;	} else if(op == OpCode._STAR &&		  OpCode._opType[__program[OpCode._getNextOperator(first)]] == 		  OpCode._ANY && (regexp._anchor & Perl5Pattern._OPT_ANCH) != 0)	  {	    regexp._anchor = Perl5Pattern._OPT_ANCH | Perl5Pattern._OPT_IMPLICIT;	    first = OpCode._getNextOperator(first);	    doItAgain = true;	    continue;	}      } // end while do it again      if(sawPlus && (!sawOpen || !__sawBackreference))	regexp._anchor |= Perl5Pattern._OPT_SKIP;      lastLongest   = new StringBuffer();      longest   = new StringBuffer();      length    = 0;      minLength = 0;      curBack   = 0;      back   = 0;      backmost   = 0;      while(scan > 0 && (op = __program[scan]) != OpCode._END) {	if(op == OpCode._BRANCH) {	 
 if(__program[OpCode._getNext(__program, scan)] == OpCode._BRANCH) {	    curBack = -30000;	    while(__program[scan] == OpCode._BRANCH)	      scan = OpCode._getNext(__program, scan);	  } else	    scan = OpCode._getNextOperator(scan);	  continue;	}	if(op == OpCode._UNLESSM) {	  curBack = -30000;	  scan = OpCode._getNext(__program, scan);	  continue;	}	if(op == OpCode._EXACTLY) {	  int temp;	  first = scan;	  while(__program[(temp = OpCode._getNext(__program, scan))] == 		OpCode._CLOSE)	    scan = temp;	  minLength += __program[OpCode._getOperand(first)];	  temp = __program[OpCode._getOperand(first)];	  if(curBack - back == length) {	    lastLongest.append(new String(__program, OpCode._getOperand(first) + 1,				      temp));	    length  += temp;	    curBack += temp;	    first = OpCode._getNext(__program, scan);	  } else if(temp >= (length + (curBack >= 0 ? 1 : 0))) {	    length = temp;	    lastLongest =	      new StringBuffer(new String(__program,					  OpCode._getOperand(first) + 1, temp));	    back = curBack;	    curBack += length;	    first = OpCode._getNext(__program, scan);	  } else	    curBack += temp;	} else if(OpCode._isInArray(op, OpCode._opLengthVaries, 0)) {	  curBack = -30000;	  length = 0;	  if(lastLongest.length() > longest.length()) {	    longest = lastLongest;	    backmost = back;	  }	  lastLongest = new StringBuffer();	  if(op == OpCode._PLUS && 	     OpCode._isInArray(__program[OpCode._getNextOperator(scan)],			    OpCode._opLengthOne, 0))	    ++minLength;	  else if(OpCode._opType[op] == OpCode._CURLY &&		  OpCode._isInArray(__program[OpCode._getNextOperator(scan) + 2],				 OpCode._opLengthOne, 0))	    minLength += OpCode._getArg1(__program, scan);	} else if(OpCode._isInArray(op, OpCode._opLengthOne, 0)) {	  ++curBack;	  ++minLength;	  length = 0;	  if(lastLongest.length() > longest.length()) {	    longest = lastLongest;	    backmost = back;	  }	  lastLongest = new StringBuffer();	}	scan = OpCode._getNext(__program, scan);      } // end while      
if(lastLongest.length() +	 ((OpCode._opType[__program[first]] == OpCode._EOL) ? 1 : 0) >	 longest.length()) {	longest = lastLongest;	backmost = back;      } else	lastLongest = new StringBuffer();      if(longest.length() > 0 && startString == null) {	mustString = longest.toString();	if(backmost < 0)	  backmost = -1;	regexp._back = backmost;	/*	  if(longest.length() > 	  (((caseInsensitive & __CASE_INSENSITIVE) != 0 ||	  OpCode._opType[__program[first]] == OpCode._EOL)	  ? 1 : 0))	  */	          } else	longest = null;    } // end if    regexp._isCaseInsensitive = ((caseInsensitive & __CASE_INSENSITIVE) != 0);    regexp._numParentheses  = __numParentheses - 1;    regexp._minLength       = minLength;    if(mustString != null) {      regexp._mustString = mustString.toCharArray();      regexp._mustUtility = 100;    }    if(startString != null)      regexp._startString = startString.toCharArray();    return regexp;  }  /**   * Same as calling <b>compile(pattern, Perl5Compiler.DEFAULT_MASK);</b>   * <p>   * @param pattern  A regular expression to compile.   * @return A Pattern instance constituting the compiled regular expression.   *         This instance will always be a Perl5Pattern and can be reliably   *         casted to a Perl5Pattern.   * @exception MalformedPatternException  If the compiled expression   *  is not a valid Perl5 regular expression.   */  public Pattern compile(char[] pattern) throws MalformedPatternException {	 return compile(pattern, DEFAULT_MASK);  }  /**   * Same as calling <b>compile(pattern, Perl5Compiler.DEFAULT_MASK);</b>   * <p>   * @param pattern  A regular expression to compile.   * @return A Pattern instance constituting the compiled regular expression.   *         This instance will always be a Perl5Pattern and can be reliably   *         casted to a Perl5Pattern.   * @exception MalformedPatternException  If the compiled expression   *  is not a valid Perl5 regular expression.   
*/
  public Pattern compile(String pattern) throws MalformedPatternException {
    // Convert once, then hand off to the char[] overload with the
    // default option mask.
    final char[] patternChars = pattern.toCharArray();
    return compile(patternChars, DEFAULT_MASK);
  }


  /**
   * Compiles a Perl5 regular expression, supplied as a String, into a
   * Perl5Pattern instance usable by a Perl5Matcher.  The pattern is
   * converted to a character array and passed, together with the options,
   * to the <code>compile(char[], int)</code> overload.
   * <p>
   * @param pattern  A Perl5 regular expression to compile.
   * @param options  Compiler flags, formed as a logical OR of any number
   *                 of the <b>MASK</b> constants.  For example:
   *                 <pre>
   * regex =
   *   compiler.compile("^\\w+\\d+$",
   *                    Perl5Compiler.CASE_INSENSITIVE_MASK |
   *                    Perl5Compiler.MULTILINE_MASK);
   *                 </pre>
   *                  This asks for the input to be treated as multiple
   *                  lines and for matching to be case insensitive.
   * @return A Pattern instance constituting the compiled regular expression.
   *         This instance will always be a Perl5Pattern and can be reliably
   *         cast to a Perl5Pattern.
   * @exception MalformedPatternException  If the compiled expression
   *  is not a valid Perl5 regular expression.
   */
  public Pattern compile(String pattern, int options)
       throws MalformedPatternException {
    final char[] patternChars = pattern.toCharArray();
    return compile(patternChars, options);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -