⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 功能较全面的反汇编器:反汇编器ht-2.0.15.tar.gz
💻 C
📖 第 1 页 / 共 5 页
字号:
			goto normal_char;		 }		 break;	case '+':	   case '?':		if ((syntax & RE_BK_PLUS_QM)		    || (syntax & RE_LIMITED_OPS))		  goto normal_char;	   handle_plus:	   case '*':		/* If there is no previous pattern... */		if (!laststart)		  {		    if (syntax & RE_CONTEXT_INVALID_OPS)			 FREE_STACK_RETURN (REG_BADRPT);		    else if (!(syntax & RE_CONTEXT_INDEP_OPS))			 goto normal_char;		  }		{		  /* Are we optimizing this jump?  */		  boolean keep_string_p = false;		  		  /* 1 means zero (many) matches is allowed.  */		  char zero_times_ok = 0, many_times_ok = 0;		  /* If there is a sequence of repetition chars, collapse it			down to just one (the right one).  We can't combine			interval operators with these because of, e.g., `a{2}*',			which should only match an even number of `a's.  */		  for (;;)		    {			 zero_times_ok |= c != '+';			 many_times_ok |= c != '?';			 if (p == pend)			   break;			 PATFETCH (c);			 if (c == '*'				|| (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))			   ;			 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')			   {				if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);				PATFETCH (c1);				if (!(c1 == '+' || c1 == '?'))				  {				    PATUNFETCH;				    PATUNFETCH;				    break;				  }				c = c1;			   }			 else			   {				PATUNFETCH;				break;			   }			 /* If we get here, we found another repeat character.  */			}		  /* Star, etc. applied to an empty pattern is equivalent			to an empty pattern.  */		  if (!laststart)  		    break;		  /* Now we know whether or not zero matches is allowed			and also whether or not two or more matches is allowed.  */		  if (many_times_ok)		    { /* More than one repetition is allowed, so put in at the			    end a backward relative jump from `b' to before the next			    jump we're going to put in below (which jumps from			    laststart to after this jump).  			    But if we are at the `*' in the exact sequence `.*\n',			    insert an unconditional jump backwards to the .,			    instead of the beginning of the loop.  This way we only			    push a failure point once, instead of every time			    through the loop.  */			 assert (p - 1 > pattern);			 /* Allocate the space for the jump.  */			 GET_BUFFER_SPACE (3);			 /* We know we are not at the first character of the pattern,			    because laststart was nonzero.  And we've already			    incremented `p', by the way, to be the character after			    the `*'.  Do we have to do something analogous here			    for null bytes, because of RE_DOT_NOT_NULL?  */			 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')		    && zero_times_ok				&& p < pend && TRANSLATE (*p) == TRANSLATE ('\n')				&& !(syntax & RE_DOT_NEWLINE))			   { /* We have .*\n.  */				STORE_JUMP (jump, b, laststart);				keep_string_p = true;			   }			 else			   /* Anything else.  */			   STORE_JUMP (maybe_pop_jump, b, laststart - 3);			 /* We've added more stuff to the buffer.  */			 b += 3;		    }		  /* On failure, jump from laststart to b + 3, which will be the			end of the buffer after this jump is inserted.  */		  GET_BUFFER_SPACE (3);		  INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump							    : on_failure_jump,					laststart, b + 3);		  pending_exact = 0;		  b += 3;		  if (!zero_times_ok)		    {			 /* At least one repetition is required, so insert a			    `dummy_failure_jump' before the initial			    `on_failure_jump' instruction of the loop. This			    effects a skip over that instruction the first time			    we hit that loop.  */			 GET_BUFFER_SPACE (3);			 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);			 b += 3;		    }		  }	  break;	case '.':		laststart = b;		BUF_PUSH (anychar);		break;	   case '[':		{		  boolean had_char_class = false;		  if (p == pend) FREE_STACK_RETURN (REG_EBRACK);		  /* Ensure that we have enough space to push a charset: the			opcode, the length count, and the bitset; 34 bytes in all.  */	    GET_BUFFER_SPACE (34);		  laststart = b;		  /* We test `*p == '^' twice, instead of using an if			statement, so we only need one BUF_PUSH.  */		  BUF_PUSH (*p == '^' ? charset_not : charset); 		  if (*p == '^')		    p++;		  /* Remember the first position in the bracket expression.  */		  p1 = p;		  /* Push the number of bytes in the bitmap.  */		  BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);		  /* Clear the whole map.  */		  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);		  /* charset_not matches newline according to a syntax bit.  */		  if ((re_opcode_t) b[-2] == charset_not			 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))		    SET_LIST_BIT ('\n');		  /* Read in characters and ranges, setting map bits.  */		  for (;;)		    {			 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);			 PATFETCH (c);			 /* \ might escape characters inside [...] and [^...].  */			 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')			   {				if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);				PATFETCH (c1);				SET_LIST_BIT (c1);				continue;			   }			 /* Could be the end of the bracket expression.  If it's			    not (i.e., when the bracket expression is `[]' so			    far), the ']' character bit gets set way below.  */			 if (c == ']' && p != p1 + 1)			   break;			 /* Look ahead to see if it's a range when the last thing			    was a character class.  */			 if (had_char_class && c == '-' && *p != ']')			   FREE_STACK_RETURN (REG_ERANGE);			 /* Look ahead to see if it's a range when the last thing			    was a character: if this is a hyphen not at the			    beginning or the end of a list, then it's the range			    operator.  */			 if (c == '-' 				&& !(p - 2 >= pattern && p[-2] == '[') 				&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')				&& *p != ']')			   {				reg_errcode_t ret				  = compile_range (&p, pend, translate, syntax, b);				if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);			   }			 else if (p[0] == '-' && p[1] != ']')			   { /* This handles ranges made up of characters only.  */				reg_errcode_t ret;		    /* Move past the `-'.  */				PATFETCH (c1);								ret = compile_range (&p, pend, translate, syntax, b);				if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);			   }			 /* See if we're at the beginning of a possible character			    class.  */			 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')			   { /* Leave room for the null.  */				char str[CHAR_CLASS_MAX_LENGTH + 1];				PATFETCH (c);				c1 = 0;				/* If pattern is `[[:'.  */				if (p == pend) FREE_STACK_RETURN (REG_EBRACK);				for (;;)				  {				    PATFETCH (c);				    if (c == ':' || c == ']' || p == pend					   || c1 == CHAR_CLASS_MAX_LENGTH)					 break;				    str[c1++] = c;				  }				str[c1] = '\0';				/* If isn't a word bracketed by `[:' and:`]':				   undo the ending character, the letters, and leave 				   the leading `:' and `[' (but set bits for them).  */				if (c == ':' && *p == ']')				  {				    int ch;				    boolean is_alnum = STREQ (str, "alnum");				    boolean is_alpha = STREQ (str, "alpha");				    boolean is_blank = STREQ (str, "blank");				    boolean is_cntrl = STREQ (str, "cntrl");				    boolean is_digit = STREQ (str, "digit");				    boolean is_graph = STREQ (str, "graph");				    boolean is_lower = STREQ (str, "lower");				    boolean is_print = STREQ (str, "print");				    boolean is_punct = STREQ (str, "punct");				    boolean is_space = STREQ (str, "space");				    boolean is_upper = STREQ (str, "upper");				    boolean is_xdigit = STREQ (str, "xdigit");				    				    if (!IS_CHAR_CLASS (str))			  FREE_STACK_RETURN (REG_ECTYPE);				    /* Throw away the ] at the end of the character					  class.  */				    PATFETCH (c);									    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);				    for (ch = 0; ch < 1 << BYTEWIDTH; ch++)					 {			    /* This was split into 3 if's to				  avoid an arbitrary limit in some compiler.  */					   if (   (is_alnum  && ISALNUM (ch))						  || (is_alpha  && ISALPHA (ch))						  || (is_blank  && ISBLANK (ch))						  || (is_cntrl  && ISCNTRL (ch)))				 SET_LIST_BIT (ch);			    if (   (is_digit  && ISDIGIT (ch))						  || (is_graph  && ISGRAPH (ch))						  || (is_lower  && ISLOWER (ch))						  || (is_print  && ISPRINT (ch)))				 SET_LIST_BIT (ch);			    if (   (is_punct  && ISPUNCT (ch))						  || (is_space  && ISSPACE (ch))						  || (is_upper  && ISUPPER (ch))						  || (is_xdigit && ISXDIGIT (ch)))				 SET_LIST_BIT (ch);					 }				    had_char_class = true;				  }				else				  {				    c1++;				    while (c1--)    					 PATUNFETCH;				    SET_LIST_BIT ('[');				    SET_LIST_BIT (':');				    had_char_class = false;				  }			   }			 else			   {				had_char_class = false;				SET_LIST_BIT (c);			   }		    }		  /* Discard any (non)matching list bytes that are all 0 at the			end of the map.  Decrease the map-length byte too.  */		  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 		    b[-1]--; 		  b += b[-1];		}		break;	case '(':		if (syntax & RE_NO_BK_PARENS)		  goto handle_open;		else		  goto normal_char;	   case ')':		if (syntax & RE_NO_BK_PARENS)		  goto handle_close;		else		  goto normal_char;	   case '\n':		if (syntax & RE_NEWLINE_ALT)		  goto handle_alt;		else		  goto normal_char;	case '|':		if (syntax & RE_NO_BK_VBAR)		  goto handle_alt;		else		  goto normal_char;	   case '{':		 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)		   goto handle_interval;		 else		   goto normal_char;	   case '\\':		if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);		/* Do not translate the character after the \, so that we can		   distinguish, e.g., \B from \b, even if we normally would		   translate, e.g., B to b.  */		PATFETCH_RAW (c);		switch (c)		  {		  case '(':		    if (syntax & RE_NO_BK_PARENS)			 goto normal_backslash;		  handle_open:		    bufp->re_nsub++;		    regnum++;		    if (COMPILE_STACK_FULL)			 { 			   RETALLOC (compile_stack.stack, compile_stack.size << 1,					   compile_stack_elt_t);			   if (compile_stack.stack == NULL) return REG_ESPACE;			   compile_stack.size <<= 1;			 }		    /* These are the values to restore when we hit end of this			  group.  They are all relative offsets, so that if the			  whole pattern moves because of realloc, they will still			  be valid.  */		    COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;		    COMPILE_STACK_TOP.fixup_alt_jump 			 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;		    COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;		    COMPILE_STACK_TOP.regnum = regnum;		    /* We will eventually replace the 0 with the number of			  groups inner to this one.  But do not push a			  start_memory for groups beyond the last one we can			  represent in the compiled pattern.  */		    if (regnum <= MAX_REGNUM)			 {			   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;			   BUF_PUSH_3 (start_memory, regnum, 0);			 }			 		    compile_stack.avail++;		    fixup_alt_jump = 0;		    laststart = 0;		    begalt = b;		 /* If we've reached MAX_REGNUM groups, then this open		 won't actually generate any code, so we'll have to		 clear pending_exact explicitly.  */		 pending_exact = 0;		    break;		  case ')':		    if (syntax & RE_NO_BK_PARENS) goto normal_backslash;		    if (COMPILE_STACK_EMPTY)			 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)			   goto normal_backslash;			 else			   FREE_STACK_RETURN (REG_ERPAREN);		  handle_close:		    if (fixup_alt_jump)			 { /* Push a dummy failure point at the end of the				 alternative for a possible future				 `pop_failure_jump' to pop.  See comments at				 `push_dummy_failure' in `re_match_2'.  */			   BUF_PUSH (push_dummy_failure);			   			   /* We allocated space for this

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -