⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 在 Linux 平台上的网页编程的模板
💻 C
📖 第 1 页 / 共 5 页
字号:
	{ /* Caller did not allocate a buffer.	Do it for them.	 */	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);	}      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);      bufp->allocated = INIT_BUF_SIZE;    }  begalt = b = bufp->buffer;  /* Loop through the uncompiled pattern until we're at the end.  */  while (p != pend)    {      PATFETCH (c);      switch (c)	{	case '^':	  {	    if (   /* If at start of pattern, it's an operator.	 */		   p == pattern + 1		   /* If context independent, it's an operator.	 */		|| syntax & RE_CONTEXT_INDEP_ANCHORS		   /* Otherwise, depends on what's come before.	 */		|| at_begline_loc_p (pattern, p, syntax))	      BUF_PUSH (begline);	    else	      goto normal_char;	  }	  break;	case '$':	  {	    if (   /* If at end of pattern, it's an operator.  */		   p == pend		   /* If context independent, it's an operator.	 */		|| syntax & RE_CONTEXT_INDEP_ANCHORS		   /* Otherwise, depends on what's next.  */		|| at_endline_loc_p (p, pend, syntax))	       BUF_PUSH (endline);	     else	       goto normal_char;	   }	   break;	case '+':	case '?':	  if ((syntax & RE_BK_PLUS_QM)	      || (syntax & RE_LIMITED_OPS))	    goto normal_char;	handle_plus:	case '*':	  /* If there is no previous pattern... */	  if (!laststart)	    {	      if (syntax & RE_CONTEXT_INVALID_OPS)		FREE_STACK_RETURN (REG_BADRPT);	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))		goto normal_char;	    }	  {	    /* Are we optimizing this jump?  */	    boolean keep_string_p = false;	    /* 1 means zero (many) matches is allowed.	*/	    char zero_times_ok = 0, many_times_ok = 0;	    /* If there is a sequence of repetition chars, collapse it	       down to just one (the right one).  We can't combine	       interval operators with these because of, e.g., `a{2}*',	       which should only match an even number of `a's.	
*/	    for (;;)	      {		zero_times_ok |= c != '+';		many_times_ok |= c != '?';		if (p == pend)		  break;		PATFETCH (c);		if (c == '*'		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))		  ;		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')		  {		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);		    PATFETCH (c1);		    if (!(c1 == '+' || c1 == '?'))		      {			PATUNFETCH;			PATUNFETCH;			break;		      }		    c = c1;		  }		else		  {		    PATUNFETCH;		    break;		  }		/* If we get here, we found another repeat character.  */	       }	    /* Star, etc. applied to an empty pattern is equivalent	       to an empty pattern.  */	    if (!laststart)	      break;	    /* Now we know whether or not zero matches is allowed	       and also whether or not two or more matches is allowed.	*/	    if (many_times_ok)	      { /* More than one repetition is allowed, so put in at the		   end a backward relative jump from `b' to before the next		   jump we're going to put in below (which jumps from		   laststart to after this jump).		   But if we are at the `*' in the exact sequence `.*\n',		   insert an unconditional jump backwards to the .,		   instead of the beginning of the loop.  This way we only		   push a failure point once, instead of every time		   through the loop.  */		assert (p - 1 > pattern);		/* Allocate the space for the jump.  */		GET_BUFFER_SPACE (3);		/* We know we are not at the first character of the pattern,		   because laststart was nonzero.  And we've already		   incremented `p', by the way, to be the character after		   the `*'.  Do we have to do something analogous here		   for null bytes, because of RE_DOT_NOT_NULL?	*/		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')		    && zero_times_ok		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')		    && !(syntax & RE_DOT_NEWLINE))		  { /* We have .*\n.  */		    STORE_JUMP (jump, b, laststart);		    keep_string_p = true;		  }		else		  /* Anything else.  
*/		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);		/* We've added more stuff to the buffer.  */		b += 3;	      }	    /* On failure, jump from laststart to b + 3, which will be the	       end of the buffer after this jump is inserted.  */	    GET_BUFFER_SPACE (3);	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump				       : on_failure_jump,			 laststart, b + 3);	    pending_exact = 0;	    b += 3;	    if (!zero_times_ok)	      {		/* At least one repetition is required, so insert a		   `dummy_failure_jump' before the initial		   `on_failure_jump' instruction of the loop. This		   effects a skip over that instruction the first time		   we hit that loop.  */		GET_BUFFER_SPACE (3);		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);		b += 3;	      }	    }	  break;	case '.':	  laststart = b;	  BUF_PUSH (anychar);	  break;	case '[':	  {	    boolean had_char_class = false;	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);	    /* Ensure that we have enough space to push a charset: the	       opcode, the length count, and the bitset; 34 bytes in all.  */	    GET_BUFFER_SPACE (34);	    laststart = b;	    /* We test `*p == '^' twice, instead of using an if	       statement, so we only need one BUF_PUSH.	 */	    BUF_PUSH (*p == '^' ? charset_not : charset);	    if (*p == '^')	      p++;	    /* Remember the first position in the bracket expression.  */	    p1 = p;	    /* Push the number of bytes in the bitmap.	*/	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);	    /* Clear the whole map.  */	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);	    /* charset_not matches newline according to a syntax bit.  */	    if ((re_opcode_t) b[-2] == charset_not		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))	      SET_LIST_BIT ('\n');	    /* Read in characters and ranges, setting map bits.	 */	    for (;;)	      {		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);		PATFETCH (c);		/* \ might escape characters inside [...] and [^...].  
*/		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')		  {		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);		    PATFETCH (c1);		    SET_LIST_BIT (c1);		    continue;		  }		/* Could be the end of the bracket expression.	If it's		   not (i.e., when the bracket expression is `[]' so		   far), the ']' character bit gets set way below.  */		if (c == ']' && p != p1 + 1)		  break;		/* Look ahead to see if it's a range when the last thing		   was a character class.  */		if (had_char_class && c == '-' && *p != ']')		  FREE_STACK_RETURN (REG_ERANGE);		/* Look ahead to see if it's a range when the last thing		   was a character: if this is a hyphen not at the		   beginning or the end of a list, then it's the range		   operator.  */		if (c == '-'		    && !(p - 2 >= pattern && p[-2] == '[')		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')		    && *p != ']')		  {		    reg_errcode_t ret		      = compile_range (&p, pend, translate, syntax, b);		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);		  }		else if (p[0] == '-' && p[1] != ']')		  { /* This handles ranges made up of characters only.	*/		    reg_errcode_t ret;		    /* Move past the `-'.  */		    PATFETCH (c1);		    ret = compile_range (&p, pend, translate, syntax, b);		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);		  }		/* See if we're at the beginning of a possible character		   class.  */		else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')		  { /* Leave room for the null.	 */		    char str[CHAR_CLASS_MAX_LENGTH + 1];		    PATFETCH (c);		    c1 = 0;		    /* If pattern is `[[:'.  */		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);		    for (;;)		      {			PATFETCH (c);			if (c == ':' || c == ']' || p == pend			    || c1 == CHAR_CLASS_MAX_LENGTH)			  break;			str[c1++] = c;		      }		    str[c1] = '\0';		    /* If isn't a word bracketed by `[:' and:`]':		       undo the ending character, the letters, and leave		       the leading `:' and `[' (but set bits for them).	 
*/		    if (c == ':' && *p == ']')		      {			int ch;			boolean is_alnum = STREQ (str, "alnum");			boolean is_alpha = STREQ (str, "alpha");			boolean is_blank = STREQ (str, "blank");			boolean is_cntrl = STREQ (str, "cntrl");			boolean is_digit = STREQ (str, "digit");			boolean is_graph = STREQ (str, "graph");			boolean is_lower = STREQ (str, "lower");			boolean is_print = STREQ (str, "print");			boolean is_punct = STREQ (str, "punct");			boolean is_space = STREQ (str, "space");			boolean is_upper = STREQ (str, "upper");			boolean is_xdigit = STREQ (str, "xdigit");			if (!IS_CHAR_CLASS (str))			  FREE_STACK_RETURN (REG_ECTYPE);			/* Throw away the ] at the end of the character			   class.  */			PATFETCH (c);			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)			  {			    int translated = TRANSLATE (ch);			    /* This was split into 3 if's to			       avoid an arbitrary limit in some compiler.  */			    if (   (is_alnum  && ISALNUM (ch))				|| (is_alpha  && ISALPHA (ch))				|| (is_blank  && ISBLANK (ch))				|| (is_cntrl  && ISCNTRL (ch)))			      SET_LIST_BIT (translated);			    if (   (is_digit  && ISDIGIT (ch))				|| (is_graph  && ISGRAPH (ch))				|| (is_lower  && ISLOWER (ch))				|| (is_print  && ISPRINT (ch)))			      SET_LIST_BIT (translated);			    if (   (is_punct  && ISPUNCT (ch))				|| (is_space  && ISSPACE (ch))				|| (is_upper  && ISUPPER (ch))				|| (is_xdigit && ISXDIGIT (ch)))			      SET_LIST_BIT (translated);			  }			had_char_class = true;		      }		    else		      {			c1++;			while (c1--)			  PATUNFETCH;			SET_LIST_BIT ('[');			SET_LIST_BIT (':');			had_char_class = false;		      }		  }		else		  {		    had_char_class = false;		    SET_LIST_BIT (c);		  }	      }	    /* Discard any (non)matching list bytes that are all 0 at the	       end of the map.	Decrease the map-length byte too.  
*/	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)	      b[-1]--;	    b += b[-1];	  }	  break;	case '(':	  if (syntax & RE_NO_BK_PARENS)	    goto handle_open;	  else	    goto normal_char;	case ')':	  if (syntax & RE_NO_BK_PARENS)	    goto handle_close;	  else	    goto normal_char;	case '\n':	  if (syntax & RE_NEWLINE_ALT)	    goto handle_alt;	  else	    goto normal_char;	case '|':	  if (syntax & RE_NO_BK_VBAR)	    goto handle_alt;	  else	    goto normal_char;	case '{':	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)	     goto handle_interval;	   else	     goto normal_char;	case '\\':	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);	  /* Do not translate the character after the \, so that we can	     distinguish, e.g., \B from \b, even if we normally would	     translate, e.g., B to b.  */	  PATFETCH_RAW (c);	  switch (c)	    {	    case '(':	      if (syntax & RE_NO_BK_PARENS)		goto normal_backslash;	    handle_open:	      bufp->re_nsub++;	      regnum++;	      if (COMPILE_STACK_FULL)		{		  RETALLOC (compile_stack.stack, compile_stack.size << 1,			    compile_stack_elt_t);		  if (compile_stack.stack == NULL) return REG_ESPACE;		  compile_stack.size <<= 1;		}	      /* These are the values to restore when we hit end of this		 group.	 They are all relative offsets, so that if the		 whole pattern moves because of realloc, they will still		 be valid.  */	      COMPILE_STACK_TOP.begal

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -