⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.cc

📁 早期freebsd实现
💻 CC
📖 第 1 页 / 共 5 页
字号:
  /* Place in pattern (i.e., the {) to which to go back if the interval     is invalid.  */  const char *beg_interval = 0;    /* Stack of information saved by \( and restored by \).     Four stack elements are pushed by each \(:       First, the value of b.       Second, the value of fixup_jump.       Third, the value of regnum.       Fourth, the value of begalt.  */  int stackb[40];  int *stackp = stackb;  int *stacke = stackb + 40;  int *stackt;  /* Counts \('s as they are encountered.  Remembered for the matching \),     where it becomes the register number to put in the stop_memory     command.  */  unsigned regnum = 1;  bufp->fastmap_accurate = 0;#ifndef emacs#ifndef SYNTAX_TABLE  /* Initialize the syntax table.  */   init_syntax_once();#endif#endif  if (bufp->allocated == 0)    {      bufp->allocated = INIT_BUF_SIZE;      if (bufp->buffer)	/* EXTEND_BUFFER loses when bufp->allocated is 0.  */	bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);      else	/* Caller did not allocate a buffer.  Do it for them.  */	bufp->buffer = (char *) malloc (INIT_BUF_SIZE);      if (!bufp->buffer) goto memory_exhausted;      begalt = b = bufp->buffer;    }  while (p != pend)    {      PATFETCH (c);      switch (c)	{	case '$':	  {	    const char *p1 = p;	    /* When testing what follows the $,	       look past the \-constructs that don't consume anything.  */	    if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))	      while (p1 != pend)		{		  if (*p1 == '\\' && p1 + 1 != pend		      && (p1[1] == '<' || p1[1] == '>'			  || p1[1] == '`' || p1[1] == '\''#ifdef emacs			  || p1[1] == '='#endif			  || p1[1] == 'b' || p1[1] == 'B'))		    p1 += 2;		  else		    break;		}            if (obscure_syntax & RE_TIGHT_VBAR)	      {		if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)		  goto normal_char;		/* Make operand of last vbar end before this `$'.  */		if (fixup_jump)		  store_jump (fixup_jump, jump, b);		fixup_jump = 0;		BUFPUSH (endline);		break;	      }	    /* $ means succeed if at end of line, but only in special contexts.	      If validly in the middle of a pattern, it is a normal character. */            if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)	      goto invalid_pattern;	    if (p1 == pend || *p1 == '\n'		|| (obscure_syntax & RE_CONTEXT_INDEP_OPS)		|| (obscure_syntax & RE_NO_BK_PARENS		    ? *p1 == ')'		    : *p1 == '\\' && p1[1] == ')')		|| (obscure_syntax & RE_NO_BK_VBAR		    ? *p1 == '|'		    : *p1 == '\\' && p1[1] == '|'))	      {		BUFPUSH (endline);		break;	      }	    goto normal_char;          }	case '^':	  /* ^ means succeed if at beg of line, but only if no preceding              pattern.  */                       if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)            goto invalid_pattern;          if (laststart && p - 2 >= pattern && p[-2] != '\n'	       && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))	    goto normal_char;	  if (obscure_syntax & RE_TIGHT_VBAR)	    {	      if (p != pattern + 1		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))		goto normal_char;	      BUFPUSH (begline);	      begalt = b;	    }	  else	    BUFPUSH (begline);	  break;	case '+':	case '?':	  if ((obscure_syntax & RE_BK_PLUS_QM)	      || (obscure_syntax & RE_LIMITED_OPS))	    goto normal_char;	handle_plus:	case '*':	  /* If there is no previous pattern, char not special. */	  if (!laststart)            {              if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)                goto invalid_pattern;              else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))		goto normal_char;            }	  /* If there is a sequence of repetition chars,	     collapse it down to just one.  */	  zero_times_ok = 0;	  many_times_ok = 0;	  while (1)	    {	      zero_times_ok |= c != '+';	      many_times_ok |= c != '?';	      if (p == pend)		break;	      PATFETCH (c);	      if (c == '*')		;	      else if (!(obscure_syntax & RE_BK_PLUS_QM)		       && (c == '+' || c == '?'))		;	      else if ((obscure_syntax & RE_BK_PLUS_QM)		       && c == '\\')		{		  int c1;		  PATFETCH (c1);		  if (!(c1 == '+' || c1 == '?'))		    {		      PATUNFETCH;		      PATUNFETCH;		      break;		    }		  c = c1;		}	      else		{		  PATUNFETCH;		  break;		}	    }	  /* Star, etc. applied to an empty pattern is equivalent	     to an empty pattern.  */	  if (!laststart)  	    break;	  /* Now we know whether or not zero matches is allowed	     and also whether or not two or more matches is allowed.  */	  if (many_times_ok)	    {	      /* If more than one repetition is allowed, put in at the                 end a backward relative jump from b to before the next                 jump we're going to put in below (which jumps from                 laststart to after this jump).  */              GET_BUFFER_SPACE (3);	      store_jump (b, maybe_finalize_jump, laststart - 3);	      b += 3;  	/* Because store_jump put stuff here.  */	    }          /* On failure, jump from laststart to b + 3, which will be the             end of the buffer after this jump is inserted.  */          GET_BUFFER_SPACE (3);	  insert_jump (on_failure_jump, laststart, b + 3, b);	  pending_exact = 0;	  b += 3;	  if (!zero_times_ok)	    {	      /* At least one repetition is required, so insert a                 dummy-failure before the initial on-failure-jump                 instruction of the loop. This effects a skip over that                 instruction the first time we hit that loop.  */              GET_BUFFER_SPACE (6);              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);	      b += 3;	    }	  break;	case '.':	  laststart = b;	  BUFPUSH (anychar);	  break;        case '[':          if (p == pend)            goto invalid_pattern;	  while (b - bufp->buffer		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)	    EXTEND_BUFFER;	  laststart = b;	  if (*p == '^')	    {              BUFPUSH (charset_not);               p++;            }	  else	    BUFPUSH (charset);	  p1 = p;	  BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);	  /* Clear the whole map */	  memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);          	  if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)            SET_LIST_BIT ('\n');	  /* Read in characters and ranges, setting map bits.  */	  while (1)	    {	      /* Don't translate while fetching, in case it's a range bound.		 When we set the bit for the character, we translate it.  */	      PATFETCH_RAW (c);	      /* If set, \ escapes characters when inside [...].  */	      if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')	        {	          PATFETCH(c1);                  SET_LIST_BIT (c1);	          continue;	        }              if (c == ']')                {                  if (p == p1 + 1)                    {		      /* If this is an empty bracket expression.  */                      if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)                           && p == pend)                        goto invalid_pattern;                    }                  else 		    /* Stop if this isn't merely a ] inside a bracket                       expression, but rather the end of a bracket                       expression.  */                    break;                }              /* Get a range.  */              if (p[0] == '-' && p[1] != ']')		{                  PATFETCH (c1);		  /* Don't translate the range bounds while fetching them.  */		  PATFETCH_RAW (c1);                  		  if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)                    goto invalid_pattern;                    		  if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)                       && c1 == '-' && *p != ']')                    goto invalid_pattern;                                      while (c <= c1)		    {		      /* Translate each char that's in the range.  */		      if (translate)			SET_LIST_BIT (translate[c]);		      else			SET_LIST_BIT (c);                      c++;		    }                }	      else if ((obscure_syntax & RE_CHAR_CLASSES)			&&  c == '[' && p[0] == ':')                {		  /* Longest valid character class word has six characters.  */                  char str[CHAR_CLASS_MAX_LENGTH];		  PATFETCH (c);		  c1 = 0;		  /* If no ] at end.  */                  if (p == pend)                    goto invalid_pattern;		  while (1)		    {		      /* Don't translate the ``character class'' characters.  */                      PATFETCH_RAW (c);		      if (c == ':' || c == ']' || p == pend                          || c1 == CHAR_CLASS_MAX_LENGTH)		        break;		      str[c1++] = c;		    }		  str[c1] = '\0';		  if (p == pend 			      || c == ']'	/* End of the bracket expression.  */                      || p[0] != ']'		      || p + 1 == pend                      || (strcmp (str, "alpha") != 0                           && strcmp (str, "upper") != 0			  && strcmp (str, "lower") != 0                           && strcmp (str, "digit") != 0			  && strcmp (str, "alnum") != 0                           && strcmp (str, "xdigit") != 0			  && strcmp (str, "space") != 0                           && strcmp (str, "print") != 0			  && strcmp (str, "punct") != 0                           && strcmp (str, "graph") != 0			  && strcmp (str, "cntrl") != 0))		    {		       /* Undo the ending character, the letters, and leave                           the leading : and [ (but set bits for them).  */                      c1++;		      while (c1--)    			PATUNFETCH;		      SET_LIST_BIT ('[');		      SET_LIST_BIT (':');	            }                  else                    {                      /* The ] at the end of the character class.  */                      PATFETCH (c);					                      if (c != ']')                        goto invalid_pattern;		      for (c = 0; c < (1 << BYTEWIDTH); c++)			{			  if ((strcmp (str, "alpha") == 0  && isalpha (c))			       || (strcmp (str, "upper") == 0  && isupper (c))			       || (strcmp (str, "lower") == 0  && islower (c))			       || (strcmp (str, "digit") == 0  && isdigit (c))			       || (strcmp (str, "alnum") == 0  && isalnum (c))			       || (strcmp (str, "xdigit") == 0  && isxdigit (c))			       || (strcmp (str, "space") == 0  && isspace (c))			       || (strcmp (str, "print") == 0  && isprint (c))			       || (strcmp (str, "punct") == 0  && ispunct (c))			       || (strcmp (str, "graph") == 0  && isgraph (c))			       || (strcmp (str, "cntrl") == 0  && iscntrl (c)))			    SET_LIST_BIT (c);			}		    }                }              else if (translate)		SET_LIST_BIT (translate[c]);	      else                SET_LIST_BIT (c);	    }          /* Discard any character set/class bitmap bytes that are all             0 at the end of the map. Decrement the map-length byte too.  */          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)             b[-1]--;           b += b[-1];          break;	case '(':	  if (! (obscure_syntax & RE_NO_BK_PARENS))	    goto normal_char;	  else	    goto handle_open;	case ')':	  if (! (obscure_syntax & RE_NO_BK_PARENS))	    goto normal_char;	  else	    goto handle_close;        case '\n':	  if (! (obscure_syntax & RE_NEWLINE_OR))	    goto normal_char;	  else	    goto handle_bar;	case '|':	  if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)              && (! laststart  ||  p == pend))	    goto invalid_pattern;          else if (! (obscure_syntax & RE_NO_BK_VBAR))	    goto normal_char;	  else	    goto handle_bar;	case '{':           if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)                  && (obscure_syntax & RE_INTERVALS)))             goto normal_char;           else             goto handle_interval;                     case '\\':	  if (p == pend) goto invalid_pattern;	  PATFETCH_RAW (c);	  switch (c)	    {	    case '(':	      if (obscure_syntax & RE_NO_BK_PARENS)		goto normal_backsl;	    handle_open:	      if (stackp == stacke) goto nesting_too_deep;              /* Laststart should point to the start_memory that we are about                 to push (unless the pattern has RE_NREGS or more ('s).  */              *stackp++ = b - bufp->buffer;    	      if (regnum < RE_NREGS)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -