⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 HLPDK V10.0+ System Extension Library
💻 C
📖 第 1 页 / 共 5 页
字号:
  char many_times_ok;

  /* Address of beginning of regexp, or inside of last \(.  */

  char *begalt = b;

  /* In processing an interval, at least this many matches must be made.  */
  int lower_bound;

  /* In processing an interval, at most this many matches can be made.  */
  int upper_bound;

  /* Place in pattern (i.e., the {) to which to go back if the interval
     is invalid.  */
  char *beg_interval = 0;
  
  /* Stack of information saved by \( and restored by \).
     Four stack elements are pushed by each \(:
       First, the value of b.
       Second, the value of fixup_jump.
       Third, the value of regnum.
       Fourth, the value of begalt.  */

  int stackb[40];
  int *stackp = stackb;
  int *stacke = stackb + 40;
  int *stackt;

  /* Counts \('s as they are encountered.  Remembered for the matching \),
     where it becomes the register number to put in the stop_memory
     command.  */

  int regnum = 1;

  bufp->fastmap_accurate = 0;

#ifndef emacs
#ifndef SYNTAX_TABLE
  /* Initialize the syntax table.  */
   init_syntax_once();
#endif
#endif

  if (bufp->allocated == 0)
    {
      bufp->allocated = INIT_BUF_SIZE;
      if (bufp->buffer)
	/* EXTEND_BUFFER loses when bufp->allocated is 0.  */
	bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
      else
	/* Caller did not allocate a buffer.  Do it for them.  */
	bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
      if (!bufp->buffer) goto memory_exhausted;
      begalt = b = bufp->buffer;
    }

  while (p != pend)
    {
      PATFETCH (c);

      switch (c)
	{
	case '$':
	  {
	    char *p1 = p;
	    /* When testing what follows the $,
	       look past the \-constructs that don't consume anything.  */
	    if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
	      while (p1 != pend)
		{
		  if (*p1 == '\\' && p1 + 1 != pend
		      && (p1[1] == '<' || p1[1] == '>'
			  || p1[1] == '`' || p1[1] == '\''
#ifdef emacs
			  || p1[1] == '='
#endif
			  || p1[1] == 'b' || p1[1] == 'B'))
		    p1 += 2;
		  else
		    break;
		}
            if (obscure_syntax & RE_TIGHT_VBAR)
	      {
		if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
		  goto normal_char;
		/* Make operand of last vbar end before this `$'.  */
		if (fixup_jump)
		  store_jump (fixup_jump, jump, b);
		fixup_jump = 0;
		BUFPUSH (endline);
		break;
	      }
	    /* $ means succeed if at end of line, but only in special contexts.
	      If validly in the middle of a pattern, it is a normal character. */

            if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
	      goto invalid_pattern;
	    if (p1 == pend || *p1 == '\n'
		|| (obscure_syntax & RE_CONTEXT_INDEP_OPS)
		|| (obscure_syntax & RE_NO_BK_PARENS
		    ? *p1 == ')'
		    : *p1 == '\\' && p1[1] == ')')
		|| (obscure_syntax & RE_NO_BK_VBAR
		    ? *p1 == '|'
		    : *p1 == '\\' && p1[1] == '|'))
	      {
		BUFPUSH (endline);
		break;
	      }
	    goto normal_char;
          }
	case '^':
	  /* ^ means succeed if at beg of line, but only if no preceding 
             pattern.  */
             
          if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
            goto invalid_pattern;
          if (laststart && p - 2 >= pattern && p[-2] != '\n'
	       && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
	    goto normal_char;
	  if (obscure_syntax & RE_TIGHT_VBAR)
	    {
	      if (p != pattern + 1
		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
		goto normal_char;
	      BUFPUSH (begline);
	      begalt = b;
	    }
	  else
	    BUFPUSH (begline);
	  break;

	case '+':
	case '?':
	  if ((obscure_syntax & RE_BK_PLUS_QM)
	      || (obscure_syntax & RE_LIMITED_OPS))
	    goto normal_char;
	handle_plus:
	case '*':
	  /* If there is no previous pattern, char not special. */
	  if (!laststart)
            {
              if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
                goto invalid_pattern;
              else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
		goto normal_char;
            }
	  /* If there is a sequence of repetition chars,
	     collapse it down to just one.  */
	  zero_times_ok = 0;
	  many_times_ok = 0;
	  while (1)
	    {
	      zero_times_ok |= c != '+';
	      many_times_ok |= c != '?';
	      if (p == pend)
		break;
	      PATFETCH (c);
	      if (c == '*')
		;
	      else if (!(obscure_syntax & RE_BK_PLUS_QM)
		       && (c == '+' || c == '?'))
		;
	      else if ((obscure_syntax & RE_BK_PLUS_QM)
		       && c == '\\')
		{
		  int c1;
		  PATFETCH (c1);
		  if (!(c1 == '+' || c1 == '?'))
		    {
		      PATUNFETCH;
		      PATUNFETCH;
		      break;
		    }
		  c = c1;
		}
	      else
		{
		  PATUNFETCH;
		  break;
		}
	    }

	  /* Star, etc. applied to an empty pattern is equivalent
	     to an empty pattern.  */
	  if (!laststart)  
	    break;

	  /* Now we know whether or not zero matches is allowed
	     and also whether or not two or more matches is allowed.  */
	  if (many_times_ok)
	    {
	      /* If more than one repetition is allowed, put in at the
                 end a backward relative jump from b to before the next
                 jump we're going to put in below (which jumps from
                 laststart to after this jump).  */
              GET_BUFFER_SPACE (3);
	      store_jump (b, maybe_finalize_jump, laststart - 3);
	      b += 3;  	/* Because store_jump put stuff here.  */
	    }
          /* On failure, jump from laststart to b + 3, which will be the
             end of the buffer after this jump is inserted.  */
          GET_BUFFER_SPACE (3);
	  insert_jump (on_failure_jump, laststart, b + 3, b);
	  pending_exact = 0;
	  b += 3;
	  if (!zero_times_ok)
	    {
	      /* At least one repetition is required, so insert a
                 dummy-failure before the initial on-failure-jump
                 instruction of the loop. This effects a skip over that
                 instruction the first time we hit that loop.  */
              GET_BUFFER_SPACE (6);
              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
	      b += 3;
	    }
	  break;

	case '.':
	  laststart = b;
	  BUFPUSH (anychar);
	  break;

        case '[':
          if (p == pend)
            goto invalid_pattern;
	  while (b - bufp->buffer
		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
	    EXTEND_BUFFER;

	  laststart = b;
	  if (*p == '^')
	    {
              BUFPUSH (charset_not); 
              p++;
            }
	  else
	    BUFPUSH (charset);
	  p1 = p;

	  BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
	  /* Clear the whole map */
	  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
          
	  if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
            SET_LIST_BIT ('\n');


	  /* Read in characters and ranges, setting map bits.  */
	  while (1)
	    {
	      PATFETCH (c);

	      /* If set, \ escapes characters when inside [...].  */
	      if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
	        {
	          PATFETCH(c1);
                  SET_LIST_BIT (c1);
	          continue;
	        }
              if (c == ']')
                {
                  if (p == p1 + 1)
                    {
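		      /* A ] that comes first in the bracket expression is an
			 ordinary member, not the terminator; it is only an
			 error if the pattern ends here and empty bracket
			 expressions are disallowed.  */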
                      if ((obscure_syntax & RE_NO_EMPTY_BRACKETS) 
                          && p == pend)
                        goto invalid_pattern;
                    }
                  else 
		    /* Stop if this isn't merely a ] inside a bracket
                       expression, but rather the end of a bracket
                       expression.  */
                    break;
                }
              /* Get a range.  */
              if (p[0] == '-' && p[1] != ']')
		{
                  PATFETCH (c1);
		  PATFETCH (c1);
                  
		  if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
                    goto invalid_pattern;
                    
		  if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END) 
                      && c1 == '-' && *p != ']')
                    goto invalid_pattern;
                    
                  while (c <= c1)
		    {
                      SET_LIST_BIT (c);
                      c++;
		    }
                }
	      else if ((obscure_syntax & RE_CHAR_CLASSES)
			&&  c == '[' && p[0] == ':')
                {
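		  /* Longest valid character class word has six characters.
		     NOTE(review): the copy loop below only stops once c1 has
		     reached CHAR_CLASS_MAX_LENGTH, and str[c1] is then set to
		     '\0', so str needs CHAR_CLASS_MAX_LENGTH + 1 bytes to hold
		     the terminator.  */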
                  char str[CHAR_CLASS_MAX_LENGTH];
		  PATFETCH (c);
		  c1 = 0;
		  /* The pattern ends right after the `[:', so there can be
		     no closing ].  */
                  if (p == pend)
                    goto invalid_pattern;
		  while (1)
		    {
		      /* Don't translate the ``character class'' characters.  */
                      PATFETCH_RAW (c);
		      if (c == ':' || c == ']' || p == pend
                          || c1 == CHAR_CLASS_MAX_LENGTH)
		        break;
		      str[c1++] = c;
		    }
		  str[c1] = '\0';
		  if (p == pend 	
		      || c == ']'	/* End of the bracket expression.  */
                      || p[0] != ']'
		      || p + 1 == pend
                      || (strcmp (str, "alpha") != 0 
                          && strcmp (str, "upper") != 0
			  && strcmp (str, "lower") != 0 
                          && strcmp (str, "digit") != 0
			  && strcmp (str, "alnum") != 0 
                          && strcmp (str, "xdigit") != 0
			  && strcmp (str, "space") != 0 
                          && strcmp (str, "print") != 0
			  && strcmp (str, "punct") != 0 
                          && strcmp (str, "graph") != 0
			  && strcmp (str, "cntrl") != 0))
		    {
		       /* Undo the ending character, the letters, and leave 
                          the leading : and [ (but set bits for them).  */
                      c1++;
		      while (c1--)    
			PATUNFETCH;
		      SET_LIST_BIT ('[');
		      SET_LIST_BIT (':');
	            }
                  else
                    {
                      /* The ] at the end of the character class.  */
                      PATFETCH (c);					
                      if (c != ']')
                        goto invalid_pattern;
		      for (c = 0; c < (1 << BYTEWIDTH); c++)
			{
			  if ((strcmp (str, "alpha") == 0  && isalpha (c))
			       || (strcmp (str, "upper") == 0  && isupper (c))
			       || (strcmp (str, "lower") == 0  && islower (c))
			       || (strcmp (str, "digit") == 0  && isdigit (c))
			       || (strcmp (str, "alnum") == 0  && isalnum (c))
			       || (strcmp (str, "xdigit") == 0  && isxdigit (c))
			       || (strcmp (str, "space") == 0  && isspace (c))
			       || (strcmp (str, "print") == 0  && isprint (c))
			       || (strcmp (str, "punct") == 0  && ispunct (c))
			       || (strcmp (str, "graph") == 0  && isgraph (c))
			       || (strcmp (str, "cntrl") == 0  && iscntrl (c)))
			    SET_LIST_BIT (c);
			}
		    }
                }
              else
                SET_LIST_BIT (c);
	    }

          /* Discard any character set/class bitmap bytes that are all
             0 at the end of the map. Decrement the map-length byte too.  */
          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
            b[-1]--; 
          b += b[-1];
          break;

	case '(':
	  if (! (obscure_syntax & RE_NO_BK_PARENS))
	    goto normal_char;
	  else
	    goto handle_open;

	case ')':
	  if (! (obscure_syntax & RE_NO_BK_PARENS))
	    goto normal_char;
	  else
	    goto handle_close;

        case '\n':
	  if (! (obscure_syntax & RE_NEWLINE_OR))
	    goto normal_char;
	  else
	    goto handle_bar;

	case '|':
	  if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
              && (! laststart  ||  p == pend))
	    goto invalid_pattern;
          else if (! (obscure_syntax & RE_NO_BK_VBAR))
	    goto normal_char;
	  else
	    goto handle_bar;

	case '{':
           if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
                  && (obscure_syntax & RE_INTERVALS)))
             goto normal_char;
           else
             goto handle_interval;
             
        case '\\':
	  if (p == pend) goto invalid_pattern;
	  PATFETCH_RAW (c);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -