⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 TURBOC 文本处理常规库
💻 C
📖 第 1 页 / 共 4 页
字号:
	      if (stacke - stackb > re_max_failures * 2)
		return -2;
	      stackx = (unsigned char **) alloca (2 * (stacke - stackb)
					 * sizeof (char *));
	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
	      stackp = stackx + (stackp - stackb);
	      stacke = stackx + 2 * (stacke - stackb);
	      stackb = stackx;
	    }
	  mcnt = *p++ & 0377;
	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p++;
	  *stackp++ = mcnt + p;
	  *stackp++ = d;
	  break;

	/* The end of a smart repeat has a maybe_finalize_jump back.
	   Change it either to a finalize_jump or an ordinary jump. */

	case maybe_finalize_jump:
	  mcnt = *p++ & 0377;
	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p++;
	  {
	    register unsigned char *p2 = p;
	    /* Compare what follows with the beginning of the repeat.
	       If we can establish that there is nothing that they would
	       both match, we can change to finalize_jump */
	    while (p2 != pend
		   && (*p2 == (unsigned char) stop_memory
		       || *p2 == (unsigned char) start_memory))
	      p2++;
	    if (p2 == pend)
	      p[-3] = (unsigned char) finalize_jump;
	    else if (*p2 == (unsigned char) exactn
		     || *p2 == (unsigned char) endline)
	      {
		register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
		register unsigned char *p1 = p + mcnt;
		/* p1[0] ... p1[2] are an on_failure_jump.
		   Examine what follows that */
		if (p1[3] == (unsigned char) exactn && p1[5] != c)
		  p[-3] = (unsigned char) finalize_jump;
		else if (p1[3] == (unsigned char) charset
			 || p1[3] == (unsigned char) charset_not)
		  {
		    int not = p1[3] == (unsigned char) charset_not;
		    if (c < p1[4] * BYTEWIDTH
			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
		      not = !not;
		    /* not is 1 if c would match */
		    /* That means it is not safe to finalize */
		    if (!not)
		      p[-3] = (unsigned char) finalize_jump;
		  }
	      }
	  }
	  p -= 2;
	  if (p[-1] != (unsigned char) finalize_jump)
	    {
	      p[-1] = (unsigned char) jump;
	      goto nofinalize;
	    }

	/* The end of a stupid repeat has a finalize-jump
	   back to the start, where another failure point will be made
	   which will point after all the repetitions found so far. */

	case finalize_jump:
	  stackp -= 2;

	case jump:
	nofinalize:
	  mcnt = *p++ & 0377;
	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
	  p += mcnt + 1;	/* The 1 compensates for missing ++ above */
	  break;

	case dummy_failure_jump:
	  if (stackp == stacke)
	    {
	      unsigned char **stackx
		= (unsigned char **) alloca (2 * (stacke - stackb)
					     * sizeof (char *));
	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
	      stackp = stackx + (stackp - stackb);
	      stacke = stackx + 2 * (stacke - stackb);
	      stackb = stackx;
	    }
	  *stackp++ = 0;
	  *stackp++ = 0;
	  goto nofinalize;

	case wordbound:
	  if (d == string1  /* Points to first char */
	      || d == end2  /* Points to end */
	      || (d == end1 && size2 == 0)) /* Points to end */
	    break;
	  if ((SYNTAX (d[-1]) == Sword)
	      != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
	    break;
	  goto fail;

	case notwordbound:
	  if (d == string1  /* Points to first char */
	      || d == end2  /* Points to end */
	      || (d == end1 && size2 == 0)) /* Points to end */
	    goto fail;
	  if ((SYNTAX (d[-1]) == Sword)
	      != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
	    goto fail;
	  break;

	case wordbeg:
	  if (d == end2  /* Points to end */
	      || (d == end1 && size2 == 0) /* Points to end */
	      || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
	    goto fail;
	  if (d == string1  /* Points to first char */
	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
	    break;
	  goto fail;

	case wordend:
	  if (d == string1  /* Points to first char */
	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
	    goto fail;
	  if (d == end2  /* Points to end */
	      || (d == end1 && size2 == 0) /* Points to end */
	      || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
	    break;
	  goto fail;

#ifdef emacs
	case before_dot:
	  if (((d - string2 <= (unsigned) size2)
	       ? d - bf_p2 : d - bf_p1)
	      <= point)
	    goto fail;
	  break;

	case at_dot:
	  if (((d - string2 <= (unsigned) size2)
	       ? d - bf_p2 : d - bf_p1)
	      == point)
	    goto fail;
	  break;

	case after_dot:
	  if (((d - string2 <= (unsigned) size2)
	       ? d - bf_p2 : d - bf_p1)
	      >= point)
	    goto fail;
	  break;

	case wordchar:
	  mcnt = (int) Sword;
	  goto matchsyntax;

	case syntaxspec:
	  mcnt = *p++;
	matchsyntax:
	  PREFETCH;
	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
	  break;
	  
	case notwordchar:
	  mcnt = (int) Sword;
	  goto matchnotsyntax;

	case notsyntaxspec:
	  mcnt = *p++;
	matchnotsyntax:
	  PREFETCH;
	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
	  break;
#else
	case wordchar:
	  PREFETCH;
	  if (SYNTAX (*d++) == 0) goto fail;
	  break;
	  
	case notwordchar:
	  PREFETCH;
	  if (SYNTAX (*d++) != 0) goto fail;
	  break;
#endif /* not emacs */

	case begbuf:
	  if (d == string1)	/* Note, d cannot equal string2 */
	    break;		/* unless string1 == string2.  */
	  goto fail;

	case endbuf:
	  if (d == end2 || (d == end1 && size2 == 0))
	    break;
	  goto fail;

	case exactn:
	  /* Match the next few pattern characters exactly.
	     mcnt is how many characters to match. */
	  mcnt = *p++;
	  if (translate)
	    {
	      do
		{
		  PREFETCH;
		  if (translate[*d++] != *p++) goto fail;
		}
	      while (--mcnt);
	    }
	  else
	    {
	      do
		{
		  PREFETCH;
		  if (*d++ != *p++) goto fail;
		}
	      while (--mcnt);
	    }
	  break;
	}
      continue;    /* Successfully matched one pattern command; keep matching */

      /* Jump here if any matching operation fails. */
    fail:
      if (stackp != stackb)
	/* A restart point is known.  Restart there and pop it. */
	{
	  if (!stackp[-2])
	    {   /* If innermost failure point is dormant, flush it and keep looking */
	      stackp -= 2;
	      goto fail;
	    }
	  d = *--stackp;
	  p = *--stackp;
	  if (d >= string1 && d <= end1)
	    dend = end_match_1;
	}
      else break;   /* Matching at this starting point really fails! */
    }
  return -1;         /* Failure to match */
}

/* Compare the first LEN bytes of S1 and S2 after mapping every byte
   through the table TRANSLATE.  Returns 0 when all LEN translated bytes
   agree (including when LEN is 0) and 1 at the first pair that differs.  */

static int
bcmp_translate (s1, s2, len, translate)
     unsigned char *s1, *s2;
     register int len;
     unsigned char *translate;
{
  register unsigned char *left = s1;
  register unsigned char *right = s2;

  for (; len != 0; len--, left++, right++)
    {
      if (translate[*left] != translate[*right])
	return 1;
    }
  return 0;
}

/* Entry points compatible with bsd4.2 regex library */

#ifndef emacs

static struct re_pattern_buffer re_comp_buf;

/* BSD 4.2 compatible entry point: compile the regular expression S into
   the file-private pattern buffer `re_comp_buf'.

   If S is a null pointer nothing is compiled; the return value then only
   reports whether the pattern buffer has been allocated by an earlier
   call.

   Returns 0 on success.  On failure returns an error message: one of the
   string literals below, or whatever re_compile_pattern returns.  */

char *
re_comp (s)
     char *s;
{
  if (!s)
    {
      if (!re_comp_buf.buffer)
	return "No previous regular expression";
      return 0;
    }

  if (!re_comp_buf.buffer)
    {
      if (!(re_comp_buf.buffer = (char *) malloc (200)))
	return "Memory exhausted";
      re_comp_buf.allocated = 200;
    }

  /* Checked independently of the pattern buffer: if this allocation
     failed on an earlier call, the buffer is already set, and a combined
     check would never try to allocate the fastmap again.  */
  if (!re_comp_buf.fastmap)
    {
      if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
	return "Memory exhausted";
    }

  return re_compile_pattern (s, strlen (s), &re_comp_buf);
}

/* BSD 4.2 compatible entry point: search the whole of the string S for
   the pattern held in `re_comp_buf'.  Returns 1 when re_search reports a
   non-negative result and 0 otherwise.  */

int
re_exec (s)
     char *s;
{
  int length = strlen (s);
  int result = re_search (&re_comp_buf, s, length, 0, length, 0);

  return result >= 0;
}

#endif /* not emacs */

#ifdef test

#include <stdio.h>

/* Case-folding table for the test driver, indexed by a character code
   in the range 0 .. 0377.  The entries for 'a' .. 'z' (0141 .. 0172)
   hold the corresponding upper-case codes 'A' .. 'Z' (0101 .. 0132);
   every other entry holds its own index.  main installs it as the
   pattern buffer's translate table.  */

static char upcase[0400] = 
  { 000, 001, 002, 003, 004, 005, 006, 007,
    010, 011, 012, 013, 014, 015, 016, 017,
    020, 021, 022, 023, 024, 025, 026, 027,
    030, 031, 032, 033, 034, 035, 036, 037,
    040, 041, 042, 043, 044, 045, 046, 047,
    050, 051, 052, 053, 054, 055, 056, 057,
    060, 061, 062, 063, 064, 065, 066, 067,
    070, 071, 072, 073, 074, 075, 076, 077,
    0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
    0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
    0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
    0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
    0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
    0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
    0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
    0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
    0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
    0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
    0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
    0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
    0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
    0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
    0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
    0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
    0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
    0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
  };

/* Read one line from standard input into LINE, which has room for SIZE
   bytes, and strip the trailing newline.  Input beyond SIZE - 1
   characters on the same line is discarded.  Returns 1 if a line was
   read, 0 on end of file or read error.  */

static int
test_read_line (line, size)
     char *line;
     int size;
{
  int len;

  if (!fgets (line, size, stdin))
    return 0;

  len = strlen (line);
  if (len > 0 && line[len - 1] == '\n')
    line[len - 1] = '\0';
  else
    {
      /* The line was truncated (or input ended without a newline): skip
	 the remainder so it is not mistaken for the next line.  */
      int ch;
      while ((ch = getchar ()) != EOF && ch != '\n')
	;
    }
  return 1;
}

/* Interactive test driver.  An optional first command argument selects
   the syntax style (stored in obscure_syntax).

   The program then loops until end of input, reading lines in pairs:
   first a pattern, then a string to match against it.  A non-empty
   pattern line is compiled, and the buffer sizes and the characters
   allowed by the fastmap are printed; an empty pattern line keeps the
   previously compiled pattern.  The result of re_match on the string is
   printed as "Match value N.".

   If a pattern fails to compile, the message is printed and the next
   line is read as a new pattern.

   Returns 0 at end of input, 1 if the pattern buffer cannot be
   allocated.  */

int
main (argc, argv)
     int argc;
     char **argv;
{
  char pat[80];
  struct re_pattern_buffer buf;
  int i;
  char fastmap[(1 << BYTEWIDTH)];

  /* Allow a command argument to specify the style of syntax.  */
  if (argc > 1)
    obscure_syntax = atoi (argv[1]);

  buf.allocated = 40;
  buf.buffer = (char *) malloc (buf.allocated);
  if (!buf.buffer)
    {
      puts ("Memory exhausted");
      return 1;
    }
  buf.fastmap = fastmap;
  buf.translate = upcase;

  while (1)
    {
      /* Bounded read: the original gets() could overrun PAT and never
	 noticed end of input.  */
      if (!test_read_line (pat, (int) sizeof pat))
	break;

      if (*pat)
	{
	  /* NOTE(review): assumes re_compile_pattern returns 0 on success
	     and an error message otherwise, as re_comp's use of its
	     result suggests -- confirm against its definition.  */
	  char *err = (char *) re_compile_pattern (pat, strlen (pat), &buf);

	  if (err)
	    {
	      puts (err);
	      continue;
	    }

/*	  for (i = 0; i < buf.used; i++)
	    printchar (buf.buffer[i]);
*/
	  putchar ('\n');

	  printf ("%d allocated, %d used.\n", buf.allocated, buf.used);

	  re_compile_fastmap (&buf);
	  printf ("Allowed by fastmap: ");
	  for (i = 0; i < (1 << BYTEWIDTH); i++)
	    if (fastmap[i]) printchar (i);
	  putchar ('\n');
	}

      /* Now read the string to match against */
      if (!test_read_line (pat, (int) sizeof pat))
	break;

      i = re_match (&buf, pat, strlen (pat), 0, 0);
      printf ("Match value %d.\n", i);
    }

  return 0;
}

#ifdef NOTDEF
/* Debugging aid: dump the pattern buffer BUFP on standard output.
   Prints, in order, the compiled bytes, the allocated and used sizes,
   every character code with a nonzero fastmap entry, every character
   code with a nonzero translate entry (when a translate table is set),
   and the fastmap_accurate and can_be_null flags.  */
print_buf (bufp)
     struct re_pattern_buffer *bufp;
{
  int n;
  int table_size = 1 << BYTEWIDTH;
  char *accurate_suffix;
  char *null_word;

  printf ("buf is :\n----------------\n");
  n = 0;
  while (n < bufp->used)
    {
      printchar (bufp->buffer[n]);
      n++;
    }

  printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);

  printf ("Allowed by fastmap: ");
  for (n = 0; n < table_size; n++)
    {
      if (!bufp->fastmap[n])
	continue;
      printchar (n);
    }

  printf ("\nAllowed by translate: ");
  if (bufp->translate)
    {
      for (n = 0; n < table_size; n++)
	{
	  if (!bufp->translate[n])
	    continue;
	  printchar (n);
	}
    }

  if (bufp->fastmap_accurate)
    accurate_suffix = "";
  else
    accurate_suffix = "n't";
  printf ("\nfastmap is%s accurate\n", accurate_suffix);

  if (bufp->can_be_null)
    null_word = "";
  else
    null_word = "not";
  printf ("can %s be null\n----------", null_word);
}
#endif

/* Write the character C on standard output.  Codes from 041 up to but
   not including 0177 are written as themselves; anything else is
   written as a backslash followed by three octal digits.  */
printchar (c)
     char c;
{
  if (c >= 041 && c < 0177)
    putchar (c);
  else
    {
      int shift;

      putchar ('\\');
      /* The top digit only carries two bits; the lower two carry three.  */
      putchar (((c >> 6) & 3) + '0');
      for (shift = 3; shift >= 0; shift -= 3)
	putchar (((c >> shift) & 7) + '0');
    }
}

/* Print the message STRING, followed by a newline, on standard output
   and terminate the program with exit status 1.
   NOTE(review): no caller is visible in this part of the file;
   presumably an error hook used by the test build -- confirm against
   the rest of the file.  */
regerror (string)
     char *string;
{
  puts (string);	/* puts appends the newline itself */
  exit (1);
}

#endif /* test */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -