📄 regex.c

📁 早期freebsd实现
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
		      regs->end[mcnt] = -1;		      continue;		    } 		  if (regstart_seg1[mcnt])		    regs->start[mcnt] = regstart[mcnt] - string1;		  else		    regs->start[mcnt] = regstart[mcnt] - string2 + size1; 		  if (regend_seg1[mcnt])		    regs->end[mcnt] = regend[mcnt] - string1;		  else		    regs->end[mcnt] = regend[mcnt] - string2 + size1;		}	    } 	  if (dend == end_match_1)	    return (d - string1 - pos);	  else	    return d - string2 + size1 - pos;	}      /* Otherwise match next pattern command */#ifdef SWITCH_ENUM_BUG      switch ((int) ((enum regexpcode) *p++))#else      switch ((enum regexpcode) *p++)#endif	{	/* \( is represented by a start_memory, \) by a stop_memory.	    Both of those commands contain a "register number" argument.	    The text matched within the \( and \) is recorded under that number.	    Then, \<digit> turns into a `duplicate' command which	    is followed by the numeric value of <digit> as the register number. */	case start_memory:	  regstart[*p] = d; 	  regstart_seg1[*p++] = (dend == end_match_1);	  break;	case stop_memory:	  regend[*p] = d; 	  regend_seg1[*p++] = (dend == end_match_1);	  break;	case duplicate:	  {	    int regno = *p++;   /* Get which register to match against */	    register unsigned char *d2, *dend2;	    /* Don't allow matching a register that hasn't been used.	       This isn't fully reliable in the current version,	       but it is better than crashing.  */	    if ((int) regend[regno] <= -1)	      goto fail;	    d2 = regstart[regno]; 	    dend2 = ((regstart_seg1[regno] == regend_seg1[regno])		     ? regend[regno] : end_match_1);	    while (1)	      {		/* Advance to next segment in register contents, if necessary */		while (d2 == dend2)		  {		    if (dend2 == end_match_2) break;		    if (dend2 == regend[regno]) break;		    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. 
*/		  }		/* At end of register contents => success */		if (d2 == dend2) break;		/* Advance to next segment in data being matched, if necessary */		PREFETCH;		/* mcnt gets # consecutive chars to compare */		mcnt = dend - d;		if (mcnt > dend2 - d2)		  mcnt = dend2 - d2;		/* Compare that many; failure if mismatch, else skip them. */		if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))		  goto fail;		d += mcnt, d2 += mcnt;	      }	  }	  break;	case anychar:	  /* fetch a data character */	  PREFETCH;	  /* Match anything but a newline.  */	  if ((translate ? translate[*d++] : *d++) == '\n')	    goto fail;	  break;	case charset:	case charset_not:	  {	    /* Nonzero for charset_not */	    int not = 0;	    register int c;	    if (*(p - 1) == (unsigned char) charset_not)	      not = 1;	    /* fetch a data character */	    PREFETCH;	    if (translate)	      c = translate [*d];	    else	      c = *d;	    if (c < *p * BYTEWIDTH		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))	      not = !not;	    p += 1 + *p;	    if (!not) goto fail;	    d++;	    break;	  }	case begline:	  if (d == string1 || d[-1] == '\n')	    break;	  goto fail;	case endline:	  if (d == end2	      || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))	    break;	  goto fail;	/* "or" constructs ("|") are handled by starting each alternative	    with an on_failure_jump that points to the start of the next alternative.	    Each alternative except the last ends with a jump to the joining point.	    (Actually, each jump except for the last one really jumps	     to the following jump, because tensioning the jumps is a hassle.) */	/* The start of a stupid repeat has an on_failure_jump that points	   past the end of the repeat text.	   This makes a failure point so that, on failure to match a repetition,	   matching restarts past as many repetitions have been found	   with no way to fail and look for another one.  
*/	/* A smart repeat is similar but loops back to the on_failure_jump	   so that each repetition makes another failure point. */	case on_failure_jump:	  if (stackp == stacke)	    {	      unsigned char **stackx;	      if (stacke - stackb > re_max_failures)		return -2;	      stackx = (unsigned char **) alloca (2 * (stacke - stackb)					 * sizeof (char *));	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));	      stackp = stackx + (stackp - stackb);	      stacke = stackx + 2 * (stacke - stackb);	      stackb = stackx;	    }	  mcnt = *p++ & 0377;	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;	  p++;	  *stackp++ = mcnt + p;	  *stackp++ = d;	  break;	/* The end of a smart repeat has an maybe_finalize_jump back.	   Change it either to a finalize_jump or an ordinary jump. */	case maybe_finalize_jump:	  mcnt = *p++ & 0377;	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;	  p++;	  /* Compare what follows with the begining of the repeat.	     If we can establish that there is nothing that they would	     both match, we can change to finalize_jump */	  if (p == pend)	    p[-3] = (unsigned char) finalize_jump;	  else if (*p == (unsigned char) exactn		   || *p == (unsigned char) endline)	    {	      register int c = *p == (unsigned char) endline ? '\n' : p[2];	      register unsigned char *p1 = p + mcnt;	      /* p1[0] ... p1[2] are an on_failure_jump.		 
Examine what follows that */	      if (p1[3] == (unsigned char) exactn && p1[5] != c)		p[-3] = (unsigned char) finalize_jump;	      else if (p1[3] == (unsigned char) charset		       || p1[3] == (unsigned char) charset_not)		{		  int not = p1[3] == (unsigned char) charset_not;		  if (c < p1[4] * BYTEWIDTH		      && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))		    not = !not;		  /* not is 1 if c would match */		  /* That means it is not safe to finalize */		  if (!not)		    p[-3] = (unsigned char) finalize_jump;		}	    }	  p -= 2;	  if (p[-1] != (unsigned char) finalize_jump)	    {	      p[-1] = (unsigned char) jump;	      goto nofinalize;	    }	/* The end of a stupid repeat has a finalize-jump	   back to the start, where another failure point will be made	   which will point after all the repetitions found so far. */	case finalize_jump:	  stackp -= 2;	case jump:	nofinalize:	  mcnt = *p++ & 0377;	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;	  p += mcnt + 1;	/* The 1 compensates for missing ++ above */	  break;	case dummy_failure_jump:	  if (stackp == stacke)	    {	      unsigned char **stackx		= (unsigned char **) alloca (2 * (stacke - stackb)					     * sizeof (char *));	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));	      stackp = stackx + (stackp - stackb);	      stacke = stackx + 2 * (stacke - stackb);	      stackb = stackx;	    }	  *stackp++ = 0;	  *stackp++ = 0;	  goto nofinalize;	case wordbound:	  if (d == string1  /* Points to first char */	      || d == end2  /* Points to end */	      || (d == end1 && size2 == 0)) /* Points to end */	    break;	  if ((SYNTAX (d[-1]) == Sword)	      != (SYNTAX (d == end1 ? *string2 : *d) == Sword))	    break;	  goto fail;	case notwordbound:	  if (d == string1  /* Points to first char */	      || d == end2  /* Points to end */	      || (d == end1 && size2 == 0)) /* Points to end */	    goto fail;	  if ((SYNTAX (d[-1]) == Sword)	      != (SYNTAX (d == end1 ? 
*string2 : *d) == Sword))	    goto fail;	  break;	case wordbeg:	  if (d == end2  /* Points to end */	      || (d == end1 && size2 == 0) /* Points to end */	      || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */	    goto fail;	  if (d == string1  /* Points to first char */	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */	    break;	  goto fail;	case wordend:	  if (d == string1  /* Points to first char */	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */	    goto fail;	  if (d == end2  /* Points to end */	      || (d == end1 && size2 == 0) /* Points to end */	      || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */	    break;	  goto fail;#ifdef emacs	case before_dot:	  if (PTR_CHAR_POS (d) + 1 >= point)	    goto fail;	  break;	case at_dot:	  if (PTR_CHAR_POS (d) + 1 != point)	    goto fail;	  break;	case after_dot:	  if (PTR_CHAR_POS (d) + 1 <= point)	    goto fail;	  break;	case wordchar:	  mcnt = (int) Sword;	  goto matchsyntax;	case syntaxspec:	  mcnt = *p++;	matchsyntax:	  PREFETCH;	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;	  break;	  	case notwordchar:	  mcnt = (int) Sword;	  goto matchnotsyntax;	case notsyntaxspec:	  mcnt = *p++;	matchnotsyntax:	  PREFETCH;	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;	  break;#else	case wordchar:	  PREFETCH;	  if (SYNTAX (*d++) == 0) goto fail;	  break;	  	case notwordchar:	  PREFETCH;	  if (SYNTAX (*d++) != 0) goto fail;	  break;#endif not emacs	case begbuf:	  if (d == string1)	/* Note, d cannot equal string2 */	    break;		/* unless string1 == string2.  */	  goto fail;	case endbuf:	  if (d == end2 || (d == end1 && size2 == 0))	    break;	  goto fail;	case exactn:	  /* Match the next few pattern characters exactly.	     mcnt is how many characters to match. 
*/	  mcnt = *p++;	  if (translate)	    {	      do		{		  PREFETCH;		  if (translate[*d++] != *p++) goto fail;		}	      while (--mcnt);	    }	  else	    {	      do		{		  PREFETCH;		  if (*d++ != *p++) goto fail;		}	      while (--mcnt);	    }	  break;	}      continue;    /* Successfully matched one pattern command; keep matching */      /* Jump here if any matching operation fails. */    fail:      if (stackp != stackb)	/* A restart point is known.  Restart there and pop it. */	{	  if (!stackp[-2])	    {   /* If innermost failure point is dormant, flush it and keep looking */	      stackp -= 2;	      goto fail;	    }	  d = *--stackp;	  p = *--stackp;	  if (d >= string1 && d <= end1)	    dend = end_match_1;	}      else break;   /* Matching at this starting point really fails! */    }  return -1;         /* Failure to match */}static intbcmp_translate (s1, s2, len, translate)     unsigned char *s1, *s2;     register int len;     unsigned char *translate;{  register unsigned char *p1 = s1, *p2 = s2;  while (len)    {      if (translate [*p1++] != translate [*p2++]) return 1;      len--;    }  return 0;}/* Entry points compatible with bsd4.2 regex library */#ifndef emacsstatic struct re_pattern_buffer re_comp_buf;char *re_comp (s)     char *s;{  if (!s)    {      if (!re_comp_buf.buffer)	return "No previous regular expression";      return 0;    }  if (!re_comp_buf.buffer)    {      if (!(re_comp_buf.buffer = (char *) malloc (200)))	return "Memory exhausted";      re_comp_buf.allocated = 200;      if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))	return "Memory exhausted";    }  return re_compile_pattern (s, strlen (s), &re_comp_buf);}intre_exec (s)     char *s;{  int len = strlen (s);  return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);}#endif /* emacs */#ifdef test#include <stdio.h>/* Indexed by a character, gives the upper case equivalent of the character */static char upcase[0400] =   { 000, 001, 002, 003, 004, 005, 006, 007,    010, 011, 012, 013, 
014, 015, 016, 017,    020, 021, 022, 023, 024, 025, 026, 027,    030, 031, 032, 033, 034, 035, 036, 037,    040, 041, 042, 043, 044, 045, 046, 047,    050, 051, 052, 053, 054, 055, 056, 057,    060, 061, 062, 063, 064, 065, 066, 067,    070, 071, 072, 073, 074, 075, 076, 077,    0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,    0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,    0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,    0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,    0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,    0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,    0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,    0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,    0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,    0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,    0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,    0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,    0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,    0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,    0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,    0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,    0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,    0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,    0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377  };main (argc, argv)     int argc;     char **argv;{  char pat[80];  struct re_pattern_buffer buf;  int i;  char c;  char fastmap[(1 << BYTEWIDTH)];  /* Allow a command argument to specify the style of syntax.  
*/  if (argc > 1)    obscure_syntax = atoi (argv[1]);  buf.allocated = 40;  buf.buffer = (char *) malloc (buf.allocated);  buf.fastmap = fastmap;  buf.translate = upcase;  while (1)    {      gets (pat);      if (*pat)	{          re_compile_pattern (pat, strlen(pat), &buf);	  for (i = 0; i < buf.used; i++)	    printchar (buf.buffer[i]);	  putchar ('\n');	  printf ("%d allocated, %d used.\n", buf.allocated, buf.used);	  re_compile_fastmap (&buf);	  printf ("Allowed by fastmap: ");	  for (i = 0; i < (1 << BYTEWIDTH); i++)	    if (fastmap[i]) printchar (i);	  putchar ('\n');	}      gets (pat);	/* Now read the string to match against */      i = re_match (&buf, pat, strlen (pat), 0, 0);      printf ("Match value %d.\n", i);    }}#ifdef NOTDEFprint_buf (bufp)     struct re_pattern_buffer *bufp;{  int i;  printf ("buf is :\n----------------\n");  for (i = 0; i < bufp->used; i++)    printchar (bufp->buffer[i]);    printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);    printf ("Allowed by fastmap: ");  for (i = 0; i < (1 << BYTEWIDTH); i++)    if (bufp->fastmap[i])      printchar (i);  printf ("\nAllowed by translate: ");  if (bufp->translate)    for (i = 0; i < (1 << BYTEWIDTH); i++)      if (bufp->translate[i])	printchar (i);  printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");  printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");}#endifprintchar (c)     char c;{  if (c < 041 || c >= 0177)    {      putchar ('\\');      putchar (((c >> 6) & 3) + '0');      putchar (((c >> 3) & 7) + '0');      putchar ((c & 7) + '0');    }  else    putchar (c);}error (string)     char *string;{  puts (string);  exit (1);}#endif test
上一页 1 2 3
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -