📄 regex.c
字号:
if (stacke - stackb > re_max_failures * 2)
return -2;
stackx = (unsigned char **) alloca (2 * (stacke - stackb)
* sizeof (char *));
bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
stackp = stackx + (stackp - stackb);
stacke = stackx + 2 * (stacke - stackb);
stackb = stackx;
}
mcnt = *p++ & 0377;
mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p++;
*stackp++ = mcnt + p;
*stackp++ = d;
break;
/* The end of a smart repeat has a maybe_finalize_jump back.
Change it either to a finalize_jump or an ordinary jump. */
case maybe_finalize_jump:
mcnt = *p++ & 0377;
mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p++;
{
register unsigned char *p2 = p;
/* Compare what follows with the beginning of the repeat.
If we can establish that there is nothing that they would
both match, we can change to finalize_jump.  */
while (p2 != pend
&& (*p2 == (unsigned char) stop_memory
|| *p2 == (unsigned char) start_memory))
p2++;
if (p2 == pend)
p[-3] = (unsigned char) finalize_jump;
else if (*p2 == (unsigned char) exactn
|| *p2 == (unsigned char) endline)
{
register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
register unsigned char *p1 = p + mcnt;
/* p1[0] ... p1[2] are an on_failure_jump.
Examine what follows that */
if (p1[3] == (unsigned char) exactn && p1[5] != c)
p[-3] = (unsigned char) finalize_jump;
else if (p1[3] == (unsigned char) charset
|| p1[3] == (unsigned char) charset_not)
{
int not = p1[3] == (unsigned char) charset_not;
if (c < p1[4] * BYTEWIDTH
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
/* not is 1 if c would match */
/* That means it is not safe to finalize */
if (!not)
p[-3] = (unsigned char) finalize_jump;
}
}
}
p -= 2;
if (p[-1] != (unsigned char) finalize_jump)
{
p[-1] = (unsigned char) jump;
goto nofinalize;
}
/* The end of a stupid repeat has a finalize-jump
back to the start, where another failure point will be made
which will point after all the repetitions found so far. */
case finalize_jump:
stackp -= 2;
case jump:
nofinalize:
mcnt = *p++ & 0377;
mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p += mcnt + 1; /* The 1 compensates for missing ++ above */
break;
case dummy_failure_jump:
if (stackp == stacke)
{
unsigned char **stackx
= (unsigned char **) alloca (2 * (stacke - stackb)
* sizeof (char *));
bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
stackp = stackx + (stackp - stackb);
stacke = stackx + 2 * (stacke - stackb);
stackb = stackx;
}
*stackp++ = 0;
*stackp++ = 0;
goto nofinalize;
case wordbound:
if (d == string1 /* Points to first char */
|| d == end2 /* Points to end */
|| (d == end1 && size2 == 0)) /* Points to end */
break;
if ((SYNTAX (d[-1]) == Sword)
!= (SYNTAX (d == end1 ? *string2 : *d) == Sword))
break;
goto fail;
case notwordbound:
if (d == string1 /* Points to first char */
|| d == end2 /* Points to end */
|| (d == end1 && size2 == 0)) /* Points to end */
goto fail;
if ((SYNTAX (d[-1]) == Sword)
!= (SYNTAX (d == end1 ? *string2 : *d) == Sword))
goto fail;
break;
case wordbeg:
if (d == end2 /* Points to end */
|| (d == end1 && size2 == 0) /* Points to end */
|| SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
goto fail;
if (d == string1 /* Points to first char */
|| SYNTAX (d[-1]) != Sword) /* prev char not letter */
break;
goto fail;
case wordend:
if (d == string1 /* Points to first char */
|| SYNTAX (d[-1]) != Sword) /* prev char not letter */
goto fail;
if (d == end2 /* Points to end */
|| (d == end1 && size2 == 0) /* Points to end */
|| SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
break;
goto fail;
#ifdef emacs
case before_dot:
if (((d - string2 <= (unsigned) size2)
? d - bf_p2 : d - bf_p1)
<= point)
goto fail;
break;
case at_dot:
if (((d - string2 <= (unsigned) size2)
? d - bf_p2 : d - bf_p1)
== point)
goto fail;
break;
case after_dot:
if (((d - string2 <= (unsigned) size2)
? d - bf_p2 : d - bf_p1)
>= point)
goto fail;
break;
case wordchar:
mcnt = (int) Sword;
goto matchsyntax;
case syntaxspec:
mcnt = *p++;
matchsyntax:
PREFETCH;
if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
break;
case notwordchar:
mcnt = (int) Sword;
goto matchnotsyntax;
case notsyntaxspec:
mcnt = *p++;
matchnotsyntax:
PREFETCH;
if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
break;
#else
case wordchar:
PREFETCH;
if (SYNTAX (*d++) == 0) goto fail;
break;
case notwordchar:
PREFETCH;
if (SYNTAX (*d++) != 0) goto fail;
break;
#endif /* not emacs */
case begbuf:
if (d == string1) /* Note, d cannot equal string2 */
break; /* unless string1 == string2. */
goto fail;
case endbuf:
if (d == end2 || (d == end1 && size2 == 0))
break;
goto fail;
case exactn:
/* Match the next few pattern characters exactly.
mcnt is how many characters to match. */
mcnt = *p++;
if (translate)
{
do
{
PREFETCH;
if (translate[*d++] != *p++) goto fail;
}
while (--mcnt);
}
else
{
do
{
PREFETCH;
if (*d++ != *p++) goto fail;
}
while (--mcnt);
}
break;
}
continue; /* Successfully matched one pattern command; keep matching */
/* Jump here if any matching operation fails. */
fail:
if (stackp != stackb)
/* A restart point is known. Restart there and pop it. */
{
if (!stackp[-2])
{ /* If innermost failure point is dormant, flush it and keep looking */
stackp -= 2;
goto fail;
}
d = *--stackp;
p = *--stackp;
if (d >= string1 && d <= end1)
dend = end_match_1;
}
else break; /* Matching at this starting point really fails! */
}
return -1; /* Failure to match */
}
/* Compare the first LEN bytes of S1 and S2 after mapping every byte
   through the table TRANSLATE.
   Returns 0 when all LEN translated bytes agree (including LEN == 0)
   and 1 as soon as one translated pair differs.  */
static int
bcmp_translate (s1, s2, len, translate)
     unsigned char *s1, *s2;
     register int len;
     unsigned char *translate;
{
  register unsigned char *left = s1;
  register unsigned char *right = s2;

  /* Walk both strings in step; both cursors advance on every comparison.  */
  for (; len; len--)
    {
      if (translate[*left++] != translate[*right++])
        return 1;
    }

  return 0;
}
/* Entry points compatible with bsd4.2 regex library */
#ifndef emacs
/* Pattern buffer shared by the BSD-compatible entry points: re_comp
   compiles into it and re_exec searches with it.  */
static struct re_pattern_buffer re_comp_buf;
/* BSD 4.2 compatible entry point: compile the pattern S into the
   file-level buffer re_comp_buf for later use by re_exec.

   If S is a null pointer nothing is compiled: the result is 0 when a
   buffer has already been set up by an earlier call, and the message
   "No previous regular expression" otherwise.

   Returns "Memory exhausted" if a buffer cannot be allocated; otherwise
   returns whatever re_compile_pattern returns for S.  */
char *
re_comp (s)
     char *s;
{
  if (!s)
    {
      if (!re_comp_buf.buffer)
        return "No previous regular expression";
      return 0;
    }

  /* Set up the two buffers independently.  If the fastmap allocation
     fails on one call, the pattern buffer is already in place; checking
     each field on its own lets a later call retry the fastmap instead
     of leaving it null forever.  */
  if (!re_comp_buf.buffer)
    {
      if (!(re_comp_buf.buffer = (char *) malloc (200)))
        return "Memory exhausted";
      re_comp_buf.allocated = 200;
    }

  if (!re_comp_buf.fastmap)
    {
      if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
        return "Memory exhausted";
    }

  return re_compile_pattern (s, strlen (s), &re_comp_buf);
}
/* BSD 4.2 compatible entry point: search the whole of the
   null-terminated string S using the pattern held in re_comp_buf.
   Returns 1 when re_search reports a non-negative result and 0
   otherwise.  */
int
re_exec (s)
     char *s;
{
  int length = strlen (s);
  int result = re_search (&re_comp_buf, s, length, 0, length, 0);

  return result >= 0;
}
#endif /* not emacs */
#ifdef test
#include <stdio.h>
/* Translate table used by the test driver: indexed by a character code
   (0 through 0377), gives the upper case equivalent of that character.
   Only codes 0141 through 0172 ('a' through 'z' in ASCII) map to a
   different value (0101 through 0132); every other entry maps to itself.  */
static char upcase[0400] =
{ 000, 001, 002, 003, 004, 005, 006, 007,
010, 011, 012, 013, 014, 015, 016, 017,
020, 021, 022, 023, 024, 025, 026, 027,
030, 031, 032, 033, 034, 035, 036, 037,
040, 041, 042, 043, 044, 045, 046, 047,
050, 051, 052, 053, 054, 055, 056, 057,
060, 061, 062, 063, 064, 065, 066, 067,
070, 071, 072, 073, 074, 075, 076, 077,
0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
};
/* Read one line from standard input into LINE, which has room for SIZE
   bytes, and drop the trailing newline.  If the input line is too long
   for LINE, the excess up to the next newline is discarded so that the
   following read starts on a fresh line.
   Returns 0 at end of input or on a read error, 1 otherwise.  */
static int
read_test_line (line, size)
     char *line;
     int size;
{
  int len;

  if (!fgets (line, size, stdin))
    return 0;

  len = strlen (line);
  if (len > 0 && line[len - 1] == '\n')
    line[len - 1] = '\0';
  else
    {
      /* No newline was stored: either the line was truncated or the
         input ended without one.  Skip whatever remains of it.  */
      int ch;

      while ((ch = getchar ()) != EOF && ch != '\n')
        ;
    }
  return 1;
}

/* Interactive test driver.

   An optional first command argument is converted with atoi and stored
   in obscure_syntax.  The program then reads standard input as pairs of
   lines: a pattern, followed by a string to match against.  A non-empty
   pattern line is compiled and its fastmap is printed; an empty pattern
   line skips compilation and keeps whatever is already in the buffer.
   The value returned by re_match for the string is printed after each
   pair.

   Returns 0 when the input is exhausted and 1 if the initial pattern
   buffer cannot be allocated.  */
int
main (argc, argv)
     int argc;
     char **argv;
{
  char pat[80];
  struct re_pattern_buffer buf;
  int i;
  char fastmap[(1 << BYTEWIDTH)];

  /* Allow a command argument to specify the style of syntax. */
  if (argc > 1)
    obscure_syntax = atoi (argv[1]);

  buf.allocated = 40;
  buf.buffer = (char *) malloc (buf.allocated);
  if (!buf.buffer)
    {
      puts ("Memory exhausted");
      return 1;
    }
  buf.fastmap = fastmap;
  buf.translate = upcase;

  /* Lines are read with a bounded helper instead of gets, so an
     over-long line cannot overrun PAT, and the loop ends at end of
     input instead of spinning on stale buffer contents.  */
  while (read_test_line (pat, (int) sizeof pat))
    {
      if (*pat)
        {
          re_compile_pattern (pat, strlen (pat), &buf);

          putchar ('\n');
          printf ("%d allocated, %d used.\n", buf.allocated, buf.used);

          re_compile_fastmap (&buf);
          printf ("Allowed by fastmap: ");
          for (i = 0; i < (1 << BYTEWIDTH); i++)
            if (fastmap[i])
              printchar (i);
          putchar ('\n');
        }

      /* Now read the string to match against */
      if (!read_test_line (pat, (int) sizeof pat))
        break;

      i = re_match (&buf, pat, strlen (pat), 0, 0);
      printf ("Match value %d.\n", i);
    }

  return 0;
}
#ifdef NOTDEF
/* Debugging aid: dump the pattern buffer BUFP to standard output.
   Prints the compiled bytes, the allocated and used counts, every
   character code whose fastmap entry is nonzero, every character code
   whose translate entry is nonzero (when a translate table is set), and
   the fastmap_accurate and can_be_null flags.  */
print_buf (bufp)
     struct re_pattern_buffer *bufp;
{
  int idx;
  int nchars = 1 << BYTEWIDTH;
  char *accuracy;
  char *nullness;

  printf ("buf is :\n----------------\n");
  idx = 0;
  while (idx < bufp->used)
    {
      printchar (bufp->buffer[idx]);
      idx++;
    }

  printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);

  printf ("Allowed by fastmap: ");
  for (idx = 0; idx < nchars; idx++)
    {
      if (bufp->fastmap[idx])
        printchar (idx);
    }

  printf ("\nAllowed by translate: ");
  if (bufp->translate)
    {
      for (idx = 0; idx < nchars; idx++)
        {
          if (bufp->translate[idx])
            printchar (idx);
        }
    }

  /* Pick the word fragments first so the format strings stay readable.  */
  if (bufp->fastmap_accurate)
    accuracy = "";
  else
    accuracy = "n't";

  if (bufp->can_be_null)
    nullness = "";
  else
    nullness = "not";

  printf ("\nfastmap is%s accurate\n", accuracy);
  printf ("can %s be null\n----------", nullness);
}
#endif
/* Write the character C to standard output in a visible form.
   Values from 041 up to but not including 0177 are written as
   themselves; anything else is written as a backslash followed by three
   octal digits taken from the low eight bits of C.  */
printchar (c)
     char c;
{
  if (c >= 041 && c < 0177)
    putchar (c);
  else
    {
      /* Split the value into its 2-bit, 3-bit and 3-bit octal digits.  */
      int high = (c >> 6) & 3;
      int middle = (c >> 3) & 7;
      int low = c & 7;

      putchar ('\\');
      putchar (high + '0');
      putchar (middle + '0');
      putchar (low + '0');
    }
}
/* Error hook for the test program: write the message STRING followed by
   a newline to standard output, then terminate with exit status 1.  */
regerror (string)
     char *string;
{
  printf ("%s\n", string);
  exit (1);
}
#endif /* test */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -