📄 regex.c
字号:
to the following jump, because tensioning the jumps is a hassle.) */ /* The start of a stupid repeat has an on_failure_jump that points past the end of the repeat text. This makes a failure point so that, on failure to match a repetition, matching restarts past as many repetitions have been found with no way to fail and look for another one. */ /* A smart repeat is similar but loops back to the on_failure_jump so that each repetition makes another failure point. */ case on_failure_jump: if (stackp == stacke) { char **stackx; if (stacke - stackb > re_max_failures) return -2; stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); ALLOCA_CHECK(stackx); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp += stackx - stackb; stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*p) << 8; p++; *stackp++ = mcnt + p; *stackp++ = d; break; /* The end of a smart repeat has an maybe_finalize_jump back. Change it either to a finalize_jump or an ordinary jump. */ case maybe_finalize_jump: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*p) << 8; p++; /* Compare what follows with the begining of the repeat. If we can establish that there is nothing that they would both match, we can change to finalize_jump */ if (p == pend) p[-3] = (char) finalize_jump; else if (*p == (char) exactn || *p == (char) endline) { register int c = *p == (char) endline ? '\n' : p[2]; register char *p1 = p + mcnt; /* p1[0] ... p1[2] are an on_failure_jump. Examine what follows that */ if (p1[3] == (char) exactn && p1[5] != c) p[-3] = (char) finalize_jump; else if (p1[3] == (char) charset || p1[3] == (char) charset_not) { int not = p1[3] == (char) charset_not; if (c < p1[4] * BYTEWIDTH && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; /* not is 1 if c would match */ /* That means it is not safe to finalize */ if (!not) p[-3] = (char) finalize_jump; } } p -= 2; if (p[-1] != (char) finalize_jump) { p[-1] = (char) jump; goto nofinalize; } /* The end of a stupid repeat has a finalize-jump back to the start, where another failure point will be made which will point after all the repetitions found so far. */ case finalize_jump: stackp -= 2; case jump: nofinalize: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*p) << 8; p += mcnt + 1; /* The 1 compensates for missing ++ above */ break; case dummy_failure_jump: if (stackp == stacke) { char **stackx = (char **) alloca (2 * (stacke - stackb) * sizeof (char *)); ALLOCA_CHECK(stackx); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp += stackx - stackb; stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } *stackp++ = 0; *stackp++ = 0; goto nofinalize; case wordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ break; if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) break; goto fail; case notwordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ goto fail; if ((SYNTAX (((unsigned char *)d)[-1]) == Sword) != (SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) == Sword)) goto fail; break; case wordbeg: if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (*(unsigned char *) (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ goto fail; /* prev char not letter */ if (d == string1 /* Points to first char */ || SYNTAX (((unsigned char *)d)[-1]) != Sword) break; goto fail; case wordend: if (d == string1 /* Points to first char */ || SYNTAX (((unsigned char *)d)[-1]) != Sword) /* prev char not letter */ goto fail; if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (d == end1 ? *(unsigned char *)string2 : *(unsigned char *)d) != Sword) /* Next char not a letter */ break; goto fail;#ifdef emacs case before_dot: if (((d - string2 <= (unsigned) size2) ? d - (char *) bf_p2 : d - (char *) bf_p1) <= point) goto fail; break; case at_dot: if (((d - string2 <= (unsigned) size2) ? d - (char *) bf_p2 : d - (char *) bf_p1) == point) goto fail; break; case after_dot: if (((d - string2 <= (unsigned) size2) ? d - (char *) bf_p2 : d - (char *) bf_p1) >= point) goto fail; break; case wordchar: mcnt = (int) Sword; goto matchsyntax; case syntaxspec: mcnt = *p++; matchsyntax: PREFETCH; if (SYNTAX (*(unsigned char *)d++) != (enum syntaxcode) mcnt) goto fail; break; case notwordchar: mcnt = (int) Sword; goto matchnotsyntax; case notsyntaxspec: mcnt = *p++; matchnotsyntax: PREFETCH; if (SYNTAX (*(unsigned char *)d++) == (enum syntaxcode) mcnt) goto fail; break;#else case wordchar: PREFETCH; if (SYNTAX (*(unsigned char *)d++) == 0) goto fail; break; case notwordchar: PREFETCH; if (SYNTAX (*(unsigned char *)d++) != 0) goto fail; break;#endif /* not emacs */ case begbuf: if (d == string1) /* Note, d cannot equal string2 */ break; /* unless string1 == string2. */ goto fail; case endbuf: if (d == end2 || (d == end1 && size2 == 0)) break; goto fail; case exactn: /* Match the next few pattern characters exactly. mcnt is how many characters to match. */ mcnt = *p++; if (translate) { do { PREFETCH; if (translate[*(unsigned char *)d++] != *p++) goto fail; } while (--mcnt); } else { do { PREFETCH; if (*d++ != *p++) goto fail; } while (--mcnt); } break; } continue; /* Successfully matched one pattern command; keep matching */ /* Jump here if any matching operation fails. */ fail: if (stackp != stackb) /* A restart point is known. Restart there and pop it. */ { if (!stackp[-2]) { /* If innermost failure point is dormant, flush it and keep looking */ stackp -= 2; goto fail; } d = *--stackp; p = *--stackp; if (d >= string1 && d <= end1) dend = end_match_1; } else break; /* Matching at this starting point really fails! */ } return -1; /* Failure to match */}static intbcmp_translate (s1, s2, len, translate) unsigned char *s1, *s2; register int len; unsigned char *translate;{ register unsigned char *p1 = s1, *p2 = s2; while (len) { if (translate [*p1++] != translate [*p2++]) return 1; len--; } return 0;}/* Entry points compatible with bsd4.2 regex library */#ifndef emacsstatic struct re_pattern_buffer re_comp_buf;char *re_comp (s) char *s;{ if (!s) { if (!re_comp_buf.buffer) return "No previous regular expression"; return 0; } if (!re_comp_buf.buffer) { if (!(re_comp_buf.buffer = (char *) malloc (200))) return "Memory exhausted"; re_comp_buf.allocated = 200; if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) return "Memory exhausted"; } return re_compile_pattern (s, strlen (s), &re_comp_buf);}intre_exec (s) char *s;{ int len = strlen (s); return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);}#endif /* emacs */#ifdef test#include <stdio.h>/* Indexed by a character, gives the upper case equivalent of the character */static char upcase[0400] = { 000, 001, 002, 003, 004, 005, 006, 007, 010, 011, 012, 013, 014, 015, 016, 017, 020, 021, 022, 023, 024, 025, 026, 027, 030, 031, 032, 033, 034, 035, 036, 037, 040, 041, 042, 043, 044, 045, 046, 047, 050, 051, 052, 053, 054, 055, 056, 057, 060, 061, 062, 063, 064, 065, 066, 067, 070, 071, 072, 073, 074, 075, 076, 077, 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 };main (){ char pat[80]; struct re_pattern_buffer buf; int i; char c; char fastmap[(1 << BYTEWIDTH)]; buf.allocated = 40; buf.buffer = (char *) malloc (buf.allocated); buf.fastmap = fastmap; buf.translate = upcase; while (1) { gets (pat); if (*pat) { re_compile_pattern (pat, strlen(pat), &buf); for (i = 0; i < buf.used; i++) printchar (buf.buffer[i]); putchar ('\n'); printf ("%d allocated, %d used.\n", buf.allocated, buf.used); re_compile_fastmap (&buf); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (fastmap[i]) printchar (i); putchar ('\n'); } gets (pat); /* Now read the string to match against */ i = re_match (&buf, pat, strlen (pat), 0, 0); printf ("Match value %d.\n", i); }}#ifdef NOTDEFprint_buf (bufp) struct re_pattern_buffer *bufp;{ int i; printf ("buf is :\n----------------\n"); for (i = 0; i < bufp->used; i++) printchar (bufp->buffer[i]); printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->fastmap[i]) printchar (i); printf ("\nAllowed by translate: "); if (bufp->translate) for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->translate[i]) printchar (i); printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");}#endifprintchar (c) char c;{ if (c < 041 || c >= 0177) { putchar ('\\'); putchar (((c >> 6) & 3) + '0'); putchar (((c >> 3) & 7) + '0'); putchar ((c & 7) + '0'); } else putchar (c);}error (string) char *string;{ puts (string);#ifdef CREATE_IO_WINDOW if (iowinpid) kill(iowinpid, SIGKILL); iowinpid = 0;#endif /* CREATE_IO_WINDOW */ exit (1);}#endif /* test */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -