📄 regex.c
字号:
regs->end[mcnt] = -1; continue; } if (regstart_seg1[mcnt]) regs->start[mcnt] = regstart[mcnt] - string1; else regs->start[mcnt] = regstart[mcnt] - string2 + size1; if (regend_seg1[mcnt]) regs->end[mcnt] = regend[mcnt] - string1; else regs->end[mcnt] = regend[mcnt] - string2 + size1; } } if (dend == end_match_1) return (d - string1 - pos); else return d - string2 + size1 - pos; } /* Otherwise match next pattern command */#ifdef SWITCH_ENUM_BUG switch ((int) ((enum regexpcode) *p++))#else switch ((enum regexpcode) *p++)#endif { /* \( is represented by a start_memory, \) by a stop_memory. Both of those commands contain a "register number" argument. The text matched within the \( and \) is recorded under that number. Then, \<digit> turns into a `duplicate' command which is followed by the numeric value of <digit> as the register number. */ case start_memory: regstart[*p] = d; regstart_seg1[*p++] = (dend == end_match_1); break; case stop_memory: regend[*p] = d; regend_seg1[*p++] = (dend == end_match_1); break; case duplicate: { int regno = *p++; /* Get which register to match against */ register unsigned char *d2, *dend2; /* Don't allow matching a register that hasn't been used. This isn't fully reliable in the current version, but it is better than crashing. */ if ((int) regend[regno] <= -1) goto fail; d2 = regstart[regno]; dend2 = ((regstart_seg1[regno] == regend_seg1[regno]) ? regend[regno] : end_match_1); while (1) { /* Advance to next segment in register contents, if necessary */ while (d2 == dend2) { if (dend2 == end_match_2) break; if (dend2 == regend[regno]) break; d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ } /* At end of register contents => success */ if (d2 == dend2) break; /* Advance to next segment in data being matched, if necessary */ PREFETCH; /* mcnt gets # consecutive chars to compare */ mcnt = dend - d; if (mcnt > dend2 - d2) mcnt = dend2 - d2; /* Compare that many; failure if mismatch, else skip them. */ if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt)) goto fail; d += mcnt, d2 += mcnt; } } break; case anychar: /* fetch a data character */ PREFETCH; /* Match anything but a newline. */ if ((translate ? translate[*d++] : *d++) == '\n') goto fail; break; case charset: case charset_not: { /* Nonzero for charset_not */ int not = 0; register int c; if (*(p - 1) == (unsigned char) charset_not) not = 1; /* fetch a data character */ PREFETCH; if (translate) c = translate [*d]; else c = *d; if (c < *p * BYTEWIDTH && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; p += 1 + *p; if (!not) goto fail; d++; break; } case begline: if (d == string1 || d[-1] == '\n') break; goto fail; case endline: if (d == end2 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) break; goto fail; /* "or" constructs ("|") are handled by starting each alternative with an on_failure_jump that points to the start of the next alternative. Each alternative except the last ends with a jump to the joining point. (Actually, each jump except for the last one really jumps to the following jump, because tensioning the jumps is a hassle.) */ /* The start of a stupid repeat has an on_failure_jump that points past the end of the repeat text. This makes a failure point so that, on failure to match a repetition, matching restarts past as many repetitions have been found with no way to fail and look for another one. */ /* A smart repeat is similar but loops back to the on_failure_jump so that each repetition makes another failure point. */ case on_failure_jump: if (stackp == stacke) { unsigned char **stackx; if (stacke - stackb > re_max_failures) return -2; stackx = (unsigned char **) alloca (2 * (stacke - stackb) * sizeof (char *)); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp = stackx + (stackp - stackb); stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p++; *stackp++ = mcnt + p; *stackp++ = d; break; /* The end of a smart repeat has an maybe_finalize_jump back. Change it either to a finalize_jump or an ordinary jump. */ case maybe_finalize_jump: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p++; /* Compare what follows with the begining of the repeat. If we can establish that there is nothing that they would both match, we can change to finalize_jump */ if (p == pend) p[-3] = (unsigned char) finalize_jump; else if (*p == (unsigned char) exactn || *p == (unsigned char) endline) { register int c = *p == (unsigned char) endline ? '\n' : p[2]; register unsigned char *p1 = p + mcnt; /* p1[0] ... p1[2] are an on_failure_jump. Examine what follows that */ if (p1[3] == (unsigned char) exactn && p1[5] != c) p[-3] = (unsigned char) finalize_jump; else if (p1[3] == (unsigned char) charset || p1[3] == (unsigned char) charset_not) { int not = p1[3] == (unsigned char) charset_not; if (c < p1[4] * BYTEWIDTH && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; /* not is 1 if c would match */ /* That means it is not safe to finalize */ if (!not) p[-3] = (unsigned char) finalize_jump; } } p -= 2; if (p[-1] != (unsigned char) finalize_jump) { p[-1] = (unsigned char) jump; goto nofinalize; } /* The end of a stupid repeat has a finalize-jump back to the start, where another failure point will be made which will point after all the repetitions found so far. */ case finalize_jump: stackp -= 2; case jump: nofinalize: mcnt = *p++ & 0377; mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8; p += mcnt + 1; /* The 1 compensates for missing ++ above */ break; case dummy_failure_jump: if (stackp == stacke) { unsigned char **stackx = (unsigned char **) alloca (2 * (stacke - stackb) * sizeof (char *)); bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *)); stackp = stackx + (stackp - stackb); stacke = stackx + 2 * (stacke - stackb); stackb = stackx; } *stackp++ = 0; *stackp++ = 0; goto nofinalize; case wordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ break; if ((SYNTAX (d[-1]) == Sword) != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) break; goto fail; case notwordbound: if (d == string1 /* Points to first char */ || d == end2 /* Points to end */ || (d == end1 && size2 == 0)) /* Points to end */ goto fail; if ((SYNTAX (d[-1]) == Sword) != (SYNTAX (d == end1 ? *string2 : *d) == Sword)) goto fail; break; case wordbeg: if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */ goto fail; if (d == string1 /* Points to first char */ || SYNTAX (d[-1]) != Sword) /* prev char not letter */ break; goto fail; case wordend: if (d == string1 /* Points to first char */ || SYNTAX (d[-1]) != Sword) /* prev char not letter */ goto fail; if (d == end2 /* Points to end */ || (d == end1 && size2 == 0) /* Points to end */ || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */ break; goto fail;#ifdef emacs case before_dot: if (PTR_CHAR_POS (d) + 1 >= point) goto fail; break; case at_dot: if (PTR_CHAR_POS (d) + 1 != point) goto fail; break; case after_dot: if (PTR_CHAR_POS (d) + 1 <= point) goto fail; break; case wordchar: mcnt = (int) Sword; goto matchsyntax; case syntaxspec: mcnt = *p++; matchsyntax: PREFETCH; if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; break; case notwordchar: mcnt = (int) Sword; goto matchnotsyntax; case notsyntaxspec: mcnt = *p++; matchnotsyntax: PREFETCH; if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; break;#else case wordchar: PREFETCH; if (SYNTAX (*d++) == 0) goto fail; break; case notwordchar: PREFETCH; if (SYNTAX (*d++) != 0) goto fail; break;#endif not emacs case begbuf: if (d == string1) /* Note, d cannot equal string2 */ break; /* unless string1 == string2. */ goto fail; case endbuf: if (d == end2 || (d == end1 && size2 == 0)) break; goto fail; case exactn: /* Match the next few pattern characters exactly. mcnt is how many characters to match. */ mcnt = *p++; if (translate) { do { PREFETCH; if (translate[*d++] != *p++) goto fail; } while (--mcnt); } else { do { PREFETCH; if (*d++ != *p++) goto fail; } while (--mcnt); } break; } continue; /* Successfully matched one pattern command; keep matching */ /* Jump here if any matching operation fails. */ fail: if (stackp != stackb) /* A restart point is known. Restart there and pop it. */ { if (!stackp[-2]) { /* If innermost failure point is dormant, flush it and keep looking */ stackp -= 2; goto fail; } d = *--stackp; p = *--stackp; if (d >= string1 && d <= end1) dend = end_match_1; } else break; /* Matching at this starting point really fails! */ } return -1; /* Failure to match */}static intbcmp_translate (s1, s2, len, translate) unsigned char *s1, *s2; register int len; unsigned char *translate;{ register unsigned char *p1 = s1, *p2 = s2; while (len) { if (translate [*p1++] != translate [*p2++]) return 1; len--; } return 0;}/* Entry points compatible with bsd4.2 regex library */#ifndef emacsstatic struct re_pattern_buffer re_comp_buf;char *re_comp (s) char *s;{ if (!s) { if (!re_comp_buf.buffer) return "No previous regular expression"; return 0; } if (!re_comp_buf.buffer) { if (!(re_comp_buf.buffer = (char *) malloc (200))) return "Memory exhausted"; re_comp_buf.allocated = 200; if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) return "Memory exhausted"; } return re_compile_pattern (s, strlen (s), &re_comp_buf);}intre_exec (s) char *s;{ int len = strlen (s); return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);}#endif /* emacs */#ifdef test#include <stdio.h>/* Indexed by a character, gives the upper case equivalent of the character */static char upcase[0400] = { 000, 001, 002, 003, 004, 005, 006, 007, 010, 011, 012, 013, 014, 015, 016, 017, 020, 021, 022, 023, 024, 025, 026, 027, 030, 031, 032, 033, 034, 035, 036, 037, 040, 041, 042, 043, 044, 045, 046, 047, 050, 051, 052, 053, 054, 055, 056, 057, 060, 061, 062, 063, 064, 065, 066, 067, 070, 071, 072, 073, 074, 075, 076, 077, 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 };main (argc, argv) int argc; char **argv;{ char pat[80]; struct re_pattern_buffer buf; int i; char c; char fastmap[(1 << BYTEWIDTH)]; /* Allow a command argument to specify the style of syntax. */ if (argc > 1) obscure_syntax = atoi (argv[1]); buf.allocated = 40; buf.buffer = (char *) malloc (buf.allocated); buf.fastmap = fastmap; buf.translate = upcase; while (1) { gets (pat); if (*pat) { re_compile_pattern (pat, strlen(pat), &buf); for (i = 0; i < buf.used; i++) printchar (buf.buffer[i]); putchar ('\n'); printf ("%d allocated, %d used.\n", buf.allocated, buf.used); re_compile_fastmap (&buf); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (fastmap[i]) printchar (i); putchar ('\n'); } gets (pat); /* Now read the string to match against */ i = re_match (&buf, pat, strlen (pat), 0, 0); printf ("Match value %d.\n", i); }}#ifdef NOTDEFprint_buf (bufp) struct re_pattern_buffer *bufp;{ int i; printf ("buf is :\n----------------\n"); for (i = 0; i < bufp->used; i++) printchar (bufp->buffer[i]); printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); printf ("Allowed by fastmap: "); for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->fastmap[i]) printchar (i); printf ("\nAllowed by translate: "); if (bufp->translate) for (i = 0; i < (1 << BYTEWIDTH); i++) if (bufp->translate[i]) printchar (i); printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");}#endifprintchar (c) char c;{ if (c < 041 || c >= 0177) { putchar ('\\'); putchar (((c >> 6) & 3) + '0'); putchar (((c >> 3) & 7) + '0'); putchar ((c & 7) + '0'); } else putchar (c);}error (string) char *string;{ puts (string); exit (1);}#endif test
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -