📄 regexp.c
字号:
} if (digit != '}') { FAIL("Bad characters after \\{"); } else if (to < from || to == 0 || from >= 255) { FAIL("Invalid range for \\{ \\}"); } re->minlen += from; } else#endif if (peek != M_SPLAT) { re->minlen++; } /* it is okay -- make it prefix instead of postfix */ ADD_META(build, peek);#ifndef CRUNCH if (peek == M_RANGE) { *build++ = from; *build++ = (to < 255 ? to : 255); }#endif /* take care of "needfirst" - is this the first char? */ if (needfirst && peek == M_PLUS && !IS_META(token)) { re->first = token; } needfirst = 0; /* we used "peek" -- need to refill it */ peek = gettoken(&exp, re); if (IS_CLOSURE(peek)) { FAIL("* or \\+ or \\? doubled up"); } } else if (!IS_META(token)) { /* normal char is NOT argument of closure */ if (needfirst) { re->first = token; needfirst = 0; } re->minlen++; } else if (token == M_ANY || IS_CLASS(token)) { /* . or [] is NOT argument of closure */ needfirst = 0; re->minlen++; } /* the "token" character is not closure -- process it normally */ if (token == M_BEGLINE) { /* set the BOL flag instead of storing M_BEGLINE */ re->bol = 1; } else if (IS_META(token)) { ADD_META(build, token); } else { *build++ = token; } } /* end it with a \) which MUST MATCH the opening \( */ ADD_META(build, M_END(0)); if (end_sp > 0) { FAIL("Not enough \\)s"); } return re;}/*---------------------------------------------------------------------------*//* This function checks for a match between a character and a token which is * known to represent a single character. It returns 0 if they match, or * 1 if they don't. */int match1(re, ch, token) regexp *re; REG char ch; REG int token;{ if (!ch) { /* the end of a line can't match any RE of width 1 */ return 1; } if (token == M_ANY) { return 0; } else if (IS_CLASS(token)) { if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7))) return 0; } else if (ch == token || *o_ignorecase && tolower(ch) == tolower(token)) { return 0; } return 1;}/* This function checks characters up to and including the next closure, at * which point it does a recursive call to check the rest of it. This function * returns 0 if everything matches, or 1 if something doesn't match. */int match(re, str, prog, here) regexp *re; /* the regular expression */ char *str; /* the string */ REG char *prog; /* a portion of re->program, an compiled RE */ REG char *here; /* a portion of str, the string to compare it to */{ REG int token; /* the roken pointed to by prog */ REG int nmatched;/* counter, used during closure matching */ REG int closure;/* the token denoting the type of closure */ int from; /* minimum number of matches in closure */ int to; /* maximum number of matches in closure */ for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog)) { switch (token) { /*case M_BEGLINE: can't happen; re->bol is used instead */ case M_ENDLINE: if (*here) return 1; break; case M_BEGWORD: if (here != str && (here[-1] == '_' || isalnum(here[-1]))) return 1; break; case M_ENDWORD: if (here[0] == '_' || isalnum(here[0])) return 1; break; case M_START(0): case M_START(1): case M_START(2): case M_START(3): case M_START(4): case M_START(5): case M_START(6): case M_START(7): case M_START(8): case M_START(9): re->startp[token - M_START(0)] = (char *)here; break; case M_END(0): case M_END(1): case M_END(2): case M_END(3): case M_END(4): case M_END(5): case M_END(6): case M_END(7): case M_END(8): case M_END(9): re->endp[token - M_END(0)] = (char *)here; if (token == M_END(0)) { return 0; } break; default: /* literal, M_CLASS(n), or M_ANY */ if (match1(re, *here, token) != 0) return 1; here++; } } /* C L O S U R E */ /* step 1: see what we have to match against, and move "prog" to point * to the remainder of the compiled RE. */ closure = token; prog++; switch (closure) { case M_SPLAT: from = 0; to = strlen(str); /* infinity */ break; case M_PLUS: from = 1; to = strlen(str); /* infinity */ break; case M_QMARK: from = 0; to = 1; break;#ifndef CRUNCH case M_RANGE: from = UCHAR(*prog++); to = UCHAR(*prog++); if (to == 255) { to = strlen(str); /* infinity */ } break;#endif } token = GET_META(prog); prog++; /* step 2: see how many times we can match that token against the string */ for (nmatched = 0; nmatched < to && *here && match1(re, *here, token) == 0; nmatched++, here++) { } /* step 3: try to match the remainder, and back off if it doesn't */ while (nmatched >= from && match(re, str, prog, here) != 0) { nmatched--; here--; } /* so how did it work out? */ if (nmatched >= from) return 0; return 1;}/* This function searches through a string for text that matches an RE. */int regexec(re, str, bol) regexp *re; /* the compiled regexp to search for */ char *str; /* the string to search through */ int bol; /* boolean: does str start at the beginning of a line? */{ char *prog; /* the entry point of re->program */ int len; /* length of the string */ REG char *here; /* if must start at the beginning of a line, and this isn't, then fail */ if (re->bol && !bol) { return 0; } len = strlen(str); prog = re->program + 1 + 32 * re->program[0]; /* search for the RE in the string */ if (re->bol) { /* must occur at BOL */ if ((re->first && match1(re, *(char *)str, re->first))/* wrong first letter? */ || len < re->minlen /* not long enough? */ || match(re, (char *)str, prog, str)) /* doesn't match? */ return 0; /* THEN FAIL! */ }#ifndef CRUNCH else if (!*o_ignorecase) { /* can occur anywhere in the line, noignorecase */ for (here = (char *)str; (re->first && re->first != *here) || match(re, (char *)str, prog, here); here++, len--) { if (len < re->minlen) return 0; } }#endif else { /* can occur anywhere in the line, ignorecase */ for (here = (char *)str; (re->first && match1(re, *here, (int)re->first)) || match(re, (char *)str, prog, here); here++, len--) { if (len < re->minlen) return 0; } } /* if we didn't fail, then we must have succeeded */ return 1;}/*============================================================================*/#else /* NO_MAGIC */regexp *regcomp(exp) char *exp;{ char *src; char *dest; regexp *re; int i; /* allocate a big enough regexp structure */#ifdef lint re = (regexp *)0;#else re = (regexp *)malloc((unsigned)(strlen(exp) + 1 + sizeof(struct regexp)));#endif if (!re) { regerror("Could not malloc a regexp structure"); return (regexp *)0; } /* initialize all fields of the structure */ for (i = 0; i < NSUBEXP; i++) { re->startp[i] = re->endp[i] = (char *)0; } re->minlen = 0; re->first = 0; re->bol = 0; /* copy the string into it, translating ^ and $ as needed */ for (src = exp, dest = re->program + 1; *src; src++) { switch (*src) { case '^': if (src == exp) { re->bol += 1; } else { *dest++ = '^'; re->minlen++; } break; case '$': if (!src[1]) { re->bol += 2; } else { *dest++ = '$'; re->minlen++; } break; case '\\': if (src[1]) { *dest++ = *++src; re->minlen++; } else { regerror("extra \\ at end of regular expression"); } break; default: *dest++ = *src; re->minlen++; } } *dest = '\0'; return re;}/* This "helper" function checks for a match at a given location. It returns * 1 if it matches, 0 if it doesn't match here but might match later on in the * string, or -1 if it could not possibly match */static int reghelp(prog, string, bolflag) struct regexp *prog; char *string; int bolflag;{ char *scan; char *str; /* if ^, then require bolflag */ if ((prog->bol & 1) && !bolflag) { return -1; } /* if it matches, then it will start here */ prog->startp[0] = string; /* compare, possibly ignoring case */ if (*o_ignorecase) { for (scan = &prog->program[1]; *scan; scan++, string++) if (tolower(*scan) != tolower(*string)) return *string ? 0 : -1; } else { for (scan = &prog->program[1]; *scan; scan++, string++) if (*scan != *string) return *string ? 0 : -1; } /* if $, then require string to end here, too */ if ((prog->bol & 2) && *string) { return 0; } /* if we get to here, it matches */ prog->endp[0] = string; return 1;}int regexec(prog, string, bolflag) struct regexp *prog; char *string; int bolflag;{ int rc; /* keep trying to match it */ for (rc = reghelp(prog, string, bolflag); rc == 0; rc = reghelp(prog, string, 0)) { string++; } /* did we match? */ return rc == 1;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -