📄 regexp.c
字号:
register char op; register char *next; int flags; ret = regatom(&flags,rcstate); if (ret == NULL) return(NULL); op = *rcstate->regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') FAIL("*+ operand could be empty"); *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); if (op == '*' && (flags&SIMPLE)) reginsert(STAR, ret, rcstate); else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret, rcstate); /* Either x */ regoptail(ret, regnode(BACK,rcstate)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH,rcstate)); /* or */ regtail(ret, regnode(NOTHING,rcstate)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) reginsert(PLUS, ret, rcstate); else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH,rcstate); /* Either */ regtail(ret, next); regtail(regnode(BACK,rcstate), ret); /* loop back */ regtail(next, regnode(BRANCH,rcstate)); /* or */ regtail(ret, regnode(NOTHING,rcstate)); /* null. */ } else if (op == '?') { /* Emit x? as (x|) */ reginsert(BRANCH, ret, rcstate); /* Either x */ regtail(ret, regnode(BRANCH,rcstate)); /* or */ next = regnode(NOTHING,rcstate); /* null. */ regtail(ret, next); regoptail(ret, next); } rcstate->regparse++; if (ISMULT(*rcstate->regparse)) FAIL("nested *?+"); return(ret);}/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */static char *regatom(flagp, rcstate)int *flagp;struct regcomp_state *rcstate;{ register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*rcstate->regparse++) { case '^': ret = regnode(BOL,rcstate); break; case '$': ret = regnode(EOL,rcstate); break; case '.': ret = regnode(ANY,rcstate); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int clss; register int classend; if (*rcstate->regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT,rcstate); rcstate->regparse++; } else ret = regnode(ANYOF,rcstate); if (*rcstate->regparse == ']' || *rcstate->regparse == '-') regc(*rcstate->regparse++,rcstate); while (*rcstate->regparse != '\0' && *rcstate->regparse != ']') { if (*rcstate->regparse == '-') { rcstate->regparse++; if (*rcstate->regparse == ']' || *rcstate->regparse == '\0') regc('-',rcstate); else { clss = UCHARAT(rcstate->regparse-2)+1; classend = UCHARAT(rcstate->regparse); if (clss > classend+1) FAIL("invalid [] range"); for (; clss <= classend; clss++) regc((char)clss,rcstate); rcstate->regparse++; } } else regc(*rcstate->regparse++,rcstate); } regc('\0',rcstate); if (*rcstate->regparse != ']') FAIL("unmatched []"); rcstate->regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags, rcstate); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ /* NOTREACHED */ case '?': case '+': case '*': FAIL("?+* follows nothing"); /* NOTREACHED */ case '\\': if (*rcstate->regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY,rcstate); regc(*rcstate->regparse++,rcstate); regc('\0',rcstate); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; rcstate->regparse--; len = strcspn(rcstate->regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(rcstate->regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. */ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY,rcstate); while (len > 0) { regc(*rcstate->regparse++,rcstate); len--; } regc('\0',rcstate); } break; } return(ret);}/* - regnode - emit a node */static char * /* Location. */regnode(op, rcstate)int op;struct regcomp_state *rcstate;{ register char *ret; register char *ptr; ret = rcstate->regcode; if (ret == ®dummy) { rcstate->regsize += 3; return(ret); } ptr = ret; *ptr++ = (char)op; *ptr++ = '\0'; /* Null "next" pointer. */ *ptr++ = '\0'; rcstate->regcode = ptr; return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc(b, rcstate)int b;struct regcomp_state *rcstate;{ if (rcstate->regcode != ®dummy) *rcstate->regcode++ = (char)b; else rcstate->regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(op, opnd, rcstate)int op;char *opnd;struct regcomp_state *rcstate;{ register char *src; register char *dst; register char *place; if (rcstate->regcode == ®dummy) { rcstate->regsize += 3; return; } src = rcstate->regcode; rcstate->regcode += 3; dst = rcstate->regcode; while (src > opnd) *--dst = *--src; place = opnd; /* Op node, where operand used to be. */ *place++ = (char)op; *place++ = '\0'; *place = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(p, val)char *p;char *val;{ register char *scan; register char *temp; register int offset; if (p == ®dummy) return; /* Find last node. */ scan = p; for (;;) { temp = regnext(scan); if (temp == NULL) break; scan = temp; } if (OP(scan) == BACK) offset = scan - val; else offset = val - scan; *(scan+1) = (char)((offset>>8)&0377); *(scan+2) = (char)(offset&0377);}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(p, val)char *p;char *val;{ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (p == NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val);}/* * TclRegExec and friends *//* * Global work variables for TclRegExec(). */struct regexec_state { char *reginput; /* String-input pointer. */ char *regbol; /* Beginning of input, for ^ check. */ char **regstartp; /* Pointer to startp array. */ char **regendp; /* Ditto for endp. */};/* * Forwards. */static int regtry _ANSI_ARGS_((regexp *prog, char *string, struct regexec_state *restate));static int regmatch _ANSI_ARGS_((char *prog, struct regexec_state *restate));static int regrepeat _ANSI_ARGS_((char *p, struct regexec_state *restate));#ifdef DEBUGint regnarrate = 0;void regdump _ANSI_ARGS_((regexp *r));static char *regprop _ANSI_ARGS_((char *op));#endif/* - TclRegExec - match a regexp against a string */intTclRegExec(prog, string, start)register regexp *prog;register char *string;char *start;{ register char *s; struct regexec_state state; struct regexec_state *restate= &state; /* Be paranoid... */ if (prog == NULL || string == NULL) { TclRegError("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { TclRegError("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { s = string; while ((s = strchr(s, prog->regmust[0])) != NULL) { if (strncmp(s, prog->regmust, (size_t) prog->regmlen) == 0) break; /* Found it. */ s++; } if (s == NULL) /* Not present. */ return(0); } /* Mark beginning of line for ^ . */ restate->regbol = start; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) return(regtry(prog, string, restate)); /* Messy cases: unanchored match. */ s = string; if (prog->regstart != '\0') /* We know what char it must start with. */ while ((s = strchr(s, prog->regstart)) != NULL) { if (regtry(prog, s, restate)) return(1); s++; } else /* We don't -- general case. */ do { if (regtry(prog, s, restate)) return(1); } while (*s++ != '\0'); /* Failure. */ return(0);}/* - regtry - try match at specific point */static int /* 0 failure, 1 success */regtry(prog, string, restate)regexp *prog;char *string;struct regexec_state *restate;{ register int i; register char **sp; register char **ep; restate->reginput = string; restate->regstartp = prog->startp; restate->regendp = prog->endp; sp = prog->startp; ep = prog->endp; for (i = NSUBEXP; i > 0; i--) { *sp++ = NULL; *ep++ = NULL; } if (regmatch(prog->program + 1,restate)) { prog->startp[0] = string; prog->endp[0] = restate->reginput; return(1); } else return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */static int /* 0 failure, 1 success */regmatch(prog, restate)char *prog;struct regexec_state *restate;{ register char *scan; /* Current node. */ char *next; /* Next node. */ scan = prog;#ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan));#endif while (scan != NULL) {#ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan));#endif next = regnext(scan); switch (OP(scan)) { case BOL: if (restate->reginput != restate->regbol) { return 0; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -