📄 hsregex.c
字号:
*flagp |= flags&SPSTART; while (*cp->regparse == '|') { cp->regparse++; br = regbranch(cp, &flags); if (br == NULL) return(NULL); regtail(cp, ret, br); /* BRANCH -> BRANCH. */ *flagp &= ~(~flags&HASWIDTH); *flagp |= flags&SPSTART; } /* Make a closing node, and hook it on the end. */ ender = regnode(cp, (paren) ? CLOSE+parno : END); regtail(cp, ret, ender); /* Hook the tails of the branches to the closing node. */ for (br = ret; br != NULL; br = regnext(br)) regoptail(cp, br, ender); /* Check for proper termination. */ if (paren && *cp->regparse++ != ')') { FAIL("unterminated ()"); } else if (!paren && *cp->regparse != '\0') { if (*cp->regparse == ')') { FAIL("unmatched ()"); } else FAIL("internal error: junk on end"); /* NOTREACHED */ } return(ret);}/* - regbranch - one alternative of an | operator * * Implements the concatenation operator. */static char *regbranch(cp, flagp)register struct comp *cp;int *flagp;{ register char *ret; register char *chain; register char *latest; int flags; register int c; *flagp = WORST; /* Tentatively. */ ret = regnode(cp, BRANCH); chain = NULL; while ((c = *cp->regparse) != '\0' && c != '|' && c != ')') { latest = regpiece(cp, &flags); if (latest == NULL) return(NULL); *flagp |= flags&HASWIDTH; if (chain == NULL) /* First piece. */ *flagp |= flags&SPSTART; else regtail(cp, chain, latest); chain = latest; } if (chain == NULL) /* Loop ran zero times. */ (void) regnode(cp, NOTHING); return(ret);}/* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */static char *regpiece(cp, flagp)register struct comp *cp;int *flagp;{ register char *ret; register char op; register char *next; int flags; ret = regatom(cp, &flags); if (ret == NULL) return(NULL); op = *cp->regparse; if (!ISREPN(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') FAIL("*+ operand could be empty"); switch (op) { case '*': *flagp = WORST|SPSTART; break; case '+': *flagp = WORST|SPSTART|HASWIDTH; break; case '?': *flagp = WORST; break; } if (op == '*' && (flags&SIMPLE)) reginsert(cp, STAR, ret); else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(cp, BRANCH, ret); /* Either x */ regoptail(cp, ret, regnode(cp, BACK)); /* and loop */ regoptail(cp, ret, ret); /* back */ regtail(cp, ret, regnode(cp, BRANCH)); /* or */ regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) reginsert(cp, PLUS, ret); else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(cp, BRANCH); /* Either */ regtail(cp, ret, next); regtail(cp, regnode(cp, BACK), ret); /* loop back */ regtail(cp, next, regnode(cp, BRANCH)); /* or */ regtail(cp, ret, regnode(cp, NOTHING)); /* null. */ } else if (op == '?') { /* Emit x? as (x|) */ reginsert(cp, BRANCH, ret); /* Either x */ regtail(cp, ret, regnode(cp, BRANCH)); /* or */ next = regnode(cp, NOTHING); /* null. */ regtail(cp, ret, next); regoptail(cp, ret, next); } cp->regparse++; if (ISREPN(*cp->regparse)) FAIL("nested *?+"); return(ret);}/* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */static char *regatom(cp, flagp)register struct comp *cp;int *flagp;{ register char *ret; int flags; *flagp = WORST; /* Tentatively. */ switch (*cp->regparse++) { case '^': ret = regnode(cp, BOL); break; case '$': ret = regnode(cp, EOL); break; case '.': ret = regnode(cp, ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int range; register int rangeend; register int c; if (*cp->regparse == '^') { /* Complement of range. */ ret = regnode(cp, ANYBUT); cp->regparse++; } else ret = regnode(cp, ANYOF); if ((c = *cp->regparse) == ']' || c == '-') { regc(cp, c); cp->regparse++; } while ((c = *cp->regparse++) != '\0' && c != ']') { if (c != '-') regc(cp, c); else if ((c = *cp->regparse) == ']' || c == '\0') regc(cp, '-'); else { range = (unsigned char)*(cp->regparse-2); rangeend = (unsigned char)c; if (range > rangeend) FAIL("invalid [] range"); for (range++; range <= rangeend; range++) regc(cp, range); cp->regparse++; } } regc(cp, '\0'); if (c != ']') FAIL("unmatched []"); *flagp |= HASWIDTH|SIMPLE; break; } case '(': ret = reg(cp, 1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': /* supposed to be caught earlier */ FAIL("internal error: \\0|) unexpected"); break; case '?': case '+': case '*': FAIL("?+* follows nothing"); break; case '\\': if (*cp->regparse == '\0') FAIL("trailing \\"); ret = regnode(cp, EXACTLY); regc(cp, *cp->regparse++); regc(cp, '\0'); *flagp |= HASWIDTH|SIMPLE; break; default: { register size_t len; register char ender; cp->regparse--; len = strcspn(cp->regparse, META); if (len == 0) FAIL("internal error: strcspn 0"); ender = *(cp->regparse+len); if (len > 1 && ISREPN(ender)) len--; /* Back off clear of ?+* operand. */ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(cp, EXACTLY); for (; len > 0; len--) regc(cp, *cp->regparse++); regc(cp, '\0'); break; } } return(ret);}/* - regnode - emit a node */static char * /* Location. */regnode(cp, op)register struct comp *cp;char op;{ register char *const ret = cp->regcode; register char *ptr; if (!EMITTING(cp)) { cp->regsize += 3; return(ret); } ptr = ret; *ptr++ = op; *ptr++ = '\0'; /* Null next pointer. */ *ptr++ = '\0'; cp->regcode = ptr; return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc(cp, b)register struct comp *cp;char b;{ if (EMITTING(cp)) *cp->regcode++ = b; else cp->regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(cp, op, opnd)register struct comp *cp;char op;char *opnd;{ register char *place; if (!EMITTING(cp)) { cp->regsize += 3; return; } (void) memmove(opnd+3, opnd, (size_t)(cp->regcode - opnd)); cp->regcode += 3; place = opnd; /* Op node, where operand used to be. */ *place++ = op; *place++ = '\0'; *place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(cp, p, val)register struct comp *cp;char *p;char *val;{ register char *scan; register char *temp; register int offset; if (!EMITTING(cp)) return; /* Find last node. */ for (scan = p; (temp = regnext(scan)) != NULL; scan = temp) continue; offset = (OP(scan) == BACK) ? scan - val : val - scan; *(scan+1) = (offset>>8)&0177; *(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(cp, p, val)register struct comp *cp;char *p;char *val;{ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (!EMITTING(cp) || OP(p) != BRANCH) return; regtail(cp, OPERAND(p), val);}/* * sqd_regexec and friends *//* * Work-variable struct for sqd_regexec(). */struct exec { char *reginput; /* String-input pointer. */ char *regbol; /* Beginning of input, for ^ check. */ char **regstartp; /* Pointer to startp array. */ char **regendp; /* Ditto for endp. */};/* * Forwards. */static int regtry(struct exec *ep, sqd_regexp *rp, char *string);static int regmatch(struct exec *ep, char *prog);static size_t regrepeat(struct exec *ep, char *node);#ifdef DEBUGint regnarrate = 0;void regdump();static char *regprop();#endif/* - sqd_regexec - match a regexp against a string */intsqd_regexec(prog, str)register sqd_regexp *prog;const char *str;{ register char *string = (char *)str; /* avert const poisoning */ register char *s; struct exec ex; /* Be paranoid. */ if (prog == NULL || string == NULL) { sqd_regerror("NULL argument to sqd_regexec"); return(0); } /* Check validity of program. */ if ((unsigned char)*prog->program != SQD_REGMAGIC) { sqd_regerror("corrupted regexp"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL) return(0); /* Mark beginning of line for ^ . */ ex.regbol = string; ex.regstartp = prog->startp; ex.regendp = prog->endp; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) return(regtry(&ex, prog, string)); /* Messy cases: unanchored match. */ if (prog->regstart != '\0') { /* We know what char it must start with. */ for (s = string; s != NULL; s = strchr(s+1, prog->regstart)) if (regtry(&ex, prog, s)) return(1); return(0); } else { /* We don't -- general case. */ for (s = string; !regtry(&ex, prog, s); s++) if (*s == '\0') return(0); return(1); } /* NOTREACHED */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -