📄 regexp.c
字号:
reginsert(iop, opnd)int iop;unsigned char *opnd;{ register unsigned char *src; register unsigned char *dst; register unsigned char *place; unsigned char op; op = (unsigned char) iop; if (regcode == ®dummy) { regsize += 3; return; } src = regcode; regcode += 3; dst = regcode; while (src > opnd) *--dst = *--src; place = opnd; /* Op node, where operand used to be. */ *place++ = op; *place++ = '\0'; *place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */STATIC voidregtail(p, val)unsigned char *p;unsigned char *val;{ register unsigned char *scan; register unsigned char *temp; register int offset; if (p == ®dummy) return; /* Find last node. */ scan = p; for (;;) { temp = regnext(scan); if (temp == NULL) break; scan = temp; } if (OP(scan) == BACK) offset = scan - val; else offset = val - scan; *(scan+1) = (offset>>8)&0377; *(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */STATIC voidregoptail(p, val)unsigned char *p;unsigned char *val;{ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (p == NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val);}/* * regexec and friends *//* * Global work variables for regexec(). */STATIC unsigned char *reginput; /* String-input pointer. */STATIC unsigned char *regbol; /* Beginning of input, for ^ check. */STATIC unsigned char **regstartp; /* Pointer to startp array. */STATIC unsigned char **regendp; /* Ditto for endp. */#ifdef DEBUGint regnarrate = 0;void regdump();STATIC unsigned char *regprop();#endif/* - regexec - match a regexp against a string */intregexec(prog, string)register regexp *prog;register unsigned char *string;{ register unsigned char *s;#ifndef STDLIB extern char *strchr();#endif /* Be paranoid... */ if (prog == NULL || string == NULL) { regerror("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { regerror("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { s = string; while ((s = (unsigned char *)strchr((char *)s,prog->regmust[0])) != NULL) { if (strncmp((char *)s, (char *)prog->regmust, prog->regmlen) == 0) break; /* Found it. */ s++; } if (s == NULL) /* Not present. */ return(0); } /* Mark beginning of line for ^ . */ regbol = string; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) return(regtry(prog, string)); /* Messy cases: unanchored match. */ s = string; if (prog->regstart != '\0') /* We know what char it must start with. */ while ((s = (unsigned char *)strchr((char *)s, prog->regstart)) != NULL) { if (regtry(prog, s)) return(1); s++; } else /* We don't -- general case. */ do { if (regtry(prog, s)) return(1); } while (*s++ != '\0'); /* Failure. */ return(0);}/* - regtry - try match at specific point */STATIC int /* 0 failure, 1 success */regtry(prog, string)regexp *prog;unsigned char *string;{ register int i; register unsigned char **sp; register unsigned char **ep; reginput = string; regstartp = prog->startp; regendp = prog->endp; sp = prog->startp; ep = prog->endp; for (i = NSUBEXP; i > 0; i--) { *sp++ = NULL; *ep++ = NULL; } if (regmatch(prog->program + 1)) { prog->startp[0] = string; prog->endp[0] = reginput; return(1); } else return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */STATIC int /* 0 failure, 1 success */regmatch(prog)unsigned char *prog;{ register unsigned char *scan; /* Current node. */ unsigned char *next; /* Next node. */#ifndef STDLIB extern char *strchr();#endif scan = prog;#ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan));#endif while (scan != NULL) {#ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan));#endif next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (*reginput != '\0') return(0); break; case ANY: if (*reginput == '\0') return(0); reginput++; break; case EXACTLY: { register int len; register unsigned char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. */ if (*opnd != *reginput) return(0); len = strlen((char *)opnd); if (len > 1 && strncmp((char *)opnd, (char *)reginput, len) != 0) return(0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr((char *)OPERAND(scan), *reginput) == NULL) return(0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr((char *)OPERAND(scan), *reginput) != NULL) return(0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register unsigned char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register unsigned char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register unsigned char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register unsigned char nextch; register int no; register unsigned char *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. */ if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case END: return(1); /* Success! */ break; default: regerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ regerror("corrupted pointers"); return(0);}/* - regrepeat - repeatedly match something simple, report how many */STATIC intregrepeat(p)unsigned char *p;{ register int count = 0; register unsigned char *scan; register unsigned char *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: count = strlen((char *)scan); scan += count; break; case EXACTLY: while (*opnd == *scan) { count++; scan++; } break; case ANYOF: while (*scan != '\0' && strchr((char *)opnd, *scan) != NULL) { count++; scan++; } break; case ANYBUT: while (*scan != '\0' && strchr((char *)opnd, *scan) == NULL) { count++; scan++; } break; default: /* Oh dear. Called inappropriately. */ regerror("internal foulup"); count = 0; /* Best compromise. */ break; } reginput = scan; return(count);}/* - regnext - dig the "next" pointer out of a node */STATIC unsigned char *regnext(p)register unsigned char *p;{ register int offset; if (p == ®dummy) return(NULL); offset = NEXT(p); if (offset == 0) return(NULL); if (OP(p) == BACK) return(p-offset); else return(p+offset);}#ifdef DEBUGSTATIC unsigned char *regprop();/* - regdump - dump a regexp onto stdout in vaguely comprehensible form */voidregdump(r)regexp *r;{ register unsigned char *s; register unsigned char op = EXACTLY; /* Arbitrary non-END op. */ register unsigned char *next; extern char *strchr(); s = r->program + 1; while (op != END) { /* While that wasn't END last time... */ op = OP(s); printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ next = regnext(s); if (next == NULL) /* Next ptr. */ printf("(0)"); else printf("(%d)", (s-r->program)+(next-s)); s += 3; if (op == ANYOF || op == ANYBUT || op == EXACTLY) { /* Literal string, where present. */ while (*s != '\0') { putchar(*s); s++; } s++; } putchar('\n'); } /* Header fields of interest. */ if (r->regstart != '\0') printf("start `%c' ", r->regstart); if (r->reganch) printf("anchored "); if (r->regmust != NULL) printf("must have \"%s\"", r->regmust); printf("\n");}/* - regprop - printable representation of opcode */STATIC unsigned char *regprop(op)unsigned char *op;{ register unsigned char *p; STATIC unsigned char buf[50]; (void) strcpy(buf, ":"); switch (OP(op)) { case BOL: p = "BOL"; break; case EOL: p = "EOL"; break; case ANY: p = "ANY"; break; case ANYOF: p = "ANYOF"; break; case ANYBUT: p = "ANYBUT"; break; case BRANCH: p = "BRANCH"; break; case EXACTLY: p = "EXACTLY"; break; case NOTHING: p = "NOTHING"; break; case BACK: p = "BACK"; break; case END: p = "END"; break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); p = NULL; break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); p = NULL; break; case STAR: p = "STAR"; break; case PLUS: p = "PLUS"; break; default: regerror("corrupted opcode"); break; } if (p != NULL) (void) strcat(buf, p); return(buf);}#endif/* * The following is provided for those people who do not have strcspn() in * their C libraries. They should get off their butts and do something * about it; at least one public-domain implementation of those (highly * useful) string routines has been published on Usenet. */#ifdef STRCSPN/* * strcspn - find length of initial segment of s1 consisting entirely * of characters not from s2 */STATIC intstrcspn(s1, s2)unsigned char *s1;unsigned char *s2;{ register unsigned char *scan1; register unsigned char *scan2; register int count; count = 0; for (scan1 = s1; *scan1 != '\0'; scan1++) { for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ if (*scan1 == *scan2++) return(count); count++; } return(count);}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -