📄 regexp.c
字号:
register int class; register int classend; register int c; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc((ushort) '-'); else { class = UCHARAT(regparse-2)+1; classend = UCHARAT(regparse); if (class > classend+1) FAIL("invalid [] range"); regc((ushort) 0xffff); regc((ushort) class); regc((ushort) classend); regparse++; } } else { if ((c = *regparse++) == '\\') { if ((c = *regparse++) == 'n') c = '\n'; else if (c == 't') c = '\t'; } regc(c); } } regc((ushort) 0); if (*regparse != ']') FAIL("unmatched []"); regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ break; case '?': case '+': case '*': FAIL("?+* follows nothing"); break; case '\\': if (*regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY);/* regc(*regparse++);*/ c = *regparse++; if (c == 'n') c = '\n'; else if (c == 't') c = '\t'; else if (c == 'f') c = '\f'; else if (c == '\r') c = '\r'; else if (c == '\b') c = '\b'; else if (IsDigid(c)) { d = c - '0'; if (IsDigid(*regparse)) { d = d * 8 + *regparse++ - '0'; if (IsDigid(*regparse)) d = d * 8 + *regparse++ - '0'; } c = d; } regc(c); regc((ushort) 0); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; regparse--; len = Strcspn(regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. */ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc((ushort) 0); } break; } return(ret);}IsDigid(c) ushort c;{ return '0' <= c && c <= '9';}/* - regnode - emit a node */static ushort * /* Location. */regnode(op)ushort op;{ register ushort *ret; register ushort *ptr; ret = regcode; if (ret == ®dummy) { regsize += 3; return(ret); } ptr = ret; *ptr++ = op; *ptr++ = '\0'; /* Null "next" pointer. */ *ptr++ = '\0'; regcode = ptr; return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc(b)ushort b;{ if (regcode != ®dummy) *regcode++ = b; else regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(op, opnd)ushort op;ushort *opnd;{ register ushort *src; register ushort *dst; register ushort *place; if (regcode == ®dummy) { regsize += 3; return; } src = regcode; regcode += 3; dst = regcode; while (src > opnd) *--dst = *--src; place = opnd; /* Op node, where operand used to be. */ *place++ = op; *place++ = '\0'; *place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(p, val)ushort *p;ushort *val;{ register ushort *scan; register ushort *temp; register int offset; if (p == ®dummy) return; /* Find last node. */ scan = p; for (;;) { temp = regnext(scan); if (temp == NULL) break; scan = temp; } if (OP(scan) == BACK) offset = scan - val; else offset = val - scan; *(scan+1) = (offset>>8)&0377; *(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(p, val)ushort *p;ushort *val;{ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (p == NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val);}/* * regexec and friends *//* * Global work variables for regexec(). */static ushort *reginput; /* String-input pointer. */static ushort *regbol; /* Beginning of input, for ^ check. */static ushort **regstartp; /* Pointer to startp array. */static ushort **regendp; /* Ditto for endp. *//* * Forwards. */STATIC int regtry();STATIC int regmatch();STATIC int regrepeat();#ifdef DEBUGint regnarrate = 0;void regdump();STATIC char *regprop();#endif/* - regexec - match a regexp against a string */intregexec(prog, string, bolflag)register regexp *prog;register ushort *string;int bolflag;{ register ushort *s; extern ushort *Strchr(); /* Be paranoid... */ if (prog == NULL || string == NULL) { regerror("NULL parameter"); return(0); } /* Check validity of program. */ if (prog->program[0] != MAGIC) { regerror("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { s = string; while ((s = Strchr(s, prog->regmust[0])) != NULL) { if (Strncmp(s, prog->regmust, prog->regmlen) == 0) break; /* Found it. */ s++; } if (s == NULL) /* Not present. */ return(0); } /* Mark beginning of line for ^ . */ if(bolflag) regbol = string; else regbol = NULL; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) return(regtry(prog, string)); /* Messy cases: unanchored match. */ s = string; if (prog->regstart != '\0') { /* We know what char it must start with. */ while ((s = Strchr(s, prog->regstart)) != NULL) { if (regtry(prog, s)) return(1); s++; }} else /* We don't -- general case. */ do { if (regtry(prog, s)) return(1); } while (*s++ != '\0'); /* Failure. */ return(0);}/* - regtry - try match at specific point */static int /* 0 failure, 1 success */regtry(prog, string)regexp *prog;ushort *string;{ register int i; register ushort **sp; register ushort **ep; reginput = string; regstartp = prog->startp; regendp = prog->endp; sp = prog->startp; ep = prog->endp; for (i = NSUBEXP; i > 0; i--) { *sp++ = NULL; *ep++ = NULL; } if (regmatch(prog->program + 1)) { prog->startp[0] = string; prog->endp[0] = reginput; return(1); } else return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */static int /* 0 failure, 1 success */regmatch(prog)ushort *prog;{ register ushort *scan; /* Current node. */ ushort *next; /* Next node. */ extern ushort *Strchr(); scan = prog;#ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan));#endif while (scan != NULL) {#ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan));#endif next = regnext(scan); switch ((int) OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (*reginput != '\0') return(0); break; case ANY: if (*reginput == '\0') return(0); reginput++; break; case EXACTLY: { register int len; register ushort *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. */ if (*opnd != *reginput) return(0); len = Strlen(opnd); if (len > 1 && Strncmp(opnd, reginput, len) != 0) return(0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) == 0) return(0); reginput++; break; case ANYBUT: if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) != 0) return(0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register ushort *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register ushort *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -