📄 hsregex.c
字号:
}/* - regtry - try match at specific point */static int /* 0 failure, 1 success */regtry(ep, prog, string)register struct exec *ep;sqd_regexp *prog;char *string;{ register int i; register char **stp; register char **enp; ep->reginput = string; stp = prog->startp; enp = prog->endp; for (i = NSUBEXP; i > 0; i--) { *stp++ = NULL; *enp++ = NULL; } if (regmatch(ep, prog->program + 1)) { prog->startp[0] = string; prog->endp[0] = ep->reginput; return(1); } else return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */static int /* 0 failure, 1 success */regmatch(ep, prog)register struct exec *ep;char *prog;{ register char *scan; /* Current node. */ char *next; /* Next node. */#ifdef DEBUG if (prog != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(prog));#endif for (scan = prog; scan != NULL; scan = next) {#ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan));#endif next = regnext(scan); switch (OP(scan)) { case BOL: if (ep->reginput != ep->regbol) return(0); break; case EOL: if (*ep->reginput != '\0') return(0); break; case ANY: if (*ep->reginput == '\0') return(0); ep->reginput++; break; case EXACTLY: { register size_t len; register char *const opnd = OPERAND(scan); /* Inline the first character, for speed. */ if (*opnd != *ep->reginput) return(0); len = strlen(opnd); if (len > 1 && strncmp(opnd, ep->reginput, len) != 0) return(0); ep->reginput += len; break; } case ANYOF: if (*ep->reginput == '\0' || strchr(OPERAND(scan), *ep->reginput) == NULL) return(0); ep->reginput++; break; case ANYBUT: if (*ep->reginput == '\0' || strchr(OPERAND(scan), *ep->reginput) != NULL) return(0); ep->reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register const int no = OP(scan) - OPEN; register char *const input = ep->reginput; if (regmatch(ep, next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. */ if (ep->regstartp[no] == NULL) ep->regstartp[no] = input; return(1); } else return(0); break; } case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register const int no = OP(scan) - CLOSE; register char *const input = ep->reginput; if (regmatch(ep, next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (ep->regendp[no] == NULL) ep->regendp[no] = input; return(1); } else return(0); break; } case BRANCH: { register char *const save = ep->reginput; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { while (OP(scan) == BRANCH) { if (regmatch(ep, OPERAND(scan))) return(1); ep->reginput = save; scan = regnext(scan); } return(0); /* NOTREACHED */ } break; } case STAR: case PLUS: { register const char nextch = (OP(next) == EXACTLY) ? *OPERAND(next) : '\0'; register size_t no; register char *const save = ep->reginput; register const size_t min = (OP(scan) == STAR) ? 0 : 1; for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) { ep->reginput = save + no - 1; /* If it could work, try it. */ if (nextch == '\0' || *ep->reginput == nextch) if (regmatch(ep, next)) return(1); } return(0); break; } case END: return(1); /* Success! */ break; default: sqd_regerror("regexp corruption"); return(0); break; } } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ sqd_regerror("corrupted pointers"); return(0);}/* - regrepeat - report how many times something simple would match */static size_tregrepeat(ep, node)register struct exec *ep;char *node;{ register size_t count; register char *scan; register char ch; switch (OP(node)) { case ANY: return(strlen(ep->reginput)); break; case EXACTLY: ch = *OPERAND(node); count = 0; for (scan = ep->reginput; *scan == ch; scan++) count++; return(count); break; case ANYOF: return(strspn(ep->reginput, OPERAND(node))); break; case ANYBUT: return(strcspn(ep->reginput, OPERAND(node))); break; default: /* Oh dear. Called inappropriately. */ sqd_regerror("internal error: bad call of regrepeat"); return(0); /* Best compromise. */ break; } /* NOTREACHED */}/* - regnext - dig the "next" pointer out of a node */static char *regnext(p)register char *p;{ register const int offset = NEXT(p); if (offset == 0) return(NULL); return((OP(p) == BACK) ? p-offset : p+offset);}#ifdef DEBUGstatic char *regprop();/* - regdump - dump a regexp onto stdout in vaguely comprehensible form */voidregdump(r)sqd_regexp *r;{ register char *s; register char op = EXACTLY; /* Arbitrary non-END op. */ register char *next; s = r->program + 1; while (op != END) { /* While that wasn't END last time... */ op = OP(s); printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ next = regnext(s); if (next == NULL) /* Next ptr. */ printf("(0)"); else printf("(%d)", (s-r->program)+(next-s)); s += 3; if (op == ANYOF || op == ANYBUT || op == EXACTLY) { /* Literal string, where present. */ while (*s != '\0') { putchar(*s); s++; } s++; } putchar('\n'); } /* Header fields of interest. */ if (r->regstart != '\0') printf("start `%c' ", r->regstart); if (r->reganch) printf("anchored "); if (r->regmust != NULL) printf("must have \"%s\"", r->regmust); printf("\n");}/* - regprop - printable representation of opcode */static char *regprop(op)char *op;{ register char *p; static char buf[50]; (void) strcpy(buf, ":"); switch (OP(op)) { case BOL: p = "BOL"; break; case EOL: p = "EOL"; break; case ANY: p = "ANY"; break; case ANYOF: p = "ANYOF"; break; case ANYBUT: p = "ANYBUT"; break; case BRANCH: p = "BRANCH"; break; case EXACTLY: p = "EXACTLY"; break; case NOTHING: p = "NOTHING"; break; case BACK: p = "BACK"; break; case END: p = "END"; break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); p = NULL; break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); p = NULL; break; case STAR: p = "STAR"; break; case PLUS: p = "PLUS"; break; default: sqd_regerror("corrupted opcode"); break; } if (p != NULL) (void) strcat(buf, p); return(buf);}#endif/* - sqd_regsub - perform substitutions after a regexp match */voidsqd_regsub(rp, source, dest)const sqd_regexp *rp;const char *source;char *dest;{ register sqd_regexp * const prog = (sqd_regexp *)rp; register char *src = (char *)source; register char *dst = dest; register char c; register int no; register size_t len; if (prog == NULL || source == NULL || dest == NULL) { sqd_regerror("NULL parameter to sqd_regsub"); return; } if ((unsigned char)*(prog->program) != SQD_REGMAGIC) { sqd_regerror("damaged regexp"); return; } while ((c = *src++) != '\0') { if (c == '&') no = 0; else if (c == '\\' && isdigit((int) (*src))) no = *src++ - '0'; else no = -1; if (no < 0) { /* Ordinary character. */ if (c == '\\' && (*src == '\\' || *src == '&')) c = *src++; *dst++ = c; } else if (prog->startp[no] != NULL && prog->endp[no] != NULL && prog->endp[no] > prog->startp[no]) { len = prog->endp[no] - prog->startp[no]; (void) strncpy(dst, prog->startp[no], len); dst += len; if (*(dst-1) == '\0') { /* strncpy hit NUL. */ sqd_regerror("damaged match string"); return; } } } *dst++ = '\0';}voidsqd_regerror(s)char *s;{ fprintf(stderr, "regexp(3): %s\n", s); exit(EXIT_FAILURE); /* NOTREACHED */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -