📄 regexp.c
字号:
regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register ushort *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register ushort nextch; register int no; register ushort *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. */ if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case END: return(1); /* Success! */ break; default: regerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. */ regerror("corrupted pointers"); return(0);}/* - regrepeat - repeatedly match something simple, report how many */static intregrepeat(p)ushort *p;{ register int count = 0; register ushort *scan; register ushort *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: count = Strlen(scan); scan += count; break; case EXACTLY: while (*opnd == *scan) { count++; scan++; } break; case ANYOF: while (*scan != '\0' && isthere(opnd, *scan) != 0) { count++; scan++; } break; case ANYBUT: while (*scan != '\0' && isthere(opnd, *scan) == 0) { count++; scan++; } break; default: /* Oh dear. Called inappropriately. */ regerror("internal foulup"); count = 0; /* Best compromise. */ break; } reginput = scan; return(count);}/* - regnext - dig the "next" pointer out of a node */static ushort *regnext(p)register ushort *p;{ register int offset; if (p == ®dummy) return(NULL); offset = NEXT(p); if (offset == 0) return(NULL); if (OP(p) == BACK) return(p-offset); else return(p+offset);}#ifdef DEBUGSTATIC char *regprop();/* - regdump - dump a regexp onto stdout in vaguely comprehensible form */voidregdump(r)regexp *r;{ register ushort *s; register ushort op = EXACTLY; /* Arbitrary non-END op. */ register ushort *next; s = r->program + 1; while (op != END) { /* While that wasn't END last time... */ op = OP(s); printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ next = regnext(s); if (next == NULL) /* Next ptr. */ printf("(0)"); else printf("(%d)", (s-r->program)+(next-s)); s += 3; if (op == ANYOF || op == ANYBUT || op == EXACTLY) { /* Literal string, where present. */ while (*s != '\0') { if (*s == 0xffff) { /* range */ kputchar(*++s); putchar('-'); ++s; } kputchar(*s++); } s++; } putchar('\n'); } /* Header fields of interest. */ if (r->regstart != '\0') { fputs("start `", stdout); kputchar(r->regstart); fputs("' ", stdout); } if (r->reganch) printf("anchored "); if (r->regmust != NULL) { fputs("must have \"", stdout); kputs(r->regmust); putchar('"'); } printf("\n");}kputchar(c) ushort c;{ if (c & 0xff00) putchar(c >> 8); putchar(c & 0xff);}kputs(s) ushort *s;{ while (*s) kputchar(*s++);}/* - regprop - printable representation of opcode */static char *regprop(op)ushort *op;{ register char *p; static char buf[50]; (void) strcpy(buf, ":"); switch ((int) OP(op)) { case BOL: p = "BOL"; break; case EOL: p = "EOL"; break; case ANY: p = "ANY"; break; case ANYOF: p = "ANYOF"; break; case ANYBUT: p = "ANYBUT"; break; case BRANCH: p = "BRANCH"; break; case EXACTLY: p = "EXACTLY"; break; case NOTHING: p = "NOTHING"; break; case BACK: p = "BACK"; break; case END: p = "END"; break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); p = NULL; break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); p = NULL; break; case STAR: p = "STAR"; break; case PLUS: p = "PLUS"; break; default: regerror("corrupted opcode"); break; } if (p != NULL) (void) strcat(buf, p); return(buf);}#endif/* * The following is provided for those people who do not have strcspn() in * their C libraries. They should get off their butts and do something * about it; at least one public-domain implementation of those (highly * useful) string routines has been published on Usenet. *//* * strcspn - find length of initial segment of s1 consisting entirely * of characters not from s2 */static intStrcspn(s1, s2)ushort *s1;unsigned char *s2;{ register ushort *scan1; register unsigned char *scan2; register int count; count = 0; for (scan1 = s1; *scan1 != '\0'; scan1++) { for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */ if (*scan1 == *scan2++) return(count); count++; } return(count);}isthere(s, c) ushort *s, c;{ register unsigned int c1, c2; for ( ; *s; s++) { if (*s == 0xffff) { /* range */ c1 = *++s; c2 = *++s; if (c1 <= c && c <= c2) return 1; } else if (*s == c) return 1; } return 0;}ushort *Strchr(s, c) ushort *s, c;{ for ( ; *s; s++) if (*s == c) return s; return NULL;}Strncmp(s, t, n) ushort *s, *t;{ for ( ; --n > 0 && *s == *t; s++, t++) ; return *s - *t;}Strlen(s) ushort *s;{ int i; for (i = 0; *s; i++, s++) ; return i;}ushort kbuf[BUFSIZ];char *mkpat(s) char *s;{ sjtok(kbuf, s); return (char *) regcomp(kbuf);}match(p, s) regexp *p; char *s;{ register int i; sjtok(kbuf, s); if (i = regexec(p, kbuf, 1)) { r_start = p->startp[0] - kbuf + 1; r_length = p->endp[0] - p->startp[0]; } else r_start = r_length = 0; return i;}sjtok(s, t) ushort *s; unsigned char *t;{ register c; for ( ; *t; t++) { if (isKanji(c = *t)) c = (c << 8) | (*++t & 0xff); *s++ = c; } *s = 0;}ktosj(s, t) unsigned char *s; ushort *t;{ register c; while (*t) { if ((c = *t++) & 0xff00) *s++ = c >> 8; *s++ = c & 0xff; } *s = '\0';}regsub(dst, exp, src, pat, pos) ushort *dst, *src, *pat; regexp *exp;{ /* dst <-- s/src/pat/pos global substitution for pos == 0 */ register int c, i; register ushort *loc1, *loc2, *s, *t, *u; register int n = 0; if (exp->program[0] != MAGIC) { regerror("damaged regexp fed to regsub"); return 0; } while (*src) {next: if (regexec(exp, src, 1) == 0) break; loc1 = exp->startp[0]; loc2 = exp->endp[0]; if (pos-- > 1) { while (src < loc2) *dst++ = *src++; goto next; } while (src < loc1) *dst++ = *src++; for (s = pat; c = *s++; ) { if (c == '&') i = 0; else if (c == '\\' && '0' <= *s && *s <= '9') i = *s++ - '0'; else { if (c == '\\' && (*s == '\\' || *s == '&')) c = *s++; *dst++ = c; continue; } if ((t = exp->startp[i]) != NULL && (u = exp->endp[i]) != NULL) { while (t < u) *dst++ = *t++; } } src = loc2; n++; if (pos == 0) break; } while (*src) *dst++ = *src++; *dst++ = 0; return n;}static ushort kbuf1[BUFSIZ], kbuf2[BUFSIZ];Sub(u, exp, str, s, t, pos) char *exp; char *s, *t, *u;{ register int i; regexp *r; if (str) { sjtok(kbuf, exp); r = regcomp(kbuf); } else r = (regexp *) exp; sjtok(kbuf, s); sjtok(kbuf1, t); i = regsub(kbuf2, r, kbuf1, kbuf, pos); ktosj(u, kbuf2); if (str) sfree(r); return i;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -