regcomp.c
来自「tcl是工具命令语言」· C语言 代码 · 共 2,176 行 · 第 1/4 页
C
2,176 行
t = top->right; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) t->right = parsebranch(v, stopper, type, atom->end, rp, 1); else { EMPTYARC(atom->end, rp); t->right = subre(v, '=', 0, atom->end, rp); } assert(SEE('|') || SEE(stopper) || SEE(EOS)); t->flags |= COMBINE(t->flags, t->right->flags); top->flags |= COMBINE(top->flags, t->flags);}/* - nonword - generate arcs for non-word-character ahead or behind ^ static VOID nonword(struct vars *, int, struct state *, struct state *); */static VOIDnonword(v, dir, lp, rp)struct vars *v;int dir; /* AHEAD or BEHIND */struct state *lp;struct state *rp;{ int anchor = (dir == AHEAD) ? '$' : '^'; assert(dir == AHEAD || dir == BEHIND); newarc(v->nfa, anchor, 1, lp, rp); newarc(v->nfa, anchor, 0, lp, rp); colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); /* (no need for special attention to \n) */}/* - word - generate arcs for word character ahead or behind ^ static VOID word(struct vars *, int, struct state *, struct state *); */static VOIDword(v, dir, lp, rp)struct vars *v;int dir; /* AHEAD or BEHIND */struct state *lp;struct state *rp;{ assert(dir == AHEAD || dir == BEHIND); cloneouts(v->nfa, v->wordchrs, lp, rp, dir); /* (no need for special attention to \n) */}/* - scannum - scan a number ^ static int scannum(struct vars *); */static int /* value, <= DUPMAX */scannum(v)struct vars *v;{ int n = 0; while (SEE(DIGIT) && n < DUPMAX) { n = n*10 + v->nextvalue; NEXT(); } if (SEE(DIGIT) || n > DUPMAX) { ERR(REG_BADBR); return 0; } return n;}/* - repeat - replicate subNFA for quantifiers * The duplication sequences used here are chosen carefully so that any * pointers starting out pointing into the subexpression end up pointing into * the last occurrence. (Note that it may not be strung between the same * left and right end states, however!) This used to be important for the * subRE tree, although the important bits are now handled by the in-line * code in parse(), and when this is called, it doesn't matter any more. ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); */static VOIDrepeat(v, lp, rp, m, n)struct vars *v;struct state *lp;struct state *rp;int m;int n;{# define SOME 2# define INF 3# define PAIR(x, y) ((x)*4 + (y))# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) CONST int rm = REDUCE(m); CONST int rn = REDUCE(n); struct state *s; struct state *s2; switch (PAIR(rm, rn)) { case PAIR(0, 0): /* empty string */ delsub(v->nfa, lp, rp); EMPTYARC(lp, rp); break; case PAIR(0, 1): /* do as x| */ EMPTYARC(lp, rp); break; case PAIR(0, SOME): /* do as x{1,n}| */ repeat(v, lp, rp, 1, n); NOERR(); EMPTYARC(lp, rp); break; case PAIR(0, INF): /* loop x around */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); moveins(v->nfa, rp, s); EMPTYARC(lp, s); EMPTYARC(s, rp); break; case PAIR(1, 1): /* no action required */ break; case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, 1, n-1); NOERR(); EMPTYARC(lp, s); break; case PAIR(1, INF): /* add loopback arc */ s = newstate(v->nfa); s2 = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); moveins(v->nfa, rp, s2); EMPTYARC(lp, s); EMPTYARC(s2, rp); EMPTYARC(s2, s); break; case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m-1, n-1); break; case PAIR(SOME, INF): /* do as x{m-1,}x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m-1, n); break; default: ERR(REG_ASSERT); break; }}/* - bracket - handle non-complemented bracket expression * Also called from cbracket for complemented bracket expressions. ^ static VOID bracket(struct vars *, struct state *, struct state *); */static VOIDbracket(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{ assert(SEE('[')); NEXT(); while (!SEE(']') && !SEE(EOS)) brackpart(v, lp, rp); assert(SEE(']') || ISERR()); okcolors(v->nfa, v->cm);}/* - cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing * the result. The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. ^ static VOID cbracket(struct vars *, struct state *, struct state *); */static VOIDcbracket(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{ struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); struct state *s; struct arc *a; /* arc from lp */ struct arc *ba; /* arc from left, from bracket() */ struct arc *pa; /* MCCE-prototype arc */ color co; chr *p; int i; NOERR(); bracket(v, left, right); if (v->cflags®_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); assert(lp->nouts == 0); /* all outarcs will be ours */ /* easy part of complementing */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); if (v->mcces == NULL) { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); return; } /* but complementing gets messy in the presence of MCCEs... */ NOTE(REG_ULOCALE); for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); ba = findarc(left, PLAIN, co); if (ba == NULL) { assert(a != NULL); freearc(v->nfa, a); } else { assert(a == NULL); } s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); if (ba == NULL) { /* easy case, need all of them */ cloneouts(v->nfa, pa->to, s, rp, PLAIN); newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); } else { /* must be selective */ if (findarc(ba->to, '$', 1) == NULL) { newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); } for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) if (findarc(ba->to, PLAIN, pa->co) == NULL) newarc(v->nfa, PLAIN, pa->co, s, rp); if (s->nouts == 0) /* limit of selectivity: none */ dropstate(v->nfa, s); /* frees arc too */ } NOERR(); } delsub(v->nfa, left, right); assert(left->nouts == 0); freestate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right);} /* - brackpart - handle one item (or range) within a bracket expression ^ static VOID brackpart(struct vars *, struct state *, struct state *); */static VOIDbrackpart(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{ celt startc; celt endc; struct cvec *cv; chr *startp; chr *endp; chr c[1]; /* parse something, get rid of special cases, take shortcuts */ switch (v->nexttype) { case RANGE: /* a-b-c or other botch */ ERR(REG_ERANGE); return; break; case PLAIN: c[0] = v->nextvalue; NEXT(); /* shortcut for ordinary chr (not range, not MCCE leader) */ if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { onechr(v, c[0], lp, rp); return; } startc = element(v, c, c+1); NOERR(); break; case COLLEL: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); startc = element(v, startp, endp); NOERR(); break; case ECLASS: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); startc = element(v, startp, endp); NOERR(); cv = eclass(v, startc, (v->cflags®_ICASE)); NOERR(); dovec(v, cv, lp, rp); return; break; case CCLASS: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECTYPE); NOERR(); cv = cclass(v, startp, endp, (v->cflags®_ICASE)); NOERR(); dovec(v, cv, lp, rp); return; break; default: ERR(REG_ASSERT); return; break; } if (SEE(RANGE)) { NEXT(); switch (v->nexttype) { case PLAIN: case RANGE: c[0] = v->nextvalue; NEXT(); endc = element(v, c, c+1); NOERR(); break; case COLLEL: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); endc = element(v, startp, endp); NOERR(); break; default: ERR(REG_ERANGE); return; break; } } else endc = startc; /* * Ranges are unportable. Actually, standard C does * guarantee that digits are contiguous, but making * that an exception is just too complicated. */ if (startc != endc) NOTE(REG_UUNPORT); cv = range(v, startc, endc, (v->cflags®_ICASE)); NOERR(); dovec(v, cv, lp, rp);}/* - scanplain - scan PLAIN contents of [. etc. * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. ^ static chr *scanplain(struct vars *); */static chr * /* just after end of sequence */scanplain(v)struct vars *v;{ chr *endp; assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); NEXT(); endp = v->now; while (SEE(PLAIN)) { endp = v->now; NEXT(); } assert(SEE(END) || ISERR()); NEXT(); return endp;}/* - leaders - process a cvec of collating elements to also include leaders * Also gives all characters involved their own colors, which is almost * certainly necessary, and sets up little disconnected subNFA. ^ static VOID leaders(struct vars *, struct cvec *); */static VOIDleaders(v, cv)struct vars *v;struct cvec *cv;{ int mcce; chr *p; chr leader; struct state *s; struct arc *a; v->mccepbegin = newstate(v->nfa); v->mccepend = newstate(v->nfa); NOERR(); for (mcce = 0; mcce < cv->nmcces; mcce++) { p = cv->mcces[mcce]; leader = *p; if (!haschr(cv, leader)) { addchr(cv, leader); s = newstate(v->nfa); newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); okcolors(v->nfa, v->cm); } else { a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); assert(a != NULL); s = a->to; assert(s != v->mccepend); } p++; assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); okcolors(v->nfa, v->cm); }}/* - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); */static VOIDonechr(v, c, lp, rp)struct vars *v;pchr c;struct state *lp;struct state *rp;{ if (!(v->cflags®_ICASE)) { newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); return; } /* rats, need general case anyway... */ dovec(v, allcases(v, c), lp, rp);}/* - dovec - fill in arcs for each element of a cvec * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static VOID dovec(struct vars *, struct cvec *, struct state *, ^ struct state *); */static VOIDdovec(v, cv, lp, rp)struct vars *v;struct cvec *cv;struct state *lp;struct state *rp;{ chr ch, from, to; celt ce; chr *p; int i; color co; struct cvec *leads; struct arc *a; struct arc *pa; /* arc in prototype */ struct state *s; struct state *ps; /* state in prototype */ /* need a place to store leaders, if any */ if (nmcces(v) > 0) { assert(v->mcces != NULL); if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { if (v->cv2 != NULL) free(v->cv2); v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); NOERR(); leads = v->cv2; } else leads = clearcvec(v->cv2); } else leads = NULL; /* first, get the ordinary characters out of the way */ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { ch = *p; if (!ISCELEADER(v, ch)) newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); else { assert(singleton(v->cm, ch)); assert(leads != NULL); if (!haschr(leads, ch)) addchr(leads, ch); } } /* and the ranges */ for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { from = *p; to = *(p+1); while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { if (from < ce) subrange(v, from, ce - 1, lp, rp); assert(singleton(v->cm, ce)); assert(leads != NULL); if (!haschr(leads, ce)) addchr(leads, ce); from = ce + 1; } if (from <= to) subrange(v, from, to, lp, rp); } if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) return; /* deal with the MCCE leaders */ NOTE(REG_ULOCALE); for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); if (a != NULL)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?