📄 regcomp.c
字号:
/* special case: backrefs have internal quantifiers */ EMPTYARC(s, atom->begin); /* empty prefix */ /* just stuff everything into atom */ repeat(v, atom->begin, atom->end, m, n); atom->min = (short) m; atom->max = (short) n; atom->flags |= COMBINE(qprefer, atom->flags); } else if (m == 1 && n == 1) { /* no/vacuous quantifier: done */ EMPTYARC(s, atom->begin); /* empty prefix */ } else { /* turn x{m,n} into x{m-1,n-1}x, with capturing */ /* parens in only second x */ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); assert(m >= 1 && m != INFINITY && n >= 1); repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1); f = COMBINE(qprefer, atom->flags); t = subre(v, '.', f, s, atom->end); /* prefix and atom */ NOERR(); t->left = subre(v, '=', PREF(f), s, atom->begin); NOERR(); t->right = atom; *atomp = t; } /* and finally, look after that postponed recursion */ t = top->right; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) t->right = parsebranch(v, stopper, type, atom->end, rp, 1); else { EMPTYARC(atom->end, rp); t->right = subre(v, '=', 0, atom->end, rp); } assert(SEE('|') || SEE(stopper) || SEE(EOS)); t->flags |= COMBINE(t->flags, t->right->flags); top->flags |= COMBINE(top->flags, t->flags);}/* * nonword - generate arcs for non-word-character ahead or behind */static voidnonword(struct vars * v, int dir, /* AHEAD or BEHIND */ struct state * lp, struct state * rp){ int anchor = (dir == AHEAD) ? '$' : '^'; assert(dir == AHEAD || dir == BEHIND); newarc(v->nfa, anchor, 1, lp, rp); newarc(v->nfa, anchor, 0, lp, rp); colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp); /* (no need for special attention to \n) */}/* * word - generate arcs for word character ahead or behind */static voidword(struct vars * v, int dir, /* AHEAD or BEHIND */ struct state * lp, struct state * rp){ assert(dir == AHEAD || dir == BEHIND); cloneouts(v->nfa, v->wordchrs, lp, rp, dir); /* (no need for special attention to \n) */}/* * scannum - scan a number */static int /* value, <= DUPMAX */scannum(struct vars * v){ int n = 0; while (SEE(DIGIT) && n < DUPMAX) { n = n * 10 + v->nextvalue; NEXT(); } if (SEE(DIGIT) || n > DUPMAX) { ERR(REG_BADBR); return 0; } return n;}/* * repeat - replicate subNFA for quantifiers * * The duplication sequences used here are chosen carefully so that any * pointers starting out pointing into the subexpression end up pointing into * the last occurrence. (Note that it may not be strung between the same * left and right end states, however!) This used to be important for the * subRE tree, although the important bits are now handled by the in-line * code in parse(), and when this is called, it doesn't matter any more. */static voidrepeat(struct vars * v, struct state * lp, struct state * rp, int m, int n){#define SOME 2#define INF 3#define PAIR(x, y) ((x)*4 + (y))#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) const int rm = REDUCE(m); const int rn = REDUCE(n); struct state *s; struct state *s2; switch (PAIR(rm, rn)) { case PAIR(0, 0): /* empty string */ delsub(v->nfa, lp, rp); EMPTYARC(lp, rp); break; case PAIR(0, 1): /* do as x| */ EMPTYARC(lp, rp); break; case PAIR(0, SOME): /* do as x{1,n}| */ repeat(v, lp, rp, 1, n); NOERR(); EMPTYARC(lp, rp); break; case PAIR(0, INF): /* loop x around */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); moveins(v->nfa, rp, s); EMPTYARC(lp, s); EMPTYARC(s, rp); break; case PAIR(1, 1): /* no action required */ break; case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, 1, n - 1); NOERR(); EMPTYARC(lp, s); break; case PAIR(1, INF): /* add loopback arc */ s = newstate(v->nfa); s2 = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); moveins(v->nfa, rp, s2); EMPTYARC(lp, s); EMPTYARC(s2, rp); EMPTYARC(s2, s); break; case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m - 1, n - 1); break; case PAIR(SOME, INF): /* do as x{m-1,}x */ s = newstate(v->nfa); NOERR(); moveouts(v->nfa, lp, s); dupnfa(v->nfa, s, rp, lp, s); NOERR(); repeat(v, lp, s, m - 1, n); break; default: ERR(REG_ASSERT); break; }}/* * bracket - handle non-complemented bracket expression * Also called from cbracket for complemented bracket expressions. */static voidbracket(struct vars * v, struct state * lp, struct state * rp){ assert(SEE('[')); NEXT(); while (!SEE(']') && !SEE(EOS)) brackpart(v, lp, rp); assert(SEE(']') || ISERR()); okcolors(v->nfa, v->cm);}/* * cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing * the result. The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. */static voidcbracket(struct vars * v, struct state * lp, struct state * rp){ struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); struct state *s; struct arc *a; /* arc from lp */ struct arc *ba; /* arc from left, from bracket() */ struct arc *pa; /* MCCE-prototype arc */ color co; chr *p; int i; NOERR(); bracket(v, left, right); if (v->cflags & REG_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); assert(lp->nouts == 0); /* all outarcs will be ours */ /* easy part of complementing */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); if (v->mcces == NULL) { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); return; } /* but complementing gets messy in the presence of MCCEs... */ NOTE(REG_ULOCALE); for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); ba = findarc(left, PLAIN, co); if (ba == NULL) { assert(a != NULL); freearc(v->nfa, a); } else assert(a == NULL); s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); if (ba == NULL) { /* easy case, need all of them */ cloneouts(v->nfa, pa->to, s, rp, PLAIN); newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); } else { /* must be selective */ if (findarc(ba->to, '$', 1) == NULL) { newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); } for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) if (findarc(ba->to, PLAIN, pa->co) == NULL) newarc(v->nfa, PLAIN, pa->co, s, rp); if (s->nouts == 0) /* limit of selectivity: none */ dropstate(v->nfa, s); /* frees arc too */ } NOERR(); } delsub(v->nfa, left, right); assert(left->nouts == 0); freestate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right);}/* * brackpart - handle one item (or range) within a bracket expression */static voidbrackpart(struct vars * v, struct state * lp, struct state * rp){ celt startc; celt endc; struct cvec *cv; chr *startp; chr *endp; chr c[1]; /* parse something, get rid of special cases, take shortcuts */ switch (v->nexttype) { case RANGE: /* a-b-c or other botch */ ERR(REG_ERANGE); return; break; case PLAIN: c[0] = v->nextvalue; NEXT(); /* shortcut for ordinary chr (not range, not MCCE leader) */ if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { onechr(v, c[0], lp, rp); return; } startc = element(v, c, c + 1); NOERR(); break; case COLLEL: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); startc = element(v, startp, endp); NOERR(); break; case ECLASS: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); startc = element(v, startp, endp); NOERR(); cv = eclass(v, startc, (v->cflags & REG_ICASE)); NOERR(); dovec(v, cv, lp, rp); return; break; case CCLASS: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECTYPE); NOERR(); cv = cclass(v, startp, endp, (v->cflags & REG_ICASE)); NOERR(); dovec(v, cv, lp, rp); return; break; default: ERR(REG_ASSERT); return; break; } if (SEE(RANGE)) { NEXT(); switch (v->nexttype) { case PLAIN: case RANGE: c[0] = v->nextvalue; NEXT(); endc = element(v, c, c + 1); NOERR(); break; case COLLEL: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); endc = element(v, startp, endp); NOERR(); break; default: ERR(REG_ERANGE); return; break; } } else endc = startc; /* * Ranges are unportable. Actually, standard C does guarantee that digits * are contiguous, but making that an exception is just too complicated. */ if (startc != endc) NOTE(REG_UUNPORT); cv = range(v, startc, endc, (v->cflags & REG_ICASE)); NOERR(); dovec(v, cv, lp, rp);}/* * scanplain - scan PLAIN contents of [. etc. * * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. */static chr * /* just after end of sequence */scanplain(struct vars * v){ chr *endp; assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); NEXT(); endp = v->now; while (SEE(PLAIN)) { endp = v->now; NEXT(); } assert(SEE(END) || ISERR()); NEXT(); return endp;}/* * leaders - process a cvec of collating elements to also include leaders * Also gives all characters involved their own colors, which is almost * certainly necessary, and sets up little disconnected subNFA. */static voidleaders(struct vars * v, struct cvec * cv){ int mcce; chr *p; chr leader; struct state *s; struct arc *a; v->mccepbegin = newstate(v->nfa); v->mccepend = newstate(v->nfa); NOERR(); for (mcce = 0; mcce < cv->nmcces; mcce++) { p = cv->mcces[mcce]; leader = *p; if (!haschr(cv, leader)) { addchr(cv, leader); s = newstate(v->nfa); newarc(v->nfa, PLAIN, subcolor(v->cm, leader), v->mccepbegin, s); okcolors(v->nfa, v->cm); } else { a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); assert(a != NULL); s = a->to; assert(s != v->mccepend); } p++; assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */ newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); okcolors(v->nfa, v->cm); }}/* * onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. */static voidonechr(struct vars * v, chr c, struct state * lp, struct state * rp){ if (!(v->cflags & REG_ICASE)) { newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); return; } /* rats, need general case anyway... */ dovec(v, allcases(v, c), lp, rp);}/* * dovec - fill in arcs for each element of a cvec * This one has to handle the messy cases, like MCCEs and MCCE leaders. */static voiddovec(struct vars * v, struct cvec * cv, struct state * lp, struct state * rp){ chr ch, from, to; celt ce; chr *p; int i; color co; struct cvec *leads; struct arc *a; struct arc *pa; /* arc in prototype */ struct state *s; struct state *ps; /* state in prototype */ /* need a place to store leaders, if any */ if (nmcces(v) > 0) { assert(v->mcces != NULL); if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { if (v->cv2 != NULL) free(v->cv2); v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); NOERR(); leads = v->cv2; } else leads = clearcvec(v->cv2); } else leads = NULL; /* first, get the ordinary characters out of the way */ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { ch = *p; if (!ISCELEADER(v, ch)) newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); else { assert(singleton(v->cm, ch)); assert(leads != NULL); if (!haschr(leads, ch)) addchr(leads, ch); } } /* and the ranges */ for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { from = *p; to = *(p + 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -