📄 regcomp.c
字号:
while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { if (from < ce) subrange(v, from, ce - 1, lp, rp); assert(singleton(v->cm, ce)); assert(leads != NULL); if (!haschr(leads, ce)) addchr(leads, ce); from = ce + 1; } if (from <= to) subrange(v, from, to, lp, rp); } if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) return; /* deal with the MCCE leaders */ NOTE(REG_ULOCALE); for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; else { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); } pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); ps = pa->to; newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); NOERR(); } /* and the MCCEs */ for (i = 0; i < cv->nmcces; i++) { p = cv->mcces[i]; assert(singleton(v->cm, *p)); if (!singleton(v->cm, *p)) { ERR(REG_ASSERT); return; } ch = *p++; co = GETCOLOR(v->cm, ch); a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; else { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); } assert(*p != 0); /* at least two chars */ assert(singleton(v->cm, *p)); ch = *p++; co = GETCOLOR(v->cm, ch); assert(*p == 0); /* and only two, for now */ newarc(v->nfa, PLAIN, co, s, rp); NOERR(); }}/* * nextleader - find next MCCE leader within range */static celt /* NOCELT means none */nextleader(struct vars * v, chr from, chr to){ int i; chr *p; chr ch; celt it = NOCELT; if (v->mcces == NULL) return it; for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { ch = *p; if (from <= ch && ch <= to) if (it == NOCELT || ch < it) it = ch; } return it;}/* * wordchrs - set up word-chr list for word-boundary stuff, if needed * * The list is kept as a bunch of arcs between two dummy states; it's * disposed of by the unreachable-states sweep in NFA optimization. * Does NEXT(). Must not be called from any unusual lexical context. * This should be reconciled with the \w etc. handling in lex.c, and * should be cleaned up to reduce dependencies on input scanning. */static voidwordchrs(struct vars * v){ struct state *left; struct state *right; if (v->wordchrs != NULL) { NEXT(); /* for consistency */ return; } left = newstate(v->nfa); right = newstate(v->nfa); NOERR(); /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ lexword(v); NEXT(); assert(v->savenow != NULL && SEE('[')); bracket(v, left, right); assert((v->savenow != NULL && SEE(']')) || ISERR()); NEXT(); NOERR(); v->wordchrs = left;}/* * subre - allocate a subre */static struct subre *subre(struct vars * v, int op, int flags, struct state * begin, struct state * end){ struct subre *ret; ret = v->treefree; if (ret != NULL) v->treefree = ret->left; else { ret = (struct subre *) MALLOC(sizeof(struct subre)); if (ret == NULL) { ERR(REG_ESPACE); return NULL; } ret->chain = v->treechain; v->treechain = ret; } assert(strchr("|.b(=", op) != NULL); ret->op = op; ret->flags = flags; ret->retry = 0; ret->subno = 0; ret->min = ret->max = 1; ret->left = NULL; ret->right = NULL; ret->begin = begin; ret->end = end; ZAPCNFA(ret->cnfa); return ret;}/* * freesubre - free a subRE subtree */static voidfreesubre(struct vars * v, /* might be NULL */ struct subre * sr){ if (sr == NULL) return; if (sr->left != NULL) freesubre(v, sr->left); if (sr->right != NULL) freesubre(v, sr->right); freesrnode(v, sr);}/* * freesrnode - free one node in a subRE subtree */static voidfreesrnode(struct vars * v, /* might be NULL */ struct subre * sr){ if (sr == NULL) return; if (!NULLCNFA(sr->cnfa)) freecnfa(&sr->cnfa); sr->flags = 0; if (v != NULL) { sr->left = v->treefree; v->treefree = sr; } else FREE(sr);}/* * optst - optimize a subRE subtree */static voidoptst(struct vars * v, struct subre * t){ if (t == NULL) return; /* recurse through children */ if (t->left != NULL) optst(v, t->left); if (t->right != NULL) optst(v, t->right);}/* * numst - number tree nodes (assigning retry indexes) */static int /* next number */numst(struct subre * t, int start) /* starting point for subtree numbers */{ int i; assert(t != NULL); i = start; t->retry = (short) i++; if (t->left != NULL) i = numst(t->left, i); if (t->right != NULL) i = numst(t->right, i); return i;}/* * markst - mark tree nodes as INUSE */static voidmarkst(struct subre * t){ assert(t != NULL); t->flags |= INUSE; if (t->left != NULL) markst(t->left); if (t->right != NULL) markst(t->right);}/* * cleanst - free any tree nodes not marked INUSE */static voidcleanst(struct vars * v){ struct subre *t; struct subre *next; for (t = v->treechain; t != NULL; t = next) { next = t->chain; if (!(t->flags & INUSE)) FREE(t); } v->treechain = NULL; v->treefree = NULL; /* just on general principles */}/* * nfatree - turn a subRE subtree into a tree of compacted NFAs */static long /* optimize results from top node */nfatree(struct vars * v, struct subre * t, FILE *f) /* for debug output */{ assert(t != NULL && t->begin != NULL); if (t->left != NULL) (DISCARD) nfatree(v, t->left, f); if (t->right != NULL) (DISCARD) nfatree(v, t->right, f); return nfanode(v, t, f);}/* * nfanode - do one NFA for nfatree */static long /* optimize results */nfanode(struct vars * v, struct subre * t, FILE *f) /* for debug output */{ struct nfa *nfa; long ret = 0; assert(t->begin != NULL);#ifdef REG_DEBUG if (f != NULL) { char idbuf[50]; fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", stid(t, idbuf, sizeof(idbuf))); }#endif nfa = newnfa(v, v->cm, v->nfa); NOERRZ(); dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); if (!ISERR()) { specialcolors(nfa); ret = optimize(nfa, f); } if (!ISERR()) compact(nfa, &t->cnfa); freenfa(nfa); return ret;}/* * newlacon - allocate a lookahead-constraint subRE */static int /* lacon number */newlacon(struct vars * v, struct state * begin, struct state * end, int pos){ int n; struct subre *sub; if (v->nlacons == 0) { v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); n = 1; /* skip 0th */ v->nlacons = 2; } else { v->lacons = (struct subre *) REALLOC(v->lacons, (v->nlacons + 1) * sizeof(struct subre)); n = v->nlacons++; } if (v->lacons == NULL) { ERR(REG_ESPACE); return 0; } sub = &v->lacons[n]; sub->begin = begin; sub->end = end; sub->subno = pos; ZAPCNFA(sub->cnfa); return n;}/* * freelacons - free lookahead-constraint subRE vector */static voidfreelacons(struct subre * subs, int n){ struct subre *sub; int i; assert(n > 0); for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ if (!NULLCNFA(sub->cnfa)) freecnfa(&sub->cnfa); FREE(subs);}/* * rfree - free a whole RE (insides of regfree) */static voidrfree(regex_t *re){ struct guts *g; if (re == NULL || re->re_magic != REMAGIC) return; re->re_magic = 0; /* invalidate RE */ g = (struct guts *) re->re_guts; re->re_guts = NULL; re->re_fns = NULL; g->magic = 0; freecm(&g->cmap); if (g->tree != NULL) freesubre((struct vars *) NULL, g->tree); if (g->lacons != NULL) freelacons(g->lacons, g->nlacons); if (!NULLCNFA(g->search)) freecnfa(&g->search); FREE(g);}#ifdef REG_DEBUG/* * dump - dump an RE in human-readable form */static voiddump(regex_t *re, FILE *f){ struct guts *g; int i; if (re->re_magic != REMAGIC) fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, REMAGIC); if (re->re_guts == NULL) { fprintf(f, "NULL guts!!!\n"); return; } g = (struct guts *) re->re_guts; if (g->magic != GUTSMAGIC) fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, GUTSMAGIC); fprintf(f, "\n\n\n========= DUMP ==========\n"); fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", (int) re->re_nsub, re->re_info, re->re_csize, g->ntree); dumpcolors(&g->cmap, f); if (!NULLCNFA(g->search)) { printf("\nsearch:\n"); dumpcnfa(&g->search, f); } for (i = 1; i < g->nlacons; i++) { fprintf(f, "\nla%d (%s):\n", i, (g->lacons[i].subno) ? "positive" : "negative"); dumpcnfa(&g->lacons[i].cnfa, f); } fprintf(f, "\n"); dumpst(g->tree, f, 0);}/* * dumpst - dump a subRE tree */static voiddumpst(struct subre * t, FILE *f, int nfapresent) /* is the original NFA still around? */{ if (t == NULL) fprintf(f, "null tree\n"); else stdump(t, f, nfapresent); fflush(f);}/* * stdump - recursive guts of dumpst */static voidstdump(struct subre * t, FILE *f, int nfapresent) /* is the original NFA still around? */{ char idbuf[50]; fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); if (t->flags & LONGER) fprintf(f, " longest"); if (t->flags & SHORTER) fprintf(f, " shortest"); if (t->flags & MIXED) fprintf(f, " hasmixed"); if (t->flags & CAP) fprintf(f, " hascapture"); if (t->flags & BACKR) fprintf(f, " hasbackref"); if (!(t->flags & INUSE)) fprintf(f, " UNUSED"); if (t->subno != 0) fprintf(f, " (#%d)", t->subno); if (t->min != 1 || t->max != 1) { fprintf(f, " {%d,", t->min); if (t->max != INFINITY) fprintf(f, "%d", t->max); fprintf(f, "}"); } if (nfapresent) fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no); if (t->left != NULL) fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); if (t->right != NULL) fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); if (!NULLCNFA(t->cnfa)) { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); fprintf(f, "\n"); } if (t->left != NULL) stdump(t->left, f, nfapresent); if (t->right != NULL) stdump(t->right, f, nfapresent);}/* * stid - identify a subtree node for dumping */static char * /* points to buf or constant string */stid(struct subre * t, char *buf, size_t bufsize){ /* big enough for hex int or decimal t->retry? */ if (bufsize < sizeof(void *) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1) return "unable"; if (t->retry != 0) sprintf(buf, "%d", t->retry); else sprintf(buf, "%p", t); return buf;}#endif /* REG_DEBUG */#include "regc_lex.c"#include "regc_color.c"#include "regc_nfa.c"#include "regc_cvec.c"#include "regc_locale.c"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -