📄 regc_nfa.c
字号:
case CA('^', AHEAD): case CA(BEHIND, '$'): case CA(BEHIND, AHEAD): case CA('$', '^'): case CA('$', BEHIND): case CA(AHEAD, '^'): case CA(AHEAD, BEHIND): case CA('^', LACON): case CA(BEHIND, LACON): case CA('$', LACON): case CA(AHEAD, LACON): return COMPATIBLE; break; } assert(NOTREACHED); return INCOMPATIBLE; /* for benefit of blind compilers */}/* * fixempties - get rid of EMPTY arcs */static voidfixempties(struct nfa * nfa, FILE *f) /* for debug output; NULL none */{ struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; int progress; /* find and eliminate empties until there are no more */ do { progress = 0; for (s = nfa->states; s != NULL && !NISERR() && s->no != FREESTATE; s = nexts) { nexts = s->next; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == EMPTY && unempty(nfa, a)) progress = 1; assert(nexta == NULL || s->no != FREESTATE); } } if (progress && f != NULL) dumpnfa(nfa, f); } while (progress && !NISERR());}/* * unempty - optimize out an EMPTY arc, if possible * * Actually, as it stands this function always succeeds, but the return * value is kept with an eye on possible future changes. */static int /* 0 couldn't, 1 could */unempty(struct nfa * nfa, struct arc * a){ struct state *from = a->from; struct state *to = a->to; int usefrom; /* work on from, as opposed to to? */ assert(a->type == EMPTY); assert(from != nfa->pre && to != nfa->post); if (from == to) { /* vacuous loop */ freearc(nfa, a); return 1; } /* decide which end to work on */ usefrom = 1; /* default: attack from */ if (from->nouts > to->nins) usefrom = 0; else if (from->nouts == to->nins) { /* decide on secondary issue: move/copy fewest arcs */ if (from->nins > to->nouts) usefrom = 0; } freearc(nfa, a); if (usefrom) { if (from->nouts == 0) { /* was the state's only outarc */ moveins(nfa, from, to); freestate(nfa, from); } else copyins(nfa, from, to); } else { if (to->nins == 0) { /* was the state's only inarc */ moveouts(nfa, to, from); freestate(nfa, to); } else copyouts(nfa, to, from); } return 1;}/* * cleanup - clean up NFA after optimizations */static voidcleanup(struct nfa * nfa){ struct state *s; struct state *nexts; int n; /* clear out unreachable or dead-end states */ /* use pre to mark reachable, then post to mark can-reach-post */ markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre); markcanreach(nfa, nfa->post, nfa->pre, nfa->post); for (s = nfa->states; s != NULL; s = nexts) { nexts = s->next; if (s->tmp != nfa->post && !s->flag) dropstate(nfa, s); } assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); cleartraverse(nfa, nfa->pre); assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); /* the nins==0 (final unreachable) case will be caught later */ /* renumber surviving states */ n = 0; for (s = nfa->states; s != NULL; s = s->next) s->no = n++; nfa->nstates = n;}/* * markreachable - recursive marking of reachable states */static voidmarkreachable(struct nfa * nfa, struct state * s, struct state * okay, /* consider only states with this mark */ struct state * mark) /* the value to mark with */{ struct arc *a; if (s->tmp != okay) return; s->tmp = mark; for (a = s->outs; a != NULL; a = a->outchain) markreachable(nfa, a->to, okay, mark);}/* * markcanreach - recursive marking of states which can reach here */static voidmarkcanreach(struct nfa * nfa, struct state * s, struct state * okay, /* consider only states with this mark */ struct state * mark) /* the value to mark with */{ struct arc *a; if (s->tmp != okay) return; s->tmp = mark; for (a = s->ins; a != NULL; a = a->inchain) markcanreach(nfa, a->from, okay, mark);}/* * analyze - ascertain potentially-useful facts about an optimized NFA */static long /* re_info bits to be ORed in */analyze(struct nfa * nfa){ struct arc *a; struct arc *aa; if (nfa->pre->outs == NULL) return REG_UIMPOSSIBLE; for (a = nfa->pre->outs; a != NULL; a = a->outchain) for (aa = a->to->outs; aa != NULL; aa = aa->outchain) if (aa->to == nfa->post) return REG_UEMPTYMATCH; return 0;}/* * compact - compact an NFA */static voidcompact(struct nfa * nfa, struct cnfa * cnfa){ struct state *s; struct arc *a; size_t nstates; size_t narcs; struct carc *ca; struct carc *first; assert(!NISERR()); nstates = 0; narcs = 0; for (s = nfa->states; s != NULL; s = s->next) { nstates++; narcs += 1 + s->nouts + 1; /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ } cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); if (cnfa->states == NULL || cnfa->arcs == NULL) { if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) FREE(cnfa->arcs); NERR(REG_ESPACE); return; } cnfa->nstates = nstates; cnfa->pre = nfa->pre->no; cnfa->post = nfa->post->no; cnfa->bos[0] = nfa->bos[0]; cnfa->bos[1] = nfa->bos[1]; cnfa->eos[0] = nfa->eos[0]; cnfa->eos[1] = nfa->eos[1]; cnfa->ncolors = maxcolor(nfa->cm) + 1; cnfa->flags = 0; ca = cnfa->arcs; for (s = nfa->states; s != NULL; s = s->next) { assert((size_t) s->no < nstates); cnfa->states[s->no] = ca; ca->co = 0; /* clear and skip flags "arc" */ ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) switch (a->type) { case PLAIN: ca->co = a->co; ca->to = a->to->no; ca++; break; case LACON: assert(s->no != cnfa->pre); ca->co = (color) (cnfa->ncolors + a->co); ca->to = a->to->no; ca++; cnfa->flags |= HASLACONS; break; default: assert(NOTREACHED); break; } carcsort(first, ca - 1); ca->co = COLORLESS; ca->to = 0; ca++; } assert(ca == &cnfa->arcs[narcs]); assert(cnfa->nstates != 0); /* mark no-progress states */ for (a = nfa->pre->outs; a != NULL; a = a->outchain) cnfa->states[a->to->no]->co = 1; cnfa->states[nfa->pre->no]->co = 1;}/* * carcsort - sort compacted-NFA arcs by color * * Really dumb algorithm, but if the list is long enough for that to matter, * you're in real trouble anyway. */static voidcarcsort(struct carc * first, struct carc * last){ struct carc *p; struct carc *q; struct carc tmp; if (last - first <= 1) return; for (p = first; p <= last; p++) for (q = p; q <= last; q++) if (p->co > q->co || (p->co == q->co && p->to > q->to)) { assert(p != q); tmp = *p; *p = *q; *q = tmp; }}/* * freecnfa - free a compacted NFA */static voidfreecnfa(struct cnfa * cnfa){ assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; FREE(cnfa->states); FREE(cnfa->arcs);}/* * dumpnfa - dump an NFA in human-readable form */static voiddumpnfa(struct nfa * nfa, FILE *f){#ifdef REG_DEBUG struct state *s; fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); if (nfa->bos[0] != COLORLESS) fprintf(f, ", bos [%ld]", (long) nfa->bos[0]); if (nfa->bos[1] != COLORLESS) fprintf(f, ", bol [%ld]", (long) nfa->bos[1]); if (nfa->eos[0] != COLORLESS) fprintf(f, ", eos [%ld]", (long) nfa->eos[0]); if (nfa->eos[1] != COLORLESS) fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) dumpstate(s, f); if (nfa->parent == NULL) dumpcolors(nfa->cm, f); fflush(f);#endif}#ifdef REG_DEBUG /* subordinates of dumpnfa *//* * dumpstate - dump an NFA state in human-readable form */static voiddumpstate(struct state * s, FILE *f){ struct arc *a; fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", (s->flag) ? s->flag : '.'); if (s->prev != NULL && s->prev->next != s) fprintf(f, "\tstate chain bad\n"); if (s->nouts == 0) fprintf(f, "\tno out arcs\n"); else dumparcs(s, f); fflush(f); for (a = s->ins; a != NULL; a = a->inchain) { if (a->to != s) fprintf(f, "\tlink from %d to %d on %d's in-chain\n", a->from->no, a->to->no, s->no); }}/* * dumparcs - dump out-arcs in human-readable form */static voiddumparcs(struct state * s, FILE *f){ int pos; assert(s->nouts > 0); /* printing arcs in reverse order is usually clearer */ pos = dumprarcs(s->outs, s, f, 1); if (pos != 1) fprintf(f, "\n");}/* * dumprarcs - dump remaining outarcs, recursively, in reverse order */static int /* resulting print position */dumprarcs(struct arc * a, struct state * s, FILE *f, int pos) /* initial print position */{ if (a->outchain != NULL) pos = dumprarcs(a->outchain, s, f, pos); dumparc(a, s, f); if (pos == 5) { fprintf(f, "\n"); pos = 1; } else pos++; return pos;}/* * dumparc - dump one outarc in readable form, including prefixing tab */static voiddumparc(struct arc * a, struct state * s, FILE *f){ struct arc *aa; struct arcbatch *ab; fprintf(f, "\t"); switch (a->type) { case PLAIN: fprintf(f, "[%ld]", (long) a->co); break; case AHEAD: fprintf(f, ">%ld>", (long) a->co); break; case BEHIND: fprintf(f, "<%ld<", (long) a->co); break; case LACON: fprintf(f, ":%ld:", (long) a->co); break; case '^': case '$': fprintf(f, "%c%d", a->type, (int) a->co); break; case EMPTY: break; default: fprintf(f, "0x%x/0%lo", a->type, (long) a->co); break; } if (a->from != s) fprintf(f, "?%d?", a->from->no); for (ab = &a->from->oas; ab != NULL; ab = ab->next) { for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) if (aa == a) break; /* NOTE BREAK OUT */ if (aa < &ab->a[ABSIZE]) /* propagate break */ break; /* NOTE BREAK OUT */ } if (ab == NULL) fprintf(f, "?!?"); /* not in allocated space */ fprintf(f, "->"); if (a->to == NULL) { fprintf(f, "NULL"); return; } fprintf(f, "%d", a->to->no); for (aa = a->to->ins; aa != NULL; aa = aa->inchain) if (aa == a) break; /* NOTE BREAK OUT */ if (aa == NULL) fprintf(f, "?!?"); /* missing from in-chain */}#endif /* REG_DEBUG *//* * dumpcnfa - dump a compacted NFA in human-readable form */#ifdef REG_DEBUGstatic voiddumpcnfa(struct cnfa * cnfa, FILE *f){ int st; fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); if (cnfa->bos[0] != COLORLESS) fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]); if (cnfa->bos[1] != COLORLESS) fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]); if (cnfa->eos[0] != COLORLESS) fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]); if (cnfa->eos[1] != COLORLESS) fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]); if (cnfa->flags & HASLACONS) fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) dumpcstate(st, cnfa->states[st], cnfa, f); fflush(f);}#endif#ifdef REG_DEBUG /* subordinates of dumpcnfa *//* * dumpcstate - dump a compacted-NFA state in human-readable form */static voiddumpcstate(int st, struct carc * ca, struct cnfa * cnfa, FILE *f){ int i; int pos; fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); pos = 1; for (i = 1; ca[i].co != COLORLESS; i++) { if (ca[i].co < cnfa->ncolors) fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); else fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, ca[i].to); if (pos == 5) { fprintf(f, "\n"); pos = 1; } else pos++; } if (i == 1 || pos != 1) fprintf(f, "\n"); fflush(f);}#endif /* REG_DEBUG */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -