regc_nfa.c

来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C语言 代码 · 共 1,583 行 · 第 1/3 页

C
1,583
字号
				assert(nexta == NULL || s->no != FREESTATE);
			}
		}
		if (progress && f != NULL)
			dumpnfa(nfa, f);
	} while (progress && !NISERR());
}

/*
 - unempty - optimize out an EMPTY arc, if possible
 * Actually, as it stands this function always succeeds, but the return
 * value is kept with an eye on possible future changes.
 ^ static int unempty(struct nfa *, struct arc *);
 */
static int			/* 0 couldn't, 1 could */
unempty(nfa, a)
struct nfa *nfa;
struct arc *a;
{
	struct state *from = a->from;
	struct state *to = a->to;
	int usefrom;		/* work on from, as opposed to to? */

	assert(a->type == EMPTY);
	assert(from != nfa->pre && to != nfa->post);

	if (from == to) {		/* vacuous loop */
		freearc(nfa, a);
		return 1;
	}

	/* decide which end to work on */
	usefrom = 1;			/* default:  attack from */
	if (from->nouts > to->nins)
		usefrom = 0;
	else if (from->nouts == to->nins) {
		/* decide on secondary issue:  move/copy fewest arcs */
		if (from->nins > to->nouts)
			usefrom = 0;
	}
		
	freearc(nfa, a);
	if (usefrom) {
		if (from->nouts == 0) {
			/* was the state's only outarc */
			moveins(nfa, from, to);
			freestate(nfa, from);
		} else
			copyins(nfa, from, to);
	} else {
		if (to->nins == 0) {
			/* was the state's only inarc */
			moveouts(nfa, to, from);
			freestate(nfa, to);
		} else
			copyouts(nfa, to, from);
	}

	return 1;
}

/*
 - cleanup - clean up NFA after optimizations
 ^ static VOID cleanup(struct nfa *);
 */
static VOID
cleanup(nfa)
struct nfa *nfa;
{
	struct state *s;
	struct state *nexts;
	int n;

	/* clear out unreachable or dead-end states */
	/* use pre to mark reachable, then post to mark can-reach-post */
	markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre);
	markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
	for (s = nfa->states; s != NULL; s = nexts) {
		nexts = s->next;
		if (s->tmp != nfa->post && !s->flag)
			dropstate(nfa, s);
	}
	assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
	cleartraverse(nfa, nfa->pre);
	assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
	/* the nins==0 (final unreachable) case will be caught later */

	/* renumber surviving states */
	n = 0;
	for (s = nfa->states; s != NULL; s = s->next)
		s->no = n++;
	nfa->nstates = n;
}

/*
 - markreachable - recursive marking of reachable states
 ^ static VOID markreachable(struct nfa *, struct state *, struct state *,
 ^ 	struct state *);
 */
static VOID
markreachable(nfa, s, okay, mark)
struct nfa *nfa;
struct state *s;
struct state *okay;		/* consider only states with this mark */
struct state *mark;		/* the value to mark with */
{
	struct arc *a;

	if (s->tmp != okay)
		return;
	s->tmp = mark;

	for (a = s->outs; a != NULL; a = a->outchain)
		markreachable(nfa, a->to, okay, mark);
}

/*
 - markcanreach - recursive marking of states which can reach here
 ^ static VOID markcanreach(struct nfa *, struct state *, struct state *,
 ^ 	struct state *);
 */
static VOID
markcanreach(nfa, s, okay, mark)
struct nfa *nfa;
struct state *s;
struct state *okay;		/* consider only states with this mark */
struct state *mark;		/* the value to mark with */
{
	struct arc *a;

	if (s->tmp != okay)
		return;
	s->tmp = mark;

	for (a = s->ins; a != NULL; a = a->inchain)
		markcanreach(nfa, a->from, okay, mark);
}

/*
 - analyze - ascertain potentially-useful facts about an optimized NFA
 ^ static long analyze(struct nfa *);
 */
static long			/* re_info bits to be ORed in */
analyze(nfa)
struct nfa *nfa;
{
	struct arc *a;
	struct arc *aa;

	if (nfa->pre->outs == NULL)
		return REG_UIMPOSSIBLE;
	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
		for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
			if (aa->to == nfa->post)
				return REG_UEMPTYMATCH;
	return 0;
}

/*
 - compact - compact an NFA
 ^ static VOID compact(struct nfa *, struct cnfa *);
 */
static VOID
compact(nfa, cnfa)
struct nfa *nfa;
struct cnfa *cnfa;
{
	struct state *s;
	struct arc *a;
	size_t nstates;
	size_t narcs;
	struct carc *ca;
	struct carc *first;

	assert (!NISERR());

	nstates = 0;
	narcs = 0;
	for (s = nfa->states; s != NULL; s = s->next) {
		nstates++;
		narcs += 1 + s->nouts + 1;
		/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
	}

	cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *));
	cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc));
	if (cnfa->states == NULL || cnfa->arcs == NULL) {
		if (cnfa->states != NULL)
			FREE(cnfa->states);
		if (cnfa->arcs != NULL)
			FREE(cnfa->arcs);
		NERR(REG_ESPACE);
		return;
	}
	cnfa->nstates = nstates;
	cnfa->pre = nfa->pre->no;
	cnfa->post = nfa->post->no;
	cnfa->bos[0] = nfa->bos[0];
	cnfa->bos[1] = nfa->bos[1];
	cnfa->eos[0] = nfa->eos[0];
	cnfa->eos[1] = nfa->eos[1];
	cnfa->ncolors = maxcolor(nfa->cm) + 1;
	cnfa->flags = 0;

	ca = cnfa->arcs;
	for (s = nfa->states; s != NULL; s = s->next) {
		assert((size_t)s->no < nstates);
		cnfa->states[s->no] = ca;
		ca->co = 0;		/* clear and skip flags "arc" */
		ca++;
		first = ca;
		for (a = s->outs; a != NULL; a = a->outchain)
			switch (a->type) {
			case PLAIN:
				ca->co = a->co;
				ca->to = a->to->no;
				ca++;
				break;
			case LACON:
				assert(s->no != cnfa->pre);
				ca->co = (color)(cnfa->ncolors + a->co);
				ca->to = a->to->no;
				ca++;
				cnfa->flags |= HASLACONS;
				break;
			default:
				assert(NOTREACHED);
				break;
			}
		carcsort(first, ca-1);
		ca->co = COLORLESS;
		ca->to = 0;
		ca++;
	}
	assert(ca == &cnfa->arcs[narcs]);
	assert(cnfa->nstates != 0);

	/* mark no-progress states */
	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
		cnfa->states[a->to->no]->co = 1;
	cnfa->states[nfa->pre->no]->co = 1;
}

/*
 - carcsort - sort compacted-NFA arcs by color
 * Really dumb algorithm, but if the list is long enough for that to matter,
 * you're in real trouble anyway.
 ^ static VOID carcsort(struct carc *, struct carc *);
 */
static VOID
carcsort(first, last)
struct carc *first;
struct carc *last;
{
	struct carc *p;
	struct carc *q;
	struct carc tmp;

	if (last - first <= 1)
		return;

	for (p = first; p <= last; p++)
		for (q = p; q <= last; q++)
			if (p->co > q->co ||
					(p->co == q->co && p->to > q->to)) {
				assert(p != q);
				tmp = *p;
				*p = *q;
				*q = tmp;
			}
}

/*
 - freecnfa - free a compacted NFA
 ^ static VOID freecnfa(struct cnfa *);
 */
static VOID
freecnfa(cnfa)
struct cnfa *cnfa;
{
	assert(cnfa->nstates != 0);	/* not empty already */
	cnfa->nstates = 0;
	FREE(cnfa->states);
	FREE(cnfa->arcs);
}

/*
 - dumpnfa - dump an NFA in human-readable form
 ^ static VOID dumpnfa(struct nfa *, FILE *);
 */
static VOID
dumpnfa(nfa, f)
struct nfa *nfa;
FILE *f;
{
#ifdef REG_DEBUG
	struct state *s;

	fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
	if (nfa->bos[0] != COLORLESS)
		fprintf(f, ", bos [%ld]", (long)nfa->bos[0]);
	if (nfa->bos[1] != COLORLESS)
		fprintf(f, ", bol [%ld]", (long)nfa->bos[1]);
	if (nfa->eos[0] != COLORLESS)
		fprintf(f, ", eos [%ld]", (long)nfa->eos[0]);
	if (nfa->eos[1] != COLORLESS)
		fprintf(f, ", eol [%ld]", (long)nfa->eos[1]);
	fprintf(f, "\n");
	for (s = nfa->states; s != NULL; s = s->next)
		dumpstate(s, f);
	if (nfa->parent == NULL)
		dumpcolors(nfa->cm, f);
	fflush(f);
#endif
}

#ifdef REG_DEBUG		/* subordinates of dumpnfa */
/*
 ^ #ifdef REG_DEBUG
 */

/*
 - dumpstate - dump an NFA state in human-readable form
 ^ static VOID dumpstate(struct state *, FILE *);
 */
static VOID
dumpstate(s, f)
struct state *s;
FILE *f;
{
	struct arc *a;

	fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
					(s->flag) ? s->flag : '.');
	if (s->prev != NULL && s->prev->next != s)
		fprintf(f, "\tstate chain bad\n");
	if (s->nouts == 0)
		fprintf(f, "\tno out arcs\n");
	else
		dumparcs(s, f);
	fflush(f);
	for (a = s->ins; a != NULL; a = a->inchain) {
		if (a->to != s)
			fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
					a->from->no, a->to->no, s->no);
	}
}

/*
 - dumparcs - dump out-arcs in human-readable form
 ^ static VOID dumparcs(struct state *, FILE *);
 */
static VOID
dumparcs(s, f)
struct state *s;
FILE *f;
{
	int pos;

	assert(s->nouts > 0);
	/* printing arcs in reverse order is usually clearer */
	pos = dumprarcs(s->outs, s, f, 1);
	if (pos != 1)
		fprintf(f, "\n");
}

/*
 - dumprarcs - dump remaining outarcs, recursively, in reverse order
 ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
 */
static int			/* resulting print position */
dumprarcs(a, s, f, pos)
struct arc *a;
struct state *s;
FILE *f;
int pos;			/* initial print position */
{
	if (a->outchain != NULL)
		pos = dumprarcs(a->outchain, s, f, pos);
	dumparc(a, s, f);
	if (pos == 5) {
		fprintf(f, "\n");
		pos = 1;
	} else
		pos++;
	return pos;
}

/*
 - dumparc - dump one outarc in readable form, including prefixing tab
 ^ static VOID dumparc(struct arc *, struct state *, FILE *);
 */
static VOID
dumparc(a, s, f)
struct arc *a;
struct state *s;
FILE *f;
{
	struct arc *aa;
	struct arcbatch *ab;

	fprintf(f, "\t");
	switch (a->type) {
	case PLAIN:
		fprintf(f, "[%ld]", (long)a->co);
		break;
	case AHEAD:
		fprintf(f, ">%ld>", (long)a->co);
		break;
	case BEHIND:
		fprintf(f, "<%ld<", (long)a->co);
		break;
	case LACON:
		fprintf(f, ":%ld:", (long)a->co);
		break;
	case '^':
	case '$':
		fprintf(f, "%c%d", a->type, (int)a->co);
		break;
	case EMPTY:
		break;
	default:
		fprintf(f, "0x%x/0%lo", a->type, (long)a->co);
		break;
	}
	if (a->from != s)
		fprintf(f, "?%d?", a->from->no);
	for (ab = &a->from->oas; ab != NULL; ab = ab->next) {
		for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
			if (aa == a)
				break;		/* NOTE BREAK OUT */
		if (aa < &ab->a[ABSIZE])	/* propagate break */
				break;		/* NOTE BREAK OUT */
	}
	if (ab == NULL)
		fprintf(f, "?!?");	/* not in allocated space */
	fprintf(f, "->");
	if (a->to == NULL) {
		fprintf(f, "NULL");
		return;
	}
	fprintf(f, "%d", a->to->no);
	for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
		if (aa == a)
			break;		/* NOTE BREAK OUT */
	if (aa == NULL)
		fprintf(f, "?!?");	/* missing from in-chain */
}

/*
 ^ #endif
 */
#endif				/* ifdef REG_DEBUG */

/*
 - dumpcnfa - dump a compacted NFA in human-readable form
 ^ static VOID dumpcnfa(struct cnfa *, FILE *);
 */
static VOID
dumpcnfa(cnfa, f)
struct cnfa *cnfa;
FILE *f;
{
#ifdef REG_DEBUG
	int st;

	fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
	if (cnfa->bos[0] != COLORLESS)
		fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]);
	if (cnfa->bos[1] != COLORLESS)
		fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]);
	if (cnfa->eos[0] != COLORLESS)
		fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]);
	if (cnfa->eos[1] != COLORLESS)
		fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]);
	if (cnfa->flags&HASLACONS)
		fprintf(f, ", haslacons");
	fprintf(f, "\n");
	for (st = 0; st < cnfa->nstates; st++)
		dumpcstate(st, cnfa->states[st], cnfa, f);
	fflush(f);
#endif
}

#ifdef REG_DEBUG		/* subordinates of dumpcnfa */
/*
 ^ #ifdef REG_DEBUG
 */

/*
 - dumpcstate - dump a compacted-NFA state in human-readable form
 ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *);
 */
static VOID
dumpcstate(st, ca, cnfa, f)
int st;
struct carc *ca;
struct cnfa *cnfa;
FILE *f;
{
	int i;
	int pos;

	fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
	pos = 1;
	for (i = 1; ca[i].co != COLORLESS; i++) {
		if (ca[i].co < cnfa->ncolors)
			fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to);
		else
			fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors,
								ca[i].to);
		if (pos == 5) {
			fprintf(f, "\n");
			pos = 1;
		} else
			pos++;
	}
	if (i == 1 || pos != 1)
		fprintf(f, "\n");
	fflush(f);
}

/*
 ^ #endif
 */
#endif				/* ifdef REG_DEBUG */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?