regcomp.c

来自「tcl是工具命令语言」· C语言 代码 · 共 2,176 行 · 第 1/4 页

C
2,176
字号
	t = top->right;	if (!(SEE('|') || SEE(stopper) || SEE(EOS)))		t->right = parsebranch(v, stopper, type, atom->end, rp, 1);	else {		EMPTYARC(atom->end, rp);		t->right = subre(v, '=', 0, atom->end, rp);	}	assert(SEE('|') || SEE(stopper) || SEE(EOS));	t->flags |= COMBINE(t->flags, t->right->flags);	top->flags |= COMBINE(top->flags, t->flags);}/* - nonword - generate arcs for non-word-character ahead or behind ^ static VOID nonword(struct vars *, int, struct state *, struct state *); */static VOIDnonword(v, dir, lp, rp)struct vars *v;int dir;			/* AHEAD or BEHIND */struct state *lp;struct state *rp;{	int anchor = (dir == AHEAD) ? '$' : '^';	assert(dir == AHEAD || dir == BEHIND);	newarc(v->nfa, anchor, 1, lp, rp);	newarc(v->nfa, anchor, 0, lp, rp);	colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);	/* (no need for special attention to \n) */}/* - word - generate arcs for word character ahead or behind ^ static VOID word(struct vars *, int, struct state *, struct state *); */static VOIDword(v, dir, lp, rp)struct vars *v;int dir;			/* AHEAD or BEHIND */struct state *lp;struct state *rp;{	assert(dir == AHEAD || dir == BEHIND);	cloneouts(v->nfa, v->wordchrs, lp, rp, dir);	/* (no need for special attention to \n) */}/* - scannum - scan a number ^ static int scannum(struct vars *); */static int			/* value, <= DUPMAX */scannum(v)struct vars *v;{	int n = 0;	while (SEE(DIGIT) && n < DUPMAX) {		n = n*10 + v->nextvalue;		NEXT();	}	if (SEE(DIGIT) || n > DUPMAX) {		ERR(REG_BADBR);		return 0;	}	return n;}/* - repeat - replicate subNFA for quantifiers * The duplication sequences used here are chosen carefully so that any * pointers starting out pointing into the subexpression end up pointing into * the last occurrence.  (Note that it may not be strung between the same * left and right end states, however!)  This used to be important for the * subRE tree, although the important bits are now handled by the in-line * code in parse(), and when this is called, it doesn't matter any more. ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); */static VOIDrepeat(v, lp, rp, m, n)struct vars *v;struct state *lp;struct state *rp;int m;int n;{#	define	SOME	2#	define	INF	3#	define	PAIR(x, y)	((x)*4 + (y))#	define	REDUCE(x)	( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )	CONST int rm = REDUCE(m);	CONST int rn = REDUCE(n);	struct state *s;	struct state *s2;	switch (PAIR(rm, rn)) {	case PAIR(0, 0):		/* empty string */		delsub(v->nfa, lp, rp);		EMPTYARC(lp, rp);		break;	case PAIR(0, 1):		/* do as x| */		EMPTYARC(lp, rp);		break;	case PAIR(0, SOME):		/* do as x{1,n}| */		repeat(v, lp, rp, 1, n);		NOERR();		EMPTYARC(lp, rp);		break;	case PAIR(0, INF):		/* loop x around */		s = newstate(v->nfa);		NOERR();		moveouts(v->nfa, lp, s);		moveins(v->nfa, rp, s);		EMPTYARC(lp, s);		EMPTYARC(s, rp);		break;	case PAIR(1, 1):		/* no action required */		break;	case PAIR(1, SOME):		/* do as x{0,n-1}x = (x{1,n-1}|)x */		s = newstate(v->nfa);		NOERR();		moveouts(v->nfa, lp, s);		dupnfa(v->nfa, s, rp, lp, s);		NOERR();		repeat(v, lp, s, 1, n-1);		NOERR();		EMPTYARC(lp, s);		break;	case PAIR(1, INF):		/* add loopback arc */		s = newstate(v->nfa);		s2 = newstate(v->nfa);		NOERR();		moveouts(v->nfa, lp, s);		moveins(v->nfa, rp, s2);		EMPTYARC(lp, s);		EMPTYARC(s2, rp);		EMPTYARC(s2, s);		break;	case PAIR(SOME, SOME):		/* do as x{m-1,n-1}x */		s = newstate(v->nfa);		NOERR();		moveouts(v->nfa, lp, s);		dupnfa(v->nfa, s, rp, lp, s);		NOERR();		repeat(v, lp, s, m-1, n-1);		break;	case PAIR(SOME, INF):		/* do as x{m-1,}x */		s = newstate(v->nfa);		NOERR();		moveouts(v->nfa, lp, s);		dupnfa(v->nfa, s, rp, lp, s);		NOERR();		repeat(v, lp, s, m-1, n);		break;	default:		ERR(REG_ASSERT);		break;	}}/* - bracket - handle non-complemented bracket expression * Also called from cbracket for complemented bracket expressions. ^ static VOID bracket(struct vars *, struct state *, struct state *); */static VOIDbracket(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{	assert(SEE('['));	NEXT();	while (!SEE(']') && !SEE(EOS))		brackpart(v, lp, rp);	assert(SEE(']') || ISERR());	okcolors(v->nfa, v->cm);}/* - cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing * the result.  The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. ^ static VOID cbracket(struct vars *, struct state *, struct state *); */static VOIDcbracket(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{	struct state *left = newstate(v->nfa);	struct state *right = newstate(v->nfa);	struct state *s;	struct arc *a;			/* arc from lp */	struct arc *ba;			/* arc from left, from bracket() */	struct arc *pa;			/* MCCE-prototype arc */	color co;	chr *p;	int i;	NOERR();	bracket(v, left, right);	if (v->cflags&REG_NLSTOP)		newarc(v->nfa, PLAIN, v->nlcolor, left, right);	NOERR();	assert(lp->nouts == 0);		/* all outarcs will be ours */	/* easy part of complementing */	colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);	NOERR();	if (v->mcces == NULL) {		/* no MCCEs -- we're done */		dropstate(v->nfa, left);		assert(right->nins == 0);		freestate(v->nfa, right);		return;	}	/* but complementing gets messy in the presence of MCCEs... */	NOTE(REG_ULOCALE);	for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) {		co = GETCOLOR(v->cm, *p);		a = findarc(lp, PLAIN, co);		ba = findarc(left, PLAIN, co);		if (ba == NULL) {			assert(a != NULL);			freearc(v->nfa, a);		} else {			assert(a == NULL);		}		s = newstate(v->nfa);		NOERR();		newarc(v->nfa, PLAIN, co, lp, s);		NOERR();		pa = findarc(v->mccepbegin, PLAIN, co);		assert(pa != NULL);		if (ba == NULL) {	/* easy case, need all of them */			cloneouts(v->nfa, pa->to, s, rp, PLAIN);			newarc(v->nfa, '$', 1, s, rp);			newarc(v->nfa, '$', 0, s, rp);			colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp);		} else {		/* must be selective */			if (findarc(ba->to, '$', 1) == NULL) {				newarc(v->nfa, '$', 1, s, rp);				newarc(v->nfa, '$', 0, s, rp);				colorcomplement(v->nfa, v->cm, AHEAD, pa->to,									 s, rp);			}			for (pa = pa->to->outs; pa != NULL; pa = pa->outchain)				if (findarc(ba->to, PLAIN, pa->co) == NULL)					newarc(v->nfa, PLAIN, pa->co, s, rp);			if (s->nouts == 0)	/* limit of selectivity: none */				dropstate(v->nfa, s);	/* frees arc too */		}		NOERR();	}	delsub(v->nfa, left, right);	assert(left->nouts == 0);	freestate(v->nfa, left);	assert(right->nins == 0);	freestate(v->nfa, right);}			/* - brackpart - handle one item (or range) within a bracket expression ^ static VOID brackpart(struct vars *, struct state *, struct state *); */static VOIDbrackpart(v, lp, rp)struct vars *v;struct state *lp;struct state *rp;{	celt startc;	celt endc;	struct cvec *cv;	chr *startp;	chr *endp;	chr c[1];	/* parse something, get rid of special cases, take shortcuts */	switch (v->nexttype) {	case RANGE:			/* a-b-c or other botch */		ERR(REG_ERANGE);		return;		break;	case PLAIN:		c[0] = v->nextvalue;		NEXT();		/* shortcut for ordinary chr (not range, not MCCE leader) */		if (!SEE(RANGE) && !ISCELEADER(v, c[0])) {			onechr(v, c[0], lp, rp);			return;		}		startc = element(v, c, c+1);		NOERR();		break;	case COLLEL:		startp = v->now;		endp = scanplain(v);		INSIST(startp < endp, REG_ECOLLATE);		NOERR();		startc = element(v, startp, endp);		NOERR();		break;	case ECLASS:		startp = v->now;		endp = scanplain(v);		INSIST(startp < endp, REG_ECOLLATE);		NOERR();		startc = element(v, startp, endp);		NOERR();		cv = eclass(v, startc, (v->cflags&REG_ICASE));		NOERR();		dovec(v, cv, lp, rp);		return;		break;	case CCLASS:		startp = v->now;		endp = scanplain(v);		INSIST(startp < endp, REG_ECTYPE);		NOERR();		cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));		NOERR();		dovec(v, cv, lp, rp);		return;		break;	default:		ERR(REG_ASSERT);		return;		break;	}	if (SEE(RANGE)) {		NEXT();		switch (v->nexttype) {		case PLAIN:		case RANGE:			c[0] = v->nextvalue;			NEXT();			endc = element(v, c, c+1);			NOERR();			break;		case COLLEL:			startp = v->now;			endp = scanplain(v);			INSIST(startp < endp, REG_ECOLLATE);			NOERR();			endc = element(v, startp, endp);			NOERR();			break;		default:			ERR(REG_ERANGE);			return;			break;		}	} else		endc = startc;	/*	 * Ranges are unportable.  Actually, standard C does	 * guarantee that digits are contiguous, but making	 * that an exception is just too complicated.	 */	if (startc != endc)		NOTE(REG_UUNPORT);	cv = range(v, startc, endc, (v->cflags&REG_ICASE));	NOERR();	dovec(v, cv, lp, rp);}/* - scanplain - scan PLAIN contents of [. etc. * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. ^ static chr *scanplain(struct vars *); */static chr *			/* just after end of sequence */scanplain(v)struct vars *v;{	chr *endp;	assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));	NEXT();	endp = v->now;	while (SEE(PLAIN)) {		endp = v->now;		NEXT();	}	assert(SEE(END) || ISERR());	NEXT();	return endp;}/* - leaders - process a cvec of collating elements to also include leaders * Also gives all characters involved their own colors, which is almost * certainly necessary, and sets up little disconnected subNFA. ^ static VOID leaders(struct vars *, struct cvec *); */static VOIDleaders(v, cv)struct vars *v;struct cvec *cv;{	int mcce;	chr *p;	chr leader;	struct state *s;	struct arc *a;	v->mccepbegin = newstate(v->nfa);	v->mccepend = newstate(v->nfa);	NOERR();	for (mcce = 0; mcce < cv->nmcces; mcce++) {		p = cv->mcces[mcce];		leader = *p;		if (!haschr(cv, leader)) {			addchr(cv, leader);			s = newstate(v->nfa);			newarc(v->nfa, PLAIN, subcolor(v->cm, leader),							v->mccepbegin, s);			okcolors(v->nfa, v->cm);		} else {			a = findarc(v->mccepbegin, PLAIN,						GETCOLOR(v->cm, leader));			assert(a != NULL);			s = a->to;			assert(s != v->mccepend);		}		p++;		assert(*p != 0 && *(p+1) == 0);	/* only 2-char MCCEs for now */		newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend);		okcolors(v->nfa, v->cm);	}}/* - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); */static VOIDonechr(v, c, lp, rp)struct vars *v;pchr c;struct state *lp;struct state *rp;{	if (!(v->cflags&REG_ICASE)) {		newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);		return;	}	/* rats, need general case anyway... */	dovec(v, allcases(v, c), lp, rp);}/* - dovec - fill in arcs for each element of a cvec * This one has to handle the messy cases, like MCCEs and MCCE leaders. ^ static VOID dovec(struct vars *, struct cvec *, struct state *, ^ 	struct state *); */static VOIDdovec(v, cv, lp, rp)struct vars *v;struct cvec *cv;struct state *lp;struct state *rp;{	chr ch, from, to;	celt ce;	chr *p;	int i;	color co;	struct cvec *leads;	struct arc *a;	struct arc *pa;		/* arc in prototype */	struct state *s;	struct state *ps;	/* state in prototype */	/* need a place to store leaders, if any */	if (nmcces(v) > 0) {		assert(v->mcces != NULL);		if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) {			if (v->cv2 != NULL)				free(v->cv2);			v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces);			NOERR();			leads = v->cv2;		} else			leads = clearcvec(v->cv2);	} else		leads = NULL;	/* first, get the ordinary characters out of the way */	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {		ch = *p;		if (!ISCELEADER(v, ch))			newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);		else {			assert(singleton(v->cm, ch));			assert(leads != NULL);			if (!haschr(leads, ch))				addchr(leads, ch);		}	}	/* and the ranges */	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {		from = *p;		to = *(p+1);		while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) {			if (from < ce)				subrange(v, from, ce - 1, lp, rp);			assert(singleton(v->cm, ce));			assert(leads != NULL);			if (!haschr(leads, ce))				addchr(leads, ce);			from = ce + 1;		}		if (from <= to)			subrange(v, from, to, lp, rp);	}	if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)		return;	/* deal with the MCCE leaders */	NOTE(REG_ULOCALE);	for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) {		co = GETCOLOR(v->cm, *p);		a = findarc(lp, PLAIN, co);		if (a != NULL)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?