regexec.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,039 行 · 第 1/2 页

C
1,039
字号
	assert(n > 0);	if ((size_t)n >= v->nmatch)		return;	MDEBUG(("setting %d\n", n));	v->pmatch[n].rm_so = OFF(begin);	v->pmatch[n].rm_eo = OFF(end);}/* - dissect - determine subexpression matches (uncomplicated case) ^ static int dissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */dissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	assert(t != NULL);	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));	switch (t->op) {	case '=':		/* terminal node */		assert(t->left == NULL && t->right == NULL);		return REG_OKAY;	/* no action, parent did the work */		break;	case '|':		/* alternation */		assert(t->left != NULL);		return altdissect(v, t, begin, end);		break;	case 'b':		/* back ref -- shouldn't be calling us! */		return REG_ASSERT;		break;	case '.':		/* concatenation */		assert(t->left != NULL && t->right != NULL);		return condissect(v, t, begin, end);		break;	case '(':		/* capturing */		assert(t->left != NULL && t->right == NULL);		assert(t->subno > 0);		subset(v, t, begin, end);		return dissect(v, t->left, begin, end);		break;	default:		return REG_ASSERT;		break;	}}/* - condissect - determine concatenation subexpression matches (uncomplicated) ^ static int condissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */condissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	struct dfa *d;	struct dfa *d2;	chr *mid;	int i;	int shorter = (t->left->flags&SHORTER) ? 1 : 0;	chr *stop = (shorter) ? end : begin;	assert(t->op == '.');	assert(t->left != NULL && t->left->cnfa.nstates > 0);	assert(t->right != NULL && t->right->cnfa.nstates > 0);	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);	NOERR();	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);	if (ISERR()) {		assert(d2 == NULL);		freedfa(d);		return v->err;	}	/* pick a tentative midpoint */	if (shorter)		mid = shortest(v, d, begin, begin, end, (chr **)NULL,								(int *)NULL);	else		mid = longest(v, d, begin, end, (int *)NULL);	if (mid == NULL) {		freedfa(d);		freedfa(d2);		return REG_ASSERT;	}	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));	/* iterate until satisfaction or failure */	while (longest(v, d2, mid, end, (int *)NULL) != end) {		/* that midpoint didn't work, find a new one */		if (mid == stop) {			/* all possibilities exhausted! */			MDEBUG(("no midpoint!\n"));			freedfa(d);			freedfa(d2);			return REG_ASSERT;		}		if (shorter)			mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,								(int *)NULL);		else			mid = longest(v, d, begin, mid-1, (int *)NULL);		if (mid == NULL) {			/* failed to find a new one! */			MDEBUG(("failed midpoint!\n"));			freedfa(d);			freedfa(d2);			return REG_ASSERT;		}		MDEBUG(("new midpoint %ld\n", LOFF(mid)));	}	/* satisfaction */	MDEBUG(("successful\n"));	freedfa(d);	freedfa(d2);	i = dissect(v, t->left, begin, mid);	if (i != REG_OKAY)		return i;	return dissect(v, t->right, mid, end);}/* - altdissect - determine alternative subexpression matches (uncomplicated) ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */altdissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	struct dfa *d;	int i;	assert(t != NULL);	assert(t->op == '|');	for (i = 0; t != NULL; t = t->right, i++) {		MDEBUG(("trying %dth\n", i));		assert(t->left != NULL && t->left->cnfa.nstates > 0);		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);		if (ISERR())			return v->err;		if (longest(v, d, begin, end, (int *)NULL) == end) {			MDEBUG(("success\n"));			freedfa(d);			return dissect(v, t->left, begin, end);		}		freedfa(d);	}	return REG_ASSERT;	/* none of them matched?!? */}/* - cdissect - determine subexpression matches (with complications) * The retry memory stores the offset of the trial midpoint from begin,  * plus 1 so that 0 uniquely means "clean slate". ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */cdissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	int er;	assert(t != NULL);	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));	switch (t->op) {	case '=':		/* terminal node */		assert(t->left == NULL && t->right == NULL);		return REG_OKAY;	/* no action, parent did the work */		break;	case '|':		/* alternation */		assert(t->left != NULL);		return caltdissect(v, t, begin, end);		break;	case 'b':		/* back ref -- shouldn't be calling us! */		assert(t->left == NULL && t->right == NULL);		return cbrdissect(v, t, begin, end);		break;	case '.':		/* concatenation */		assert(t->left != NULL && t->right != NULL);		return ccondissect(v, t, begin, end);		break;	case '(':		/* capturing */		assert(t->left != NULL && t->right == NULL);		assert(t->subno > 0);		er = cdissect(v, t->left, begin, end);		if (er == REG_OKAY)			subset(v, t, begin, end);		return er;		break;	default:		return REG_ASSERT;		break;	}}/* - ccondissect - concatenation subexpression matches (with complications) * The retry memory stores the offset of the trial midpoint from begin,  * plus 1 so that 0 uniquely means "clean slate". ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */ccondissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	struct dfa *d;	struct dfa *d2;	chr *mid;	int er;	assert(t->op == '.');	assert(t->left != NULL && t->left->cnfa.nstates > 0);	assert(t->right != NULL && t->right->cnfa.nstates > 0);	if (t->left->flags&SHORTER)		/* reverse scan */		return crevdissect(v, t, begin, end);	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);	if (ISERR())		return v->err;	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);	if (ISERR()) {		freedfa(d);		return v->err;	}	MDEBUG(("cconcat %d\n", t->retry));	/* pick a tentative midpoint */	if (v->mem[t->retry] == 0) {		mid = longest(v, d, begin, end, (int *)NULL);		if (mid == NULL) {			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));		v->mem[t->retry] = (mid - begin) + 1;	} else {		mid = begin + (v->mem[t->retry] - 1);		MDEBUG(("working midpoint %ld\n", LOFF(mid)));	}	/* iterate until satisfaction or failure */	for (;;) {		/* try this midpoint on for size */		er = cdissect(v, t->left, begin, mid);		if (er == REG_OKAY &&				longest(v, d2, mid, end, (int *)NULL) == end &&				(er = cdissect(v, t->right, mid, end)) == 								REG_OKAY)			break;			/* NOTE BREAK OUT */		if (er != REG_OKAY && er != REG_NOMATCH) {			freedfa(d);			freedfa(d2);			return er;		}		/* that midpoint didn't work, find a new one */		if (mid == begin) {			/* all possibilities exhausted */			MDEBUG(("%d no midpoint\n", t->retry));			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		mid = longest(v, d, begin, mid-1, (int *)NULL);		if (mid == NULL) {			/* failed to find a new one */			MDEBUG(("%d failed midpoint\n", t->retry));			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));		v->mem[t->retry] = (mid - begin) + 1;		zapmem(v, t->left);		zapmem(v, t->right);	}	/* satisfaction */	MDEBUG(("successful\n"));	freedfa(d);	freedfa(d2);	return REG_OKAY;}/* - crevdissect - determine backref shortest-first subexpression matches * The retry memory stores the offset of the trial midpoint from begin,  * plus 1 so that 0 uniquely means "clean slate". ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */crevdissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	struct dfa *d;	struct dfa *d2;	chr *mid;	int er;	assert(t->op == '.');	assert(t->left != NULL && t->left->cnfa.nstates > 0);	assert(t->right != NULL && t->right->cnfa.nstates > 0);	assert(t->left->flags&SHORTER);	/* concatenation -- need to split the substring between parts */	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);	if (ISERR())		return v->err;	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);	if (ISERR()) {		freedfa(d);		return v->err;	}	MDEBUG(("crev %d\n", t->retry));	/* pick a tentative midpoint */	if (v->mem[t->retry] == 0) {		mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);		if (mid == NULL) {			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));		v->mem[t->retry] = (mid - begin) + 1;	} else {		mid = begin + (v->mem[t->retry] - 1);		MDEBUG(("working midpoint %ld\n", LOFF(mid)));	}	/* iterate until satisfaction or failure */	for (;;) {		/* try this midpoint on for size */		er = cdissect(v, t->left, begin, mid);		if (er == REG_OKAY &&				longest(v, d2, mid, end, (int *)NULL) == end &&				(er = cdissect(v, t->right, mid, end)) == 								REG_OKAY)			break;			/* NOTE BREAK OUT */		if (er != REG_OKAY && er != REG_NOMATCH) {			freedfa(d);			freedfa(d2);			return er;		}		/* that midpoint didn't work, find a new one */		if (mid == end) {			/* all possibilities exhausted */			MDEBUG(("%d no midpoint\n", t->retry));			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);		if (mid == NULL) {			/* failed to find a new one */			MDEBUG(("%d failed midpoint\n", t->retry));			freedfa(d);			freedfa(d2);			return REG_NOMATCH;		}		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));		v->mem[t->retry] = (mid - begin) + 1;		zapmem(v, t->left);		zapmem(v, t->right);	}	/* satisfaction */	MDEBUG(("successful\n"));	freedfa(d);	freedfa(d2);	return REG_OKAY;}/* - cbrdissect - determine backref subexpression matches ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */cbrdissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	int i;	int n = t->subno;	size_t len;	chr *paren;	chr *p;	chr *stop;	int min = t->min;	int max = t->max;	assert(t != NULL);	assert(t->op == 'b');	assert(n >= 0);	assert((size_t)n < v->nmatch);	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));	if (v->pmatch[n].rm_so == -1)		return REG_NOMATCH;	paren = v->start + v->pmatch[n].rm_so;	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;	/* no room to maneuver -- retries are pointless */	if (v->mem[t->retry])		return REG_NOMATCH;	v->mem[t->retry] = 1;	/* special-case zero-length string */	if (len == 0) {		if (begin == end)			return REG_OKAY;		return REG_NOMATCH;	}	/* and too-short string */	assert(end >= begin);	if ((size_t)(end - begin) < len)		return REG_NOMATCH;	stop = end - len;	/* count occurrences */	i = 0;	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {		if ((*v->g->compare)(paren, p, len) != 0)				break;		i++;	}	MDEBUG(("cbackref found %d\n", i));	/* and sort it out */	if (p != end)			/* didn't consume all of it */		return REG_NOMATCH;	if (min <= i && (i <= max || max == INFINITY))		return REG_OKAY;	return REG_NOMATCH;		/* out of range */}/* - caltdissect - determine alternative subexpression matches (w. complications) ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */static int			/* regexec return code */caltdissect(v, t, begin, end)struct vars *v;struct subre *t;chr *begin;			/* beginning of relevant substring */chr *end;			/* end of same */{	struct dfa *d;	int er;#	define	UNTRIED	0	/* not yet tried at all */#	define	TRYING	1	/* top matched, trying submatches */#	define	TRIED	2	/* top didn't match or submatches exhausted */	if (t == NULL)		return REG_NOMATCH;	assert(t->op == '|');	if (v->mem[t->retry] == TRIED)		return caltdissect(v, t->right, begin, end);	MDEBUG(("calt n%d\n", t->retry));	assert(t->left != NULL);	if (v->mem[t->retry] == UNTRIED) {		d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);		if (ISERR())			return v->err;		if (longest(v, d, begin, end, (int *)NULL) != end) {			freedfa(d);			v->mem[t->retry] = TRIED;			return caltdissect(v, t->right, begin, end);		}		freedfa(d);		MDEBUG(("calt matched\n"));		v->mem[t->retry] = TRYING;	}	er = cdissect(v, t->left, begin, end);	if (er != REG_NOMATCH)		return er;	v->mem[t->retry] = TRIED;	return caltdissect(v, t->right, begin, end);}#include "rege_dfa.c"

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?