⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 engine.c

📁 Open source for regula expre
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * The matching engine and friends.  This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that * different state representations can be used without duplicating masses * of code. */#ifdef SNAMES#define	matcher	smatcher#define	fast	sfast#define	slow	sslow#define	dissect	sdissect#define	backref	sbackref#define	step	sstep#define	print	sprint#define	at	sat#define	match	smat#endif#ifdef LNAMES#define	matcher	lmatcher#define	fast	lfast#define	slow	lslow#define	dissect	ldissect#define	backref	lbackref#define	step	lstep#define	print	lprint#define	at	lat#define	match	lmat#endif/* another structure passed up and down to avoid zillions of parameters */struct match {	struct re_guts *g;	int eflags;	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */	char *offp;		/* offsets work from here */	char *beginp;		/* start of string -- virtual NUL precedes */	char *endp;		/* end of string -- virtual NUL here */	char *coldp;		/* can be no match starting before here */	char **lastpos;		/* [nplus+1] */	STATEVARS;	states st;		/* current states */	states fresh;		/* states for a fresh start */	states tmp;		/* temporary */	states empty;		/* empty set of states */};#include "engine.ih"#ifdef REDEBUG#define	SP(t, s, c)	print(m, t, s, c, stdout)#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)#define	NOTE(str)	{ if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }#else#define	SP(t, s, c)	/* nothing */#define	AT(t, p1, p2, s1, s2)	/* nothing */#define	NOTE(s)	/* nothing */#endif/* - matcher - the actual matching engine == static int matcher(register struct re_guts *g, char *string, \ ==	size_t nmatch, regmatch_t pmatch[], int eflags); */static int			/* 0 success, REG_NOMATCH failure */matcher(g, string, nmatch, pmatch, eflags)register struct re_guts *g;char *string;size_t nmatch;regmatch_t pmatch[];int eflags;{	register char *endp;	register int i;	struct match mv;	register struct match *m = &mv;	register char *dp;	const register sopno gf = g->firststate+1;	/* +1 for OEND */	const register sopno gl = g->laststate;	char *start;	char *stop;	/* simplify the situation where possible */	if (g->cflags&REG_NOSUB)		nmatch = 0;	if (eflags&REG_STARTEND) {		start = string + pmatch[0].rm_so;		stop = string + pmatch[0].rm_eo;	} else {		start = string;		stop = start + strlen(start);	}	if (stop < start)		return(REG_INVARG);	/* prescreening; this does wonders for this rather slow code */	if (g->must != NULL) {		for (dp = start; dp < stop; dp++)			if (*dp == g->must[0] && stop - dp >= g->mlen &&				memcmp(dp, g->must, (size_t)g->mlen) == 0)				break;		if (dp == stop)		/* we didn't find g->must */			return(REG_NOMATCH);	}	/* match struct setup */	m->g = g;	m->eflags = eflags;	m->pmatch = NULL;	m->lastpos = NULL;	m->offp = string;	m->beginp = start;	m->endp = stop;	STATESETUP(m, 4);	SETUP(m->st);	SETUP(m->fresh);	SETUP(m->tmp);	SETUP(m->empty);	CLEAR(m->empty);	/* this loop does only one repetition except for backrefs */	for (;;) {		endp = fast(m, start, stop, gf, gl);		if (endp == NULL) {		/* a miss */			STATETEARDOWN(m);			return(REG_NOMATCH);		}		if (nmatch == 0 && !g->backrefs)			break;		/* no further info needed */		/* where? */		assert(m->coldp != NULL);		for (;;) {			NOTE("finding start");			endp = slow(m, m->coldp, stop, gf, gl);			if (endp != NULL)				break;			assert(m->coldp < m->endp);			m->coldp++;		}		if (nmatch == 1 && !g->backrefs)			break;		/* no further info needed */		/* oh my, he wants the subexpressions... */		if (m->pmatch == NULL)			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *							sizeof(regmatch_t));		if (m->pmatch == NULL) {			STATETEARDOWN(m);			return(REG_ESPACE);		}		for (i = 1; i <= m->g->nsub; i++)			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;		if (!g->backrefs && !(m->eflags&REG_BACKR)) {			NOTE("dissecting");			dp = dissect(m, m->coldp, endp, gf, gl);		} else {			if (g->nplus > 0 && m->lastpos == NULL)				m->lastpos = (char **)malloc((g->nplus+1) *							sizeof(char *));			if (g->nplus > 0 && m->lastpos == NULL) {				free(m->pmatch);				STATETEARDOWN(m);				return(REG_ESPACE);			}			NOTE("backref dissect");			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);		}		if (dp != NULL)			break;		/* uh-oh... we couldn't find a subexpression-level match */		assert(g->backrefs);	/* must be back references doing it */		assert(g->nplus == 0 || m->lastpos != NULL);		for (;;) {			if (dp != NULL || endp <= m->coldp)				break;		/* defeat */			NOTE("backoff");			endp = slow(m, m->coldp, endp-1, gf, gl);			if (endp == NULL)				break;		/* defeat */			/* try it on a shorter possibility */#ifndef NDEBUG			for (i = 1; i <= m->g->nsub; i++) {				assert(m->pmatch[i].rm_so == -1);				assert(m->pmatch[i].rm_eo == -1);			}#endif			NOTE("backoff dissect");			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);		}		assert(dp == NULL || dp == endp);		if (dp != NULL)		/* found a shorter one */			break;		/* despite initial appearances, there is no match here */		NOTE("false alarm");		start = m->coldp + 1;	/* recycle starting later */		assert(start <= stop);	}	/* fill in the details if requested */	if (nmatch > 0) {		pmatch[0].rm_so = m->coldp - m->offp;		pmatch[0].rm_eo = endp - m->offp;	}	if (nmatch > 1) {		assert(m->pmatch != NULL);		for (i = 1; i < nmatch; i++)			if (i <= m->g->nsub)				pmatch[i] = m->pmatch[i];			else {				pmatch[i].rm_so = -1;				pmatch[i].rm_eo = -1;			}	}	if (m->pmatch != NULL)		free((char *)m->pmatch);	if (m->lastpos != NULL)		free((char *)m->lastpos);	STATETEARDOWN(m);	return(0);}/* - dissect - figure out what matched what, no back references == static char *dissect(register struct match *m, char *start, \ ==	char *stop, sopno startst, sopno stopst); */static char *			/* == stop (success) always */dissect(m, start, stop, startst, stopst)register struct match *m;char *start;char *stop;sopno startst;sopno stopst;{	register int i;	register sopno ss;	/* start sop of current subRE */	register sopno es;	/* end sop of current subRE */	register char *sp;	/* start of string matched by it */	register char *stp;	/* string matched by it cannot pass here */	register char *rest;	/* start of rest of string */	register char *tail;	/* string unmatched by rest of RE */	register sopno ssub;	/* start sop of subsubRE */	register sopno esub;	/* end sop of subsubRE */	register char *ssp;	/* start of string matched by subsubRE */	register char *sep;	/* end of string matched by subsubRE */	register char *oldssp;	/* previous ssp */	register char *dp;	AT("diss", start, stop, startst, stopst);	sp = start;	for (ss = startst; ss < stopst; ss = es) {		/* identify end of subRE */		es = ss;		switch (OP(m->g->strip[es])) {		case OPLUS_:		case OQUEST_:			es += OPND(m->g->strip[es]);			break;		case OCH_:			while (OP(m->g->strip[es]) != O_CH)				es += OPND(m->g->strip[es]);			break;		}		es++;		/* figure out what it matched */		switch (OP(m->g->strip[ss])) {		case OEND:			assert(nope);			break;		case OCHAR:			sp++;			break;		case OBOL:		case OEOL:		case OBOW:		case OEOW:			break;		case OANY:		case OANYOF:			sp++;			break;		case OBACK_:		case O_BACK:			assert(nope);			break;		/* cases where length of match is hard to find */		case OQUEST_:			stp = stop;			for (;;) {				/* how long could this one be? */				rest = slow(m, sp, stp, ss, es);				assert(rest != NULL);	/* it did match */				/* could the rest match the rest? */				tail = slow(m, rest, stop, es, stopst);				if (tail == stop)					break;		/* yes! */				/* no -- try a shorter match for this one */				stp = rest - 1;				assert(stp >= sp);	/* it did work */			}			ssub = ss + 1;			esub = es - 1;			/* did innards match? */			if (slow(m, sp, rest, ssub, esub) != NULL) {				dp = dissect(m, sp, rest, ssub, esub);				assert(dp == rest);			} else		/* no */				assert(sp == rest);			sp = rest;			break;		case OPLUS_:			stp = stop;			for (;;) {				/* how long could this one be? */				rest = slow(m, sp, stp, ss, es);				assert(rest != NULL);	/* it did match */				/* could the rest match the rest? */				tail = slow(m, rest, stop, es, stopst);				if (tail == stop)					break;		/* yes! */				/* no -- try a shorter match for this one */				stp = rest - 1;				assert(stp >= sp);	/* it did work */			}			ssub = ss + 1;			esub = es - 1;			ssp = sp;			oldssp = ssp;			for (;;) {	/* find last match of innards */				sep = slow(m, ssp, rest, ssub, esub);				if (sep == NULL || sep == ssp)					break;	/* failed or matched null */				oldssp = ssp;	/* on to next try */				ssp = sep;			}			if (sep == NULL) {				/* last successful match */				sep = ssp;				ssp = oldssp;			}			assert(sep == rest);	/* must exhaust substring */			assert(slow(m, ssp, sep, ssub, esub) == rest);			dp = dissect(m, ssp, sep, ssub, esub);			assert(dp == sep);			sp = rest;			break;		case OCH_:			stp = stop;			for (;;) {				/* how long could this one be? */				rest = slow(m, sp, stp, ss, es);				assert(rest != NULL);	/* it did match */				/* could the rest match the rest? */				tail = slow(m, rest, stop, es, stopst);				if (tail == stop)					break;		/* yes! */				/* no -- try a shorter match for this one */				stp = rest - 1;				assert(stp >= sp);	/* it did work */			}			ssub = ss + 1;			esub = ss + OPND(m->g->strip[ss]) - 1;			assert(OP(m->g->strip[esub]) == OOR1);			for (;;) {	/* find first matching branch */				if (slow(m, sp, rest, ssub, esub) == rest)					break;	/* it matched all of it */				/* that one missed, try next one */				assert(OP(m->g->strip[esub]) == OOR1);				esub++;				assert(OP(m->g->strip[esub]) == OOR2);				ssub = esub + 1;				esub += OPND(m->g->strip[esub]);				if (OP(m->g->strip[esub]) == OOR2)					esub--;				else					assert(OP(m->g->strip[esub]) == O_CH);			}			dp = dissect(m, sp, rest, ssub, esub);			assert(dp == rest);			sp = rest;			break;		case O_PLUS:		case O_QUEST:		case OOR1:		case OOR2:		case O_CH:			assert(nope);			break;		case OLPAREN:			i = OPND(m->g->strip[ss]);			assert(0 < i && i <= m->g->nsub);			m->pmatch[i].rm_so = sp - m->offp;			break;		case ORPAREN:			i = OPND(m->g->strip[ss]);			assert(0 < i && i <= m->g->nsub);			m->pmatch[i].rm_eo = sp - m->offp;			break;		default:		/* uh oh */			assert(nope);			break;		}	}	assert(sp == stop);	return(sp);}/* - backref - figure out what matched what, figuring in back references == static char *backref(register struct match *m, char *start, \ ==	char *stop, sopno startst, sopno stopst, sopno lev); */static char *			/* == stop (success) or NULL (failure) */backref(m, start, stop, startst, stopst, lev)register struct match *m;char *start;char *stop;sopno startst;sopno stopst;sopno lev;			/* PLUS nesting level */{	register int i;	register sopno ss;	/* start sop of current subRE */	register char *sp;	/* start of string matched by it */	register sopno ssub;	/* start sop of subsubRE */	register sopno esub;	/* end sop of subsubRE */	register char *ssp;	/* start of string matched by subsubRE */	register char *dp;	register size_t len;	register int hard;	register sop s;	register regoff_t offsave;	register cset *cs;	AT("back", start, stop, startst, stopst);	sp = start;	/* get as far as we can with easy stuff */	hard = 0;	for (ss = startst; !hard && ss < stopst; ss++)		switch (OP(s = m->g->strip[ss])) {		case OCHAR:			if (sp == stop || *sp++ != (char)OPND(s))				return(NULL);			break;		case OANY:			if (sp == stop)				return(NULL);			sp++;			break;		case OANYOF:			cs = &m->g->sets[OPND(s)];			if (sp == stop || !CHIN(cs, *sp++))				return(NULL);			break;		case OBOL:			if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||					(sp < m->endp && *(sp-1) == '\n' &&						(m->g->cflags&REG_NEWLINE)) )				{ /* yes */ }			else				return(NULL);			break;		case OEOL:			if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||					(sp < m->endp && *sp == '\n' &&						(m->g->cflags&REG_NEWLINE)) )				{ /* yes */ }			else				return(NULL);			break;		case OBOW:			if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||					(sp < m->endp && *(sp-1) == '\n' &&						(m->g->cflags&REG_NEWLINE)) ||					(sp > m->beginp &&							!ISWORD(*(sp-1))) ) &&					(sp < m->endp && ISWORD(*sp)) )				{ /* yes */ }			else				return(NULL);			break;		case OEOW:			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||					(sp < m->endp && *sp == '\n' &&						(m->g->cflags&REG_NEWLINE)) ||					(sp < m->endp && !ISWORD(*sp)) ) &&					(sp > m->beginp && ISWORD(*(sp-1))) )				{ /* yes */ }			else				return(NULL);			break;		case O_QUEST:			break;		case OOR1:	/* matches null but needs to skip */			ss++;			s = m->g->strip[ss];			do {				assert(OP(s) == OOR2);				ss += OPND(s);			} while (OP(s = m->g->strip[ss]) != O_CH);			/* note that the ss++ gets us past the O_CH */			break;		default:	/* have to make a choice */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -