regexec.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,039 行 · 第 1/2 页

C
1,039
字号
/* * re_*exec and friends - match REs * * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. *  * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results.  The author * thanks all of them.  *  * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. *  * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. *  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */#include "regguts.h"/* lazy-DFA representation */struct arcp {			/* "pointer" to an outarc */	struct sset *ss;	color co;};struct sset {			/* state set */	unsigned *states;	/* pointer to bitvector */	unsigned hash;		/* hash of bitvector */#		define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))#	define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))	int flags;#		define	STARTER		01	/* the initial state set */#		define	POSTSTATE	02	/* includes the goal state */#		define	LOCKED		04	/* locked in cache */#		define	NOPROGRESS	010	/* zero-progress state set */	struct arcp ins;	/* chain of inarcs pointing here */	chr *lastseen;		/* last entered on arrival here */	struct sset **outs;	/* outarc vector indexed by color */	struct arcp *inchain;	/* chain-pointer vector for outarcs */};struct dfa {	int nssets;		/* size of cache */	int nssused;		/* how many entries occupied yet */	int nstates;		/* number of states */	int ncolors;		/* length of outarc and inchain vectors */	int wordsper;		/* length of state-set bitvectors */	struct sset *ssets;	/* state-set cache */	unsigned *statesarea;	/* bitvector storage */	unsigned *work;		/* pointer to work area within statesarea */	struct sset **outsarea;	/* outarc-vector storage */	struct arcp *incarea;	/* inchain storage */	struct cnfa *cnfa;	struct colormap *cm;	chr *lastpost;		/* location of last cache-flushed success */	chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */	struct sset *search;	/* replacement-search-pointer memory */	int cptsmalloced;	/* were the areas individually malloced? */	char *mallocarea;	/* self, or master malloced area, or NULL */};#define	WORK	1		/* number of work bitvectors needed *//* setup for non-malloc allocation for small cases */#define	FEWSTATES	20	/* must be less than UBITS */#define	FEWCOLORS	15struct smalldfa {	struct dfa dfa;	struct sset ssets[FEWSTATES*2];	unsigned statesarea[FEWSTATES*2 + WORK];	struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];	struct arcp incarea[FEWSTATES*2 * FEWCOLORS];};#define	DOMALLOC	((struct smalldfa *)NULL)	/* force malloc *//* internal variables, bundled for easy passing around */struct vars {	regex_t *re;	struct guts *g;	int eflags;		/* copies of arguments */	size_t nmatch;	regmatch_t *pmatch;	rm_detail_t *details;	chr *start;		/* start of string */	chr *stop;		/* just past end of string */	int err;		/* error code if any (0 none) */	regoff_t *mem;		/* memory vector for backtracking */	struct smalldfa dfa1;	struct smalldfa dfa2;};#define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */#define	ISERR()	VISERR(v)#define	VERR(vv,e)	(((vv)->err) ? (vv)->err : ((vv)->err = (e)))#define	ERR(e)	VERR(v, e)		/* record an error */#define	NOERR()	{if (ISERR()) return v->err;}	/* if error seen, return it */#define	OFF(p)	((p) - v->start)#define	LOFF(p)	((long)OFF(p))/* * forward declarations *//* =====^!^===== begin forwards =====^!^===== *//* automatically gathered by fwd; do not hand-edit *//* === regexec.c === */int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));/* === rege_dfa.c === */static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));static VOID freedfa _ANSI_ARGS_((struct dfa *));static unsigned hash _ANSI_ARGS_((unsigned *, int));static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));/* automatically gathered by fwd; do not hand-edit *//* =====^!^===== end forwards =====^!^===== *//* - exec - match regular expression ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, ^					size_t, regmatch_t [], int); */intexec(re, string, len, details, nmatch, pmatch, flags)regex_t *re;CONST chr *string;size_t len;rm_detail_t *details;size_t nmatch;regmatch_t pmatch[];int flags;{	struct vars var;	register struct vars *v = &var;	int st;	size_t n;	int backref;#	define	LOCALMAT	20	regmatch_t mat[LOCALMAT];#	define	LOCALMEM	40	regoff_t mem[LOCALMEM];	/* sanity checks */	if (re == NULL || string == NULL || re->re_magic != REMAGIC)		return REG_INVARG;	if (re->re_csize != sizeof(chr))		return REG_MIXED;	/* setup */	v->re = re;	v->g = (struct guts *)re->re_guts;	if ((v->g->cflags&REG_EXPECT) && details == NULL)		return REG_INVARG;	if (v->g->info&REG_UIMPOSSIBLE)		return REG_NOMATCH;	backref = (v->g->info&REG_UBACKREF) ? 1 : 0;	v->eflags = flags;	if (v->g->cflags&REG_NOSUB)		nmatch = 0;		/* override client */	v->nmatch = nmatch;	if (backref) {		/* need work area */		if (v->g->nsub + 1 <= LOCALMAT)			v->pmatch = mat;		else			v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *							sizeof(regmatch_t));		if (v->pmatch == NULL)			return REG_ESPACE;		v->nmatch = v->g->nsub + 1;	} else		v->pmatch = pmatch;	v->details = details;	v->start = (chr *)string;	v->stop = (chr *)string + len;	v->err = 0;	if (backref) {		/* need retry memory */		assert(v->g->ntree >= 0);		n = (size_t)v->g->ntree;		if (n <= LOCALMEM)			v->mem = mem;		else			v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));		if (v->mem == NULL) {			if (v->pmatch != pmatch && v->pmatch != mat)				FREE(v->pmatch);			return REG_ESPACE;		}	} else		v->mem = NULL;	/* do it */	assert(v->g->tree != NULL);	if (backref)		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);	else		st = find(v, &v->g->tree->cnfa, &v->g->cmap);	/* copy (portion of) match vector over if necessary */	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {		zapsubs(pmatch, nmatch);		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));	}	/* clean up */	if (v->pmatch != pmatch && v->pmatch != mat)		FREE(v->pmatch);	if (v->mem != NULL && v->mem != mem)		FREE(v->mem);	return st;}/* - find - find a match for the main NFA (no-complications case) ^ static int find(struct vars *, struct cnfa *, struct colormap *); */static intfind(v, cnfa, cm)struct vars *v;struct cnfa *cnfa;struct colormap *cm;{	struct dfa *s;	struct dfa *d;	chr *begin;	chr *end = NULL;	chr *cold;	chr *open;		/* open and close of range of possible starts */	chr *close;	int hitend;	int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;	/* first, a shot with the search RE */	s = newdfa(v, &v->g->search, cm, &v->dfa1);	assert(!(ISERR() && s != NULL));	NOERR();	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));	cold = NULL;	close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);	freedfa(s);	NOERR();	if (v->g->cflags&REG_EXPECT) {		assert(v->details != NULL);		if (cold != NULL)			v->details->rm_extend.rm_so = OFF(cold);		else			v->details->rm_extend.rm_so = OFF(v->stop);		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */	}	if (close == NULL)		/* not found */		return REG_NOMATCH;	if (v->nmatch == 0)		/* found, don't need exact location */		return REG_OKAY;	/* find starting point and match */	assert(cold != NULL);	open = cold;	cold = NULL;	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));	d = newdfa(v, cnfa, cm, &v->dfa1);	assert(!(ISERR() && d != NULL));	NOERR();	for (begin = open; begin <= close; begin++) {		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));		if (shorter)			end = shortest(v, d, begin, begin, v->stop,							(chr **)NULL, &hitend);		else			end = longest(v, d, begin, v->stop, &hitend);		NOERR();		if (hitend && cold == NULL)			cold = begin;		if (end != NULL)			break;		/* NOTE BREAK OUT */	}	assert(end != NULL);		/* search RE succeeded so loop should */	freedfa(d);	/* and pin down details */	assert(v->nmatch > 0);	v->pmatch[0].rm_so = OFF(begin);	v->pmatch[0].rm_eo = OFF(end);	if (v->g->cflags&REG_EXPECT) {		if (cold != NULL)			v->details->rm_extend.rm_so = OFF(cold);		else			v->details->rm_extend.rm_so = OFF(v->stop);		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */	}	if (v->nmatch == 1)		/* no need for submatches */		return REG_OKAY;	/* submatches */	zapsubs(v->pmatch, v->nmatch);	return dissect(v, v->g->tree, begin, end);}/* - cfind - find a match for the main NFA (with complications) ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); */static intcfind(v, cnfa, cm)struct vars *v;struct cnfa *cnfa;struct colormap *cm;{	struct dfa *s;	struct dfa *d;	chr *cold;	int ret;	s = newdfa(v, &v->g->search, cm, &v->dfa1);	NOERR();	d = newdfa(v, cnfa, cm, &v->dfa2);	if (ISERR()) {		assert(d == NULL);		freedfa(s);		return v->err;	}	ret = cfindloop(v, cnfa, cm, d, s, &cold);	freedfa(d);	freedfa(s);	NOERR();	if (v->g->cflags&REG_EXPECT) {		assert(v->details != NULL);		if (cold != NULL)			v->details->rm_extend.rm_so = OFF(cold);		else			v->details->rm_extend.rm_so = OFF(v->stop);		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */	}	return ret;}/* - cfindloop - the heart of cfind ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, ^	struct dfa *, struct dfa *, chr **); */static intcfindloop(v, cnfa, cm, d, s, coldp)struct vars *v;struct cnfa *cnfa;struct colormap *cm;struct dfa *d;struct dfa *s;chr **coldp;			/* where to put coldstart pointer */{	chr *begin;	chr *end;	chr *cold;	chr *open;		/* open and close of range of possible starts */	chr *close;	chr *estart;	chr *estop;	int er;	int shorter = v->g->tree->flags&SHORTER;	int hitend;	assert(d != NULL && s != NULL);	cold = NULL;	close = v->start;	do {		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));		close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);		if (close == NULL)			break;				/* NOTE BREAK */		assert(cold != NULL);		open = cold;		cold = NULL;		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));		for (begin = open; begin <= close; begin++) {			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));			estart = begin;			estop = v->stop;			for (;;) {				if (shorter)					end = shortest(v, d, begin, estart,						estop, (chr **)NULL, &hitend);				else					end = longest(v, d, begin, estop,								&hitend);				if (hitend && cold == NULL)					cold = begin;				if (end == NULL)					break;		/* NOTE BREAK OUT */				MDEBUG(("tentative end %ld\n", LOFF(end)));				zapsubs(v->pmatch, v->nmatch);				zapmem(v, v->g->tree);				er = cdissect(v, v->g->tree, begin, end);				if (er == REG_OKAY) {					if (v->nmatch > 0) {						v->pmatch[0].rm_so = OFF(begin);						v->pmatch[0].rm_eo = OFF(end);					}					*coldp = cold;					return REG_OKAY;				}				if (er != REG_NOMATCH) {					ERR(er);					return er;				}				if ((shorter) ? end == estop : end == begin) {					/* no point in trying again */					*coldp = cold;					return REG_NOMATCH;				}				/* go around and try again */				if (shorter)					estart = end + 1;				else					estop = end - 1;			}		}	} while (close < v->stop);	*coldp = cold;	return REG_NOMATCH;}/* - zapsubs - initialize the subexpression matches to "no match" ^ static VOID zapsubs(regmatch_t *, size_t); */static VOIDzapsubs(p, n)regmatch_t *p;size_t n;{	size_t i;	for (i = n-1; i > 0; i--) {		p[i].rm_so = -1;		p[i].rm_eo = -1;	}}/* - zapmem - initialize the retry memory of a subtree to zeros ^ static VOID zapmem(struct vars *, struct subre *); */static VOIDzapmem(v, t)struct vars *v;struct subre *t;{	if (t == NULL)		return;	assert(v->mem != NULL);	v->mem[t->retry] = 0;	if (t->op == '(') {		assert(t->subno > 0);		v->pmatch[t->subno].rm_so = -1;		v->pmatch[t->subno].rm_eo = -1;	}	if (t->left != NULL)		zapmem(v, t->left);	if (t->right != NULL)		zapmem(v, t->right);}/* - subset - set any subexpression relevant to a successful subre ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); */static VOIDsubset(v, sub, begin, end)struct vars *v;struct subre *sub;chr *begin;chr *end;{	int n = sub->subno;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?