⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regcomp.c

📁 从一个开源软件中摘取的正则表达式模块
💻 C
📖 第 1 页 / 共 4 页
字号:
		while (from <= to && (ce = nextleader(v, from, to)) != NOCELT)		{			if (from < ce)				subrange(v, from, ce - 1, lp, rp);			assert(singleton(v->cm, ce));			assert(leads != NULL);			if (!haschr(leads, ce))				addchr(leads, ce);			from = ce + 1;		}		if (from <= to)			subrange(v, from, to, lp, rp);	}	if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0)		return;	/* deal with the MCCE leaders */	NOTE(REG_ULOCALE);	for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--)	{		co = GETCOLOR(v->cm, *p);		a = findarc(lp, PLAIN, co);		if (a != NULL)			s = a->to;		else		{			s = newstate(v->nfa);			NOERR();			newarc(v->nfa, PLAIN, co, lp, s);			NOERR();		}		pa = findarc(v->mccepbegin, PLAIN, co);		assert(pa != NULL);		ps = pa->to;		newarc(v->nfa, '$', 1, s, rp);		newarc(v->nfa, '$', 0, s, rp);		colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp);		NOERR();	}	/* and the MCCEs */	for (i = 0; i < cv->nmcces; i++)	{		p = cv->mcces[i];		assert(singleton(v->cm, *p));		if (!singleton(v->cm, *p))		{			ERR(REG_ASSERT);			return;		}		ch = *p++;		co = GETCOLOR(v->cm, ch);		a = findarc(lp, PLAIN, co);		if (a != NULL)			s = a->to;		else		{			s = newstate(v->nfa);			NOERR();			newarc(v->nfa, PLAIN, co, lp, s);			NOERR();		}		assert(*p != 0);		/* at least two chars */		assert(singleton(v->cm, *p));		ch = *p++;		co = GETCOLOR(v->cm, ch);		assert(*p == 0);		/* and only two, for now */		newarc(v->nfa, PLAIN, co, s, rp);		NOERR();	}}/* * nextleader - find next MCCE leader within range */static celt						/* NOCELT means none */nextleader(struct vars * v,		   chr from,		   chr to){	int			i;	chr		   *p;	chr			ch;	celt		it = NOCELT;	if (v->mcces == NULL)		return it;	for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++)	{		ch = *p;		if (from <= ch && ch <= to)			if (it == NOCELT || ch < it)				it = ch;	}	return it;}/* * wordchrs - set up word-chr list for word-boundary stuff, if needed * * The list is kept as a bunch of arcs between two dummy states; it's * disposed of by the unreachable-states sweep in NFA optimization. * Does NEXT().  Must not be called from any unusual lexical context. * This should be reconciled with the \w etc. handling in lex.c, and * should be cleaned up to reduce dependencies on input scanning. */static voidwordchrs(struct vars * v){	struct state *left;	struct state *right;	if (v->wordchrs != NULL)	{		NEXT();					/* for consistency */		return;	}	left = newstate(v->nfa);	right = newstate(v->nfa);	NOERR();	/* fine point:	implemented with [::], and lexer will set REG_ULOCALE */	lexword(v);	NEXT();	assert(v->savenow != NULL && SEE('['));	bracket(v, left, right);	assert((v->savenow != NULL && SEE(']')) || ISERR());	NEXT();	NOERR();	v->wordchrs = left;}/* * subre - allocate a subre */static struct subre *subre(struct vars * v,	  int op,	  int flags,	  struct state * begin,	  struct state * end){	struct subre *ret;	ret = v->treefree;	if (ret != NULL)		v->treefree = ret->left;	else	{		ret = (struct subre *) MALLOC(sizeof(struct subre));		if (ret == NULL)		{			ERR(REG_ESPACE);			return NULL;		}		ret->chain = v->treechain;		v->treechain = ret;	}	assert(strchr("|.b(=", op) != NULL);	ret->op = op;	ret->flags = flags;	ret->retry = 0;	ret->subno = 0;	ret->min = ret->max = 1;	ret->left = NULL;	ret->right = NULL;	ret->begin = begin;	ret->end = end;	ZAPCNFA(ret->cnfa);	return ret;}/* * freesubre - free a subRE subtree */static voidfreesubre(struct vars * v,		/* might be NULL */		  struct subre * sr){	if (sr == NULL)		return;	if (sr->left != NULL)		freesubre(v, sr->left);	if (sr->right != NULL)		freesubre(v, sr->right);	freesrnode(v, sr);}/* * freesrnode - free one node in a subRE subtree */static voidfreesrnode(struct vars * v,		/* might be NULL */		   struct subre * sr){	if (sr == NULL)		return;	if (!NULLCNFA(sr->cnfa))		freecnfa(&sr->cnfa);	sr->flags = 0;	if (v != NULL)	{		sr->left = v->treefree;		v->treefree = sr;	}	else		FREE(sr);}/* * optst - optimize a subRE subtree */static voidoptst(struct vars * v,	  struct subre * t){	if (t == NULL)		return;	/* recurse through children */	if (t->left != NULL)		optst(v, t->left);	if (t->right != NULL)		optst(v, t->right);}/* * numst - number tree nodes (assigning retry indexes) */static int						/* next number */numst(struct subre * t,	  int start)				/* starting point for subtree numbers */{	int			i;	assert(t != NULL);	i = start;	t->retry = (short) i++;	if (t->left != NULL)		i = numst(t->left, i);	if (t->right != NULL)		i = numst(t->right, i);	return i;}/* * markst - mark tree nodes as INUSE */static voidmarkst(struct subre * t){	assert(t != NULL);	t->flags |= INUSE;	if (t->left != NULL)		markst(t->left);	if (t->right != NULL)		markst(t->right);}/* * cleanst - free any tree nodes not marked INUSE */static voidcleanst(struct vars * v){	struct subre *t;	struct subre *next;	for (t = v->treechain; t != NULL; t = next)	{		next = t->chain;		if (!(t->flags & INUSE))			FREE(t);	}	v->treechain = NULL;	v->treefree = NULL;			/* just on general principles */}/* * nfatree - turn a subRE subtree into a tree of compacted NFAs */static long						/* optimize results from top node */nfatree(struct vars * v,		struct subre * t,		FILE *f)				/* for debug output */{	assert(t != NULL && t->begin != NULL);	if (t->left != NULL)		(DISCARD) nfatree(v, t->left, f);	if (t->right != NULL)		(DISCARD) nfatree(v, t->right, f);	return nfanode(v, t, f);}/* * nfanode - do one NFA for nfatree */static long						/* optimize results */nfanode(struct vars * v,		struct subre * t,		FILE *f)				/* for debug output */{	struct nfa *nfa;	long		ret = 0;	assert(t->begin != NULL);#ifdef REG_DEBUG	if (f != NULL)	{		char		idbuf[50];		fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",				stid(t, idbuf, sizeof(idbuf)));	}#endif	nfa = newnfa(v, v->cm, v->nfa);	NOERRZ();	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);	if (!ISERR())	{		specialcolors(nfa);		ret = optimize(nfa, f);	}	if (!ISERR())		compact(nfa, &t->cnfa);	freenfa(nfa);	return ret;}/* * newlacon - allocate a lookahead-constraint subRE */static int						/* lacon number */newlacon(struct vars * v,		 struct state * begin,		 struct state * end,		 int pos){	int			n;	struct subre *sub;	if (v->nlacons == 0)	{		v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre));		n = 1;					/* skip 0th */		v->nlacons = 2;	}	else	{		v->lacons = (struct subre *) REALLOC(v->lacons,									(v->nlacons + 1) * sizeof(struct subre));		n = v->nlacons++;	}	if (v->lacons == NULL)	{		ERR(REG_ESPACE);		return 0;	}	sub = &v->lacons[n];	sub->begin = begin;	sub->end = end;	sub->subno = pos;	ZAPCNFA(sub->cnfa);	return n;}/* * freelacons - free lookahead-constraint subRE vector */static voidfreelacons(struct subre * subs,		   int n){	struct subre *sub;	int			i;	assert(n > 0);	for (sub = subs + 1, i = n - 1; i > 0; sub++, i--)	/* no 0th */		if (!NULLCNFA(sub->cnfa))			freecnfa(&sub->cnfa);	FREE(subs);}/* * rfree - free a whole RE (insides of regfree) */static voidrfree(regex_t *re){	struct guts *g;	if (re == NULL || re->re_magic != REMAGIC)		return;	re->re_magic = 0;			/* invalidate RE */	g = (struct guts *) re->re_guts;	re->re_guts = NULL;	re->re_fns = NULL;	g->magic = 0;	freecm(&g->cmap);	if (g->tree != NULL)		freesubre((struct vars *) NULL, g->tree);	if (g->lacons != NULL)		freelacons(g->lacons, g->nlacons);	if (!NULLCNFA(g->search))		freecnfa(&g->search);	FREE(g);}#ifdef REG_DEBUG/* * dump - dump an RE in human-readable form */static voiddump(regex_t *re,	 FILE *f){	struct guts *g;	int			i;	if (re->re_magic != REMAGIC)		fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic,				REMAGIC);	if (re->re_guts == NULL)	{		fprintf(f, "NULL guts!!!\n");		return;	}	g = (struct guts *) re->re_guts;	if (g->magic != GUTSMAGIC)		fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic,				GUTSMAGIC);	fprintf(f, "\n\n\n========= DUMP ==========\n");	fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",			(int) re->re_nsub, re->re_info, re->re_csize, g->ntree);	dumpcolors(&g->cmap, f);	if (!NULLCNFA(g->search))	{		printf("\nsearch:\n");		dumpcnfa(&g->search, f);	}	for (i = 1; i < g->nlacons; i++)	{		fprintf(f, "\nla%d (%s):\n", i,				(g->lacons[i].subno) ? "positive" : "negative");		dumpcnfa(&g->lacons[i].cnfa, f);	}	fprintf(f, "\n");	dumpst(g->tree, f, 0);}/* * dumpst - dump a subRE tree */static voiddumpst(struct subre * t,	   FILE *f,	   int nfapresent)			/* is the original NFA still around? */{	if (t == NULL)		fprintf(f, "null tree\n");	else		stdump(t, f, nfapresent);	fflush(f);}/* * stdump - recursive guts of dumpst */static voidstdump(struct subre * t,	   FILE *f,	   int nfapresent)			/* is the original NFA still around? */{	char		idbuf[50];	fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);	if (t->flags & LONGER)		fprintf(f, " longest");	if (t->flags & SHORTER)		fprintf(f, " shortest");	if (t->flags & MIXED)		fprintf(f, " hasmixed");	if (t->flags & CAP)		fprintf(f, " hascapture");	if (t->flags & BACKR)		fprintf(f, " hasbackref");	if (!(t->flags & INUSE))		fprintf(f, " UNUSED");	if (t->subno != 0)		fprintf(f, " (#%d)", t->subno);	if (t->min != 1 || t->max != 1)	{		fprintf(f, " {%d,", t->min);		if (t->max != INFINITY)			fprintf(f, "%d", t->max);		fprintf(f, "}");	}	if (nfapresent)		fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no);	if (t->left != NULL)		fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));	if (t->right != NULL)		fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));	if (!NULLCNFA(t->cnfa))	{		fprintf(f, "\n");		dumpcnfa(&t->cnfa, f);		fprintf(f, "\n");	}	if (t->left != NULL)		stdump(t->left, f, nfapresent);	if (t->right != NULL)		stdump(t->right, f, nfapresent);}/* * stid - identify a subtree node for dumping */static char *					/* points to buf or constant string */stid(struct subre * t,	 char *buf,	 size_t bufsize){	/* big enough for hex int or decimal t->retry? */	if (bufsize < sizeof(void *) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1)		return "unable";	if (t->retry != 0)		sprintf(buf, "%d", t->retry);	else		sprintf(buf, "%p", t);	return buf;}#endif   /* REG_DEBUG */#include "regc_lex.c"#include "regc_color.c"#include "regc_nfa.c"#include "regc_cvec.c"#include "regc_locale.c"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -