⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regcomp.c

📁 从一个开源软件中摘取的正则表达式模块
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * re_*comp and friends - compile REs * This file #includes several others (see the bottom). * * Copyright (c) 1998, 1999 Henry Spencer.	All rights reserved. * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results.  The author * thanks all of them. * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.45 2007/10/06 16:05:54 tgl Exp $ * */#include "regguts.h"/* * forward declarations, up here so forward datatypes etc. are defined early *//* === regcomp.c === */static void moresubs(struct vars *, int);static int	freev(struct vars *, int);static void makesearch(struct vars *, struct nfa *);static struct subre *parse(struct vars *, int, int, struct state *, struct state *);static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);static void nonword(struct vars *, int, struct state *, struct state *);static void word(struct vars *, int, struct state *, struct state *);static int	scannum(struct vars *);static void repeat(struct vars *, struct state *, struct state *, int, int);static void bracket(struct vars *, struct state *, struct state *);static void cbracket(struct vars *, struct state *, struct state *);static void brackpart(struct vars *, struct state *, struct state *);static chr *scanplain(struct vars *);static void leaders(struct vars *, struct cvec *);static void onechr(struct vars *, chr, struct state *, struct state *);static void dovec(struct vars *, struct cvec *, struct state *, struct state *);static celt nextleader(struct vars *, chr, chr);static void wordchrs(struct vars *);static struct subre *subre(struct vars *, int, int, struct state *, struct state *);static void freesubre(struct vars *, struct subre *);static void freesrnode(struct vars *, struct subre *);static void optst(struct vars *, struct subre *);static int	numst(struct subre *, int);static void markst(struct subre *);static void cleanst(struct vars *);static long nfatree(struct vars *, struct subre *, FILE *);static long nfanode(struct vars *, struct subre *, FILE *);static int	newlacon(struct vars *, struct state *, struct state *, int);static void freelacons(struct subre *, int);static void rfree(regex_t *);#ifdef REG_DEBUGstatic void dump(regex_t *, FILE *);static void dumpst(struct subre *, FILE *, int);static void stdump(struct subre *, FILE *, int);static char *stid(struct subre *, char *, size_t);#endif/* === regc_lex.c === */static void lexstart(struct vars *);static void prefixes(struct vars *);static void lexnest(struct vars *, chr *, chr *);static void lexword(struct vars *);static int	next(struct vars *);static int	lexescape(struct vars *);static chr	lexdigits(struct vars *, int, int, int);static int	brenext(struct vars *, chr);static void skip(struct vars *);static chr	newline(void);static chr	chrnamed(struct vars *, chr *, chr *, chr);/* === regc_color.c === */static void initcm(struct vars *, struct colormap *);static void freecm(struct colormap *);static void cmtreefree(struct colormap *, union tree *, int);static color setcolor(struct colormap *, chr, pcolor);static color maxcolor(struct colormap *);static color newcolor(struct colormap *);static void freecolor(struct colormap *, pcolor);static color pseudocolor(struct colormap *);static color subcolor(struct colormap *, chr c);static color newsub(struct colormap *, pcolor);static void subrange(struct vars *, chr, chr, struct state *, struct state *);static void subblock(struct vars *, chr, struct state *, struct state *);static void okcolors(struct nfa *, struct colormap *);static void colorchain(struct colormap *, struct arc *);static void uncolorchain(struct colormap *, struct arc *);static int	singleton(struct colormap *, chr c);static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);#ifdef REG_DEBUGstatic void dumpcolors(struct colormap *, FILE *);static void fillcheck(struct colormap *, union tree *, int, FILE *);static void dumpchr(chr, FILE *);#endif/* === regc_nfa.c === */static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);static void freenfa(struct nfa *);static struct state *newstate(struct nfa *);static struct state *newfstate(struct nfa *, int flag);static void dropstate(struct nfa *, struct state *);static void freestate(struct nfa *, struct state *);static void destroystate(struct nfa *, struct state *);static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);static struct arc *allocarc(struct nfa *, struct state *);static void freearc(struct nfa *, struct arc *);static struct arc *findarc(struct state *, int, pcolor);static void cparc(struct nfa *, struct arc *, struct state *, struct state *);static void moveins(struct nfa *, struct state *, struct state *);static void copyins(struct nfa *, struct state *, struct state *);static void moveouts(struct nfa *, struct state *, struct state *);static void copyouts(struct nfa *, struct state *, struct state *);static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);static void delsub(struct nfa *, struct state *, struct state *);static void deltraverse(struct nfa *, struct state *, struct state *);static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);static void duptraverse(struct nfa *, struct state *, struct state *);static void cleartraverse(struct nfa *, struct state *);static void specialcolors(struct nfa *);static long optimize(struct nfa *, FILE *);static void pullback(struct nfa *, FILE *);static int	pull(struct nfa *, struct arc *);static void pushfwd(struct nfa *, FILE *);static int	push(struct nfa *, struct arc *);#define INCOMPATIBLE	1		/* destroys arc */#define SATISFIED	2			/* constraint satisfied */#define COMPATIBLE	3			/* compatible but not satisfied yet */static int	combine(struct arc *, struct arc *);static void fixempties(struct nfa *, FILE *);static int	unempty(struct nfa *, struct arc *);static void cleanup(struct nfa *);static void markreachable(struct nfa *, struct state *, struct state *, struct state *);static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);static long analyze(struct nfa *);static void compact(struct nfa *, struct cnfa *);static void carcsort(struct carc *, struct carc *);static void freecnfa(struct cnfa *);static void dumpnfa(struct nfa *, FILE *);#ifdef REG_DEBUGstatic void dumpstate(struct state *, FILE *);static void dumparcs(struct state *, FILE *);static int	dumprarcs(struct arc *, struct state *, FILE *, int);static void dumparc(struct arc *, struct state *, FILE *);static void dumpcnfa(struct cnfa *, FILE *);static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);#endif/* === regc_cvec.c === */static struct cvec *newcvec(int, int, int);static struct cvec *clearcvec(struct cvec *);static void addchr(struct cvec *, chr);static void addrange(struct cvec *, chr, chr);static void addmcce(struct cvec *, chr *, chr *);static int	haschr(struct cvec *, chr);static struct cvec *getcvec(struct vars *, int, int, int);static void freecvec(struct cvec *);/* === regc_locale.c === */static int	pg_wc_isdigit(pg_wchar c);static int	pg_wc_isalpha(pg_wchar c);static int	pg_wc_isalnum(pg_wchar c);static int	pg_wc_isupper(pg_wchar c);static int	pg_wc_islower(pg_wchar c);static int	pg_wc_isgraph(pg_wchar c);static int	pg_wc_isprint(pg_wchar c);static int	pg_wc_ispunct(pg_wchar c);static int	pg_wc_isspace(pg_wchar c);static pg_wchar pg_wc_toupper(pg_wchar c);static pg_wchar pg_wc_tolower(pg_wchar c);static int	nmcces(struct vars *);static int	nleaders(struct vars *);static struct cvec *allmcces(struct vars *, struct cvec *);static celt element(struct vars *, chr *, chr *);static struct cvec *range(struct vars *, celt, celt, int);static int	before(celt, celt);static struct cvec *eclass(struct vars *, celt, int);static struct cvec *cclass(struct vars *, chr *, chr *, int);static struct cvec *allcases(struct vars *, chr);static int	cmp(const chr *, const chr *, size_t);static int	casecmp(const chr *, const chr *, size_t);/* internal variables, bundled for easy passing around */struct vars{	regex_t    *re;	chr		   *now;			/* scan pointer into string 指向扫描的字符的指针*/	chr		   *stop;			/* end of string 字符串结尾*/	chr		   *savenow;			/* saved now and stop for "subroutine call" */	chr		   *savestop;	int			err;			/* error code (0 if none) */	int			cflags;			/* copy of compile flags */	int			lasttype;		/* type of previous token */	int			nexttype;		/* type of next token */	chr			nextvalue;		/* value (if any) of next token */	int			lexcon;			/* lexical context type (see lex.c) */	int			nsubexp;		/* subexpression count */	struct subre **subs;		/* subRE pointer vector */	size_t		nsubs;			/* length of vector */	struct subre *sub10[10];	/* initial vector, enough for most */	struct nfa *nfa;			/* the NFA */	struct colormap *cm;		/* character color map */	color		nlcolor;		/* color of newline */	struct state *wordchrs;		/* state in nfa holding word-char outarcs */	struct subre *tree;			/* subexpression tree */	struct subre *treechain;	/* all tree nodes allocated */	struct subre *treefree;		/* any free tree nodes */	int			ntree;			/* number of tree nodes */	struct cvec *cv;			/* interface cvec */	struct cvec *cv2;			/* utility cvec */	struct cvec *mcces;			/* collating-element information */#define  ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c)))	struct state *mccepbegin;	/* in nfa, start of MCCE prototypes */	struct state *mccepend;		/* in nfa, end of MCCE prototypes */	struct subre *lacons;		/* lookahead-constraint vector */	int			nlacons;		/* size of lacons */};/* parsing macros; most know that `v' is the struct vars pointer */#define NEXT()	(next(v))		/* advance by one token */#define SEE(t)	(v->nexttype == (t))	/* is next token this? */#define EAT(t)	(SEE(t) && next(v))		/* if next is this, swallow it */#define VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */#define ISERR() VISERR(v)#define VERR(vv,e)	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\							((vv)->err = (e)))#define ERR(e)	VERR(v, e)		/* record an error */#define NOERR() {if (ISERR()) return;}	/* if error seen, return */#define NOERRN()	{if (ISERR()) return NULL;} /* NOERR with retval */#define NOERRZ()	{if (ISERR()) return 0;}	/* NOERR with retval */#define INSIST(c, e)	((c) ? 0 : ERR(e))		/* if condition false, error */#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */#define EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)/* token type codes, some also used as NFA arc types */#define EMPTY	'n'				/* no token present */#define EOS 'e'					/* end of string */#define PLAIN	'p'				/* ordinary character */#define DIGIT	'd'				/* digit (in bound) */#define BACKREF 'b'				/* back reference */#define COLLEL	'I'				/* start of [. */#define ECLASS	'E'				/* start of [= */#define CCLASS	'C'				/* start of [: */#define END 'X'					/* end of [. [= [: */#define RANGE	'R'				/* - within [] which might be range delim. */#define LACON	'L'				/* lookahead constraint subRE */#define AHEAD	'a'				/* color-lookahead arc */#define BEHIND	'r'				/* color-lookbehind arc */#define WBDRY	'w'				/* word boundary constraint */#define NWBDRY	'W'				/* non-word-boundary constraint */#define SBEGIN	'A'				/* beginning of string (even if not BOL) */#define SEND	'Z'				/* end of string (even if not EOL) */#define PREFER	'P'				/* length preference *//* is an arc colored, and hence on a color chain? */#define COLORED(a)	((a)->type == PLAIN || (a)->type == AHEAD || \							(a)->type == BEHIND)/* static function list */static struct fns functions = {	rfree,						/* regfree insides */};/* * pg_regcomp - compile regular expression */intpg_regcomp(regex_t *re,		   const chr *string,		   size_t len,		   int flags){	struct vars var;	struct vars *v = &var;	struct guts *g;	int			i;	size_t		j;#ifdef REG_DEBUG	FILE	   *debug = (flags & REG_PROGRESS) ? stdout : (FILE *) NULL;#else	FILE	   *debug = (FILE *) NULL;#endif#define  CNOERR()	 { if (ISERR()) return freev(v, v->err); }	/* sanity checks */	if (re == NULL || string == NULL)		return REG_INVARG;	if ((flags & REG_QUOTE) &&		(flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)))		return REG_INVARG;	if (!(flags & REG_EXTENDED) && (flags & REG_ADVF))		return REG_INVARG;	/* initial setup (after which freev() is callable) */	v->re = re;	v->now = (chr *) string;	v->stop = v->now + len;	v->savenow = v->savestop = NULL;	v->err = 0;	v->cflags = flags;	v->nsubexp = 0;	v->subs = v->sub10;	v->nsubs = 10;	for (j = 0; j < v->nsubs; j++)		v->subs[j] = NULL;	v->nfa = NULL;	v->cm = NULL;	v->nlcolor = COLORLESS;	v->wordchrs = NULL;	v->tree = NULL;	v->treechain = NULL;	v->treefree = NULL;	v->cv = NULL;	v->cv2 = NULL;	v->mcces = NULL;	v->lacons = NULL;	v->nlacons = 0;	re->re_magic = REMAGIC;	re->re_info = 0;			/* bits get set during parse */	re->re_csize = sizeof(chr);	re->re_guts = NULL;	re->re_fns = VS(&functions);	/* more complex setup, malloced things */	re->re_guts = VS(MALLOC(sizeof(struct guts)));	if (re->re_guts == NULL)		return freev(v, REG_ESPACE);	g = (struct guts *) re->re_guts;	g->tree = NULL;	initcm(v, &g->cmap);	v->cm = &g->cmap;	g->lacons = NULL;	g->nlacons = 0;	ZAPCNFA(g->search);	v->nfa = newnfa(v, v->cm, (struct nfa *) NULL);	CNOERR();	v->cv = newcvec(100, 20, 10);	if (v->cv == NULL)		return freev(v, REG_ESPACE);	i = nmcces(v);	if (i > 0)	{		v->mcces = newcvec(nleaders(v), 0, i);		CNOERR();		v->mcces = allmcces(v, v->mcces);		leaders(v, v->mcces);		addmcce(v->mcces, (chr *) NULL, (chr *) NULL);	/* dummy */	}	CNOERR();	/* parsing */	lexstart(v);				/* also handles prefixes */	if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH))	{		/* assign newline a unique color */		v->nlcolor = subcolor(v->cm, newline());		okcolors(v->nfa, v->cm);	}	CNOERR();	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);	assert(SEE(EOS));			/* even if error; ISERR() => SEE(EOS) */	CNOERR();	assert(v->tree != NULL);	/* finish setup of nfa and its subre tree */	specialcolors(v->nfa);	CNOERR();#ifdef REG_DEBUG	if (debug != NULL)	{		fprintf(debug, "\n\n\n========= RAW ==========\n");		dumpnfa(v->nfa, debug);		dumpst(v->tree, debug, 1);	}#endif	optst(v, v->tree);	v->ntree = numst(v->tree, 1);	markst(v->tree);	cleanst(v);#ifdef REG_DEBUG	if (debug != NULL)	{		fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");		dumpst(v->tree, debug, 1);	}#endif	/* build compacted NFAs for tree and lacons */	re->re_info |= nfatree(v, v->tree, debug);	CNOERR();	assert(v->nlacons == 0 || v->lacons != NULL);	for (i = 1; i < v->nlacons; i++)	{#ifdef REG_DEBUG		if (debug != NULL)			fprintf(debug, "\n\n\n========= LA%d ==========\n", i);#endif		nfanode(v, &v->lacons[i], debug);	}	CNOERR();	if (v->tree->flags & SHORTER)		NOTE(REG_USHORTEST);	/* build compacted NFAs for tree, lacons, fast search */#ifdef REG_DEBUG	if (debug != NULL)		fprintf(debug, "\n\n\n========= SEARCH ==========\n");#endif	/* can sacrifice main NFA now, so use it as work area */	(DISCARD) optimize(v->nfa, debug);	CNOERR();	makesearch(v, v->nfa);	CNOERR();	compact(v->nfa, &g->search);	CNOERR();	/* looks okay, package it up */	re->re_nsub = v->nsubexp;	v->re = NULL;				/* freev no longer frees re */	g->magic = GUTSMAGIC;	g->cflags = v->cflags;	g->info = re->re_info;	g->nsub = re->re_nsub;	g->tree = v->tree;	v->tree = NULL;	g->ntree = v->ntree;	g->compare = (v->cflags & REG_ICASE) ? casecmp : cmp;	g->lacons = v->lacons;	v->lacons = NULL;	g->nlacons = v->nlacons;#ifdef REG_DEBUG	if (flags & REG_DUMP)		dump(re, stdout);#endif	assert(v->err == 0);	return freev(v, 0);}/* * moresubs - enlarge subRE vector */static voidmoresubs(struct vars * v,		 int wanted)			/* want enough room for this one */{	struct subre **p;	size_t		n;	assert(wanted > 0 && (size_t) wanted >= v->nsubs);	n = (size_t) wanted *3 / 2 + 1;	if (v->subs == v->sub10)	{		p = (struct subre **) MALLOC(n * sizeof(struct subre *));		if (p != NULL)			memcpy(VS(p), VS(v->subs),				   v->nsubs * sizeof(struct subre *));	}	else		p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *));	if (p == NULL)	{		ERR(REG_ESPACE);		return;	}	v->subs = p;	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++)		*p = NULL;	assert(v->nsubs == n);	assert((size_t) wanted < v->nsubs);}/* * freev - free vars struct's substructures where necessary * * Optionally does error-number setting, and always returns error code * (if any), to make error-handling code terser. */static intfreev(struct vars * v,	  int err){	if (v->re != NULL)		rfree(v->re);	if (v->subs != v->sub10)		FREE(v->subs);	if (v->nfa != NULL)		freenfa(v->nfa);	if (v->tree != NULL)		freesubre(v, v->tree);	if (v->treechain != NULL)		cleanst(v);	if (v->cv != NULL)		freecvec(v->cv);	if (v->cv2 != NULL)		freecvec(v->cv2);	if (v->mcces != NULL)		freecvec(v->mcces);	if (v->lacons != NULL)		freelacons(v->lacons, v->nlacons);	ERR(err);					/* nop if err==0 */	return v->err;}/* * makesearch - turn an NFA into a search NFA (implicit prepend of .*?) * NFA must have been optimize()d already. */static voidmakesearch(struct vars * v,		   struct nfa * nfa){	struct arc *a;	struct arc *b;	struct state *pre = nfa->pre;	struct state *s;	struct state *s2;	struct state *slist;	/* no loops are needed if it's anchored */	for (a = pre->outs; a != NULL; a = a->outchain)	{		assert(a->type == PLAIN);		if (a->co != nfa->bos[0] && a->co != nfa->bos[1])			break;	}	if (a != NULL)	{		/* add implicit .* in front */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -