📄 regcomp.c

📁 关系型数据库 Postgresql 6.5.2
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 *		The Regents of the University of California.  All rights reserved. * * This code is derived from software contributed to Berkeley by * Henry Spencer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright *	  notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright *	  notice, this list of conditions and the following disclaimer in the *	  documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software *	  must display the following acknowledgement: *		This product includes software developed by the University of *		California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors *	  may be used to endorse or promote products derived from this software *	  without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.	IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * *		@(#)regcomp.c	8.5 (Berkeley) 3/20/94 */#if defined(LIBC_SCCS) && !defined(lint)static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";#endif	 /* LIBC_SCCS and not lint */#include <sys/types.h>#include <stdio.h>#include <string.h>#include <ctype.h>#include <limits.h>#include <stdlib.h>#include <assert.h>#include <regex/regex.h>#include <regex/utils.h>#include <regex/regex2.h>#include <regex/cclass.h>#include <regex/cname.h>/* * parse structure, passed up and down to avoid global variables and * other clumsinesses */struct parse{	pg_wchar   *next;			/* next character in RE */	pg_wchar   *end;			/* end of string (-> NUL normally) */	int			error;			/* has an error been seen? */	sop		   *strip;			/* malloced strip */	sopno		ssize;			/* malloced strip size (allocated) */	sopno		slen;			/* malloced strip length (used) */	int			ncsalloc;		/* number of csets allocated */	struct re_guts *g;#define  NPAREN  10				/* we need to remember () 1-9 for back								 * refs */	sopno		pbegin[NPAREN]; /* -> ( ([0] unused) */	sopno		pend[NPAREN];	/* -> ) ([0] unused) */};/* ========= begin header generated by ./mkh ========= */#ifdef __cplusplusextern		"C"{#endif/* === regcomp.c === */	static void p_ere(struct parse * p, int stop);	static void p_ere_exp(struct parse * p);	static void p_str(struct parse * p);	static void p_bre(struct parse * p, int end1, int end2);	static int	p_simp_re(struct parse * p, int starordinary);	static int	p_count(struct parse * p);	static void p_bracket(struct parse * p);	static void p_b_term(struct parse * p, cset *cs);	static void p_b_cclass(struct parse * p, cset *cs);	static void p_b_eclass(struct parse * p, cset *cs);	static pg_wchar p_b_symbol(struct parse * p);	static char p_b_coll_elem(struct parse * p, int endc);#ifdef MULTIBYTE	static unsigned char othercase(int ch);#else	static char othercase(int ch);#endif	static void bothcases(struct parse * p, int ch);	static void ordinary(struct parse * p, int ch);	static void nonnewline(struct parse * p);	static void repeat(struct parse * p, sopno start, int from, int to);	static int	seterr(struct parse * p, int e);	static cset *allocset(struct parse * p);	static void freeset(struct parse * p, cset *cs);	static int	freezeset(struct parse * p, cset *cs);	static int	firstch(struct parse * p, cset *cs);	static int	nch(struct parse * p, cset *cs);	static void mcadd(struct parse * p, cset *cs, char *cp);	static void mcinvert(struct parse * p, cset *cs);	static void mccase(struct parse * p, cset *cs);	static int	isinsets(struct re_guts * g, int c);	static int	samesets(struct re_guts * g, int c1, int c2);	static void categorize(struct parse * p, struct re_guts * g);	static sopno dupl(struct parse * p, sopno start, sopno finish);	static void doemit(struct parse * p, sop op, size_t opnd);	static void doinsert(struct parse * p, sop op, size_t opnd, sopno pos);	static void dofwd(struct parse * p, sopno pos, sop value);	static void enlarge(struct parse * p, sopno size);	static void stripsnug(struct parse * p, struct re_guts * g);	static void findmust(struct parse * p, struct re_guts * g);	static sopno pluscount(struct parse * p, struct re_guts * g);	static int	pg_isdigit(int c);	static int	pg_isalpha(int c);	static int	pg_isupper(int c);	static int	pg_islower(int c);#ifdef __cplusplus}#endif/* ========= end header generated by ./mkh ========= */static pg_wchar nuls[10];		/* place to point scanner in event of								 * error *//* * macros for use with parse structure * BEWARE:	these know that the parse structure is named `p' !!! */#define PEEK()	(*p->next)#define PEEK2() (*(p->next+1))#define MORE()	(p->next < p->end)#define MORE2() (p->next+1 < p->end)#define SEE(c)	(MORE() && PEEK() == (c))#define SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))#define EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)#define EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)#define NEXT()	(p->next++)#define NEXT2() (p->next += 2)#define NEXTn(n)		(p->next += (n))#define GETNEXT()		(*p->next++)#define SETERROR(e)		seterr(p, (e))#define REQUIRE(co, e)	if (!(co)) SETERROR(e)#define MUSTSEE(c, e)	REQUIRE(MORE() && PEEK() == (c), e)#define MUSTEAT(c, e)	REQUIRE(MORE() && GETNEXT() == (c), e)#define MUSTNOTSEE(c, e)		REQUIRE(!MORE() || PEEK() != (c), e)#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)#define AHEAD(pos)				dofwd(p, pos, HERE()-(pos))#define ASTERN(sop, pos)		EMIT(sop, HERE()-pos)#define HERE()			(p->slen)#define THERE()			(p->slen - 1)#define THERETHERE()	(p->slen - 2)#define DROP(n) (p->slen -= (n))#ifndef NDEBUGstatic int	never = 0;			/* for use in asserts; shuts lint up */#else#define never	0				/* some <assert.h>s have bugs too */#endif/* - regcomp - interface for parser and compilation = extern int regcomp(regex_t *, const char *, int); = #define		REG_BASIC		0000 = #define		REG_EXTENDED	0001 = #define		REG_ICASE		0002 = #define		REG_NOSUB		0004 = #define		REG_NEWLINE		0010 = #define		REG_NOSPEC		0020 = #define		REG_PEND		0040 = #define		REG_DUMP		0200 */int								/* 0 success, otherwise REG_something */pg95_regcomp(preg, pattern, cflags)regex_t    *preg;const char *pattern;int			cflags;{	struct parse pa;	struct re_guts *g;	struct parse *p = &pa;	int			i;	size_t		len;#ifdef MULTIBYTE	pg_wchar   *wcp;#endif#ifdef REDEBUG#define  GOODFLAGS(f)	 (f)#else#define  GOODFLAGS(f)	 ((f)&~REG_DUMP)#endif	cflags = GOODFLAGS(cflags);	if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC))		return REG_INVARG;	if (cflags & REG_PEND)	{#ifdef MULTIBYTE		wcp = preg->patsave;		if (preg->re_endp < wcp)			return REG_INVARG;		len = preg->re_endp - wcp;#else		if (preg->re_endp < pattern)			return REG_INVARG;		len = preg->re_endp - pattern;#endif	}	else	{#ifdef MULTIBYTE		wcp = (pg_wchar *) malloc((strlen(pattern) + 1) * sizeof(pg_wchar));		if (wcp == NULL)			return REG_ESPACE;		preg->patsave = wcp;		(void) pg_mb2wchar((unsigned char *) pattern, wcp);		len = pg_wchar_strlen(wcp);#else		len = strlen((char *) pattern);#endif	}	/* do the mallocs early so failure handling is easy */	g = (struct re_guts *) malloc(sizeof(struct re_guts) +								  (NC - 1) * sizeof(cat_t));	if (g == NULL)		return REG_ESPACE;	p->ssize = len / (size_t) 2 *(size_t) 3 + (size_t) 1;		/* ugh */	p->strip = (sop *) malloc(p->ssize * sizeof(sop));	p->slen = 0;	if (p->strip == NULL)	{		free((char *) g);		return REG_ESPACE;	}	/* set things up */	p->g = g;#ifdef MULTIBYTE	p->next = wcp;#else	p->next = (pg_wchar *) pattern;		/* convenience; we do not modify										 * it */#endif	p->end = p->next + len;	p->error = 0;	p->ncsalloc = 0;	for (i = 0; i < NPAREN; i++)	{		p->pbegin[i] = 0;		p->pend[i] = 0;	}	g->csetsize = NC;	g->sets = NULL;	g->setbits = NULL;	g->ncsets = 0;	g->cflags = cflags;	g->iflags = 0;	g->nbol = 0;	g->neol = 0;	g->must = NULL;	g->mlen = 0;	g->nsub = 0;	g->ncategories = 1;			/* category 0 is "everything else" */	g->categories = &g->catspace[-(CHAR_MIN)];	memset((char *) g->catspace, 0, NC * sizeof(cat_t));	g->backrefs = 0;	/* do it */	EMIT(OEND, 0);	g->firststate = THERE();	if (cflags & REG_EXTENDED)		p_ere(p, OUT);	else if (cflags & REG_NOSPEC)		p_str(p);	else		p_bre(p, OUT, OUT);	EMIT(OEND, 0);	g->laststate = THERE();	/* tidy up loose ends and fill things in */	categorize(p, g);	stripsnug(p, g);	findmust(p, g);	g->nplus = pluscount(p, g);	g->magic = MAGIC2;	preg->re_nsub = g->nsub;	preg->re_g = g;	preg->re_magic = MAGIC1;#ifndef REDEBUG	/* not debugging, so can't rely on the assert() in regexec() */	if (g->iflags & BAD)		SETERROR(REG_ASSERT);#endif	/* win or lose, we're done */	if (p->error != 0)			/* lose */		pg95_regfree(preg);	return p->error;}/* - p_ere - ERE parser top level, concatenation and alternation == static void p_ere(struct parse *p, int stop); */static voidp_ere(p, stop)struct parse *p;int			stop;				/* character this ERE should end at */{	char		c;	sopno		prevback = 0;	sopno		prevfwd = 0;	sopno		conc;	int			first = 1;		/* is this the first alternative? */	for (;;)	{		/* do a bunch of concatenated expressions */		conc = HERE();		while (MORE() && (c = PEEK()) != '|' && c != stop)			p_ere_exp(p);		REQUIRE(HERE() != conc, REG_EMPTY);		/* require nonempty */		if (!EAT('|'))			break;				/* NOTE BREAK OUT */		if (first)		{			INSERT(OCH_, conc); /* offset is wrong */			prevfwd = conc;			prevback = conc;			first = 0;		}		ASTERN(OOR1, prevback);		prevback = THERE();		AHEAD(prevfwd);			/* fix previous offset */		prevfwd = HERE();		EMIT(OOR2, 0);			/* offset is very wrong */	}	if (!first)	{							/* tail-end fixups */		AHEAD(prevfwd);		ASTERN(O_CH, prevback);	}	assert(!MORE() || SEE(stop));}/* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op == static void p_ere_exp(struct parse *p); */static voidp_ere_exp(p)struct parse *p;{	pg_wchar	c;	sopno		pos;	int			count;	int			count2;	sopno		subno;	int			wascaret = 0;	assert(MORE());				/* caller should have ensured this */	c = GETNEXT();	pos = HERE();	switch (c)	{		case '(':			REQUIRE(MORE(), REG_EPAREN);			p->g->nsub++;			subno = p->g->nsub;			if (subno < NPAREN)				p->pbegin[subno] = HERE();			EMIT(OLPAREN, subno);			if (!SEE(')'))				p_ere(p, ')');			if (subno < NPAREN)			{				p->pend[subno] = HERE();				assert(p->pend[subno] != 0);			}			EMIT(ORPAREN, subno);			MUSTEAT(')', REG_EPAREN);			break;#ifndef POSIX_MISTAKE		case ')':				/* happens only if no current unmatched ( */			/*			 * You may ask, why the ifndef?  Because I didn't notice this			 * until slightly too late for 1003.2, and none of the other			 * 1003.2 regular-expression reviewers noticed it at all.  So			 * an unmatched ) is legal POSIX, at least until we can get it			 * fixed.			 */			SETERROR(REG_EPAREN);			break;#endif		case '^':			EMIT(OBOL, 0);			p->g->iflags |= USEBOL;			p->g->nbol++;			wascaret = 1;			break;		case '$':			EMIT(OEOL, 0);			p->g->iflags |= USEEOL;			p->g->neol++;			break;		case '|':			SETERROR(REG_EMPTY);			break;		case '*':		case '+':		case '?':			SETERROR(REG_BADRPT);			break;		case '.':			if (p->g->cflags & REG_NEWLINE)				nonnewline(p);			else				EMIT(OANY, 0);			break;		case '[':			p_bracket(p);			break;		case '\\':			REQUIRE(MORE(), REG_EESCAPE);			c = GETNEXT();			ordinary(p, c);			break;		case '{':				/* okay as ordinary except if digit								 * follows */			REQUIRE(!MORE() || !pg_isdigit(PEEK()), REG_BADRPT);			/* FALLTHROUGH */		default:			ordinary(p, c);			break;	}	if (!MORE())		return;	c = PEEK();	/* we call { a repetition if followed by a digit */	if (!(c == '*' || c == '+' || c == '?' ||		  (c == '{' && MORE2() && pg_isdigit(PEEK2()))))		return;					/* no repetition, we're done */	NEXT();	REQUIRE(!wascaret, REG_BADRPT);	switch (c)	{		case '*':				/* implemented as +? */			/* this case does not require the (y|) trick, noKLUDGE */			INSERT(OPLUS_, pos);			ASTERN(O_PLUS, pos);			INSERT(OQUEST_, pos);			ASTERN(O_QUEST, pos);			break;		case '+':			INSERT(OPLUS_, pos);			ASTERN(O_PLUS, pos);			break;		case '?':			/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */			INSERT(OCH_, pos);	/* offset slightly wrong */			ASTERN(OOR1, pos);	/* this one's right */			AHEAD(pos);			/* fix the OCH_ */			EMIT(OOR2, 0);		/* offset very wrong... */			AHEAD(THERE());		/* ...so fix it */			ASTERN(O_CH, THERETHERE());			break;		case '{':			count = p_count(p);			if (EAT(','))			{				if (pg_isdigit(PEEK()))				{					count2 = p_count(p);					REQUIRE(count <= count2, REG_BADBR);				}				else/* single number with comma */					count2 = INFINITY;			}			else/* just a single number */				count2 = count;			repeat(p, pos, count, count2);			if (!EAT('}'))			{					/* error heuristics */				while (MORE() && PEEK() != '}')					NEXT();				REQUIRE(MORE(), REG_EBRACE);				SETERROR(REG_BADBR);			}			break;	}	if (!MORE())		return;	c = PEEK();	if (!(c == '*' || c == '+' || c == '?' ||		  (c == '{' && MORE2() && pg_isdigit(PEEK2()))))		return;	SETERROR(REG_BADRPT);}/* - p_str - string (no metacharacters) "parser" == static void p_str(struct parse *p); */static voidp_str(p)struct parse *p;{	REQUIRE(MORE(), REG_EMPTY);	while (MORE())		ordinary(p, GETNEXT());}/* - p_bre - BRE parser top level, anchoring and concatenation == static void p_bre(struct parse *p, int end1, \ ==		int end2); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first * taken as an ordinary character and then revised to be an anchor.  The * only undesirable side effect is that '$' gets included as a character * category in such cases.	This is fairly harmless; not worth fixing. * The amount of lookahead needed to avoid this kludge is excessive. */static voidp_bre(p, end1, end2)struct parse *p;int			end1;				/* first terminating character */int			end2;				/* second terminating character */{	sopno		start = HERE();	int			first = 1;		/* first subexpression? */	int			wasdollar = 0;	if (EAT('^'))	{		EMIT(OBOL, 0);		p->g->iflags |= USEBOL;		p->g->nbol++;	}	while (MORE() && !SEETWO(end1, end2))	{		wasdollar = p_simp_re(p, first);		first = 0;	}	if (wasdollar)	{							/* oops, that was a trailing anchor */		DROP(1);		EMIT(OEOL, 0);		p->g->iflags |= USEEOL;		p->g->neol++;	}	REQUIRE(HERE() != start, REG_EMPTY);		/* require nonempty */}/* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition == static int p_simp_re(struct parse *p, int starordinary); */static int						/* was the simple RE an unbackslashed $? */p_simp_re(p, starordinary)struct parse *p;int			starordinary;		/* is a leading * an ordinary character? */{	int			c;	int			count;	int			count2;	sopno		pos;	int			i;	sopno		subno;#define  BACKSL  (1<<24)	pos = HERE();				/* repetion op, if any, covers from here */	assert(MORE());				/* caller should have ensured this */	c = GETNEXT();	if (c == '\\')	{		REQUIRE(MORE(), REG_EESCAPE);#ifdef MULTIBYTE		c = BACKSL | (pg_wchar) GETNEXT();#else		c = BACKSL | (unsigned char) GETNEXT();#endif	}	switch (c)	{		case '.':			if (p->g->cflags & REG_NEWLINE)				nonnewline(p);			else				EMIT(OANY, 0);			break;		case '[':			p_bracket(p);			break;		case BACKSL | '{':			SETERROR(REG_BADRPT);			break;		case BACKSL | '(':			p->g->nsub++;			subno = p->g->nsub;			if (subno < NPAREN)
12 3 下一页
💿 文件大小 6496 K
👤 上传用户 huajunun
📂 所属分类数据库系统
🏷️ 相关标签

#Postgresql #数据库
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -