📄 regex.c

📁 sock5代理服务器源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
			if (*p == '+')				for (sp = mp; lp < sp; lp++)					store(*lp);			store(END);			store(END);			sp = mp;			while (--mp > lp)				*mp = mp[-1];			store(CLO);			mp = sp;			break;		case '\\':              /* tags, backrefs .. */			switch(*++p) {			case '(':				if (tagc < MAXTAG) {					tagstk[++tagi] = tagc;					store(BOT);					store(tagc++);				}				else					badpat("Too many \\(\\) pairs");				break;			case ')':				if (*sp == BOT)					badpat("Null pattern inside \\(\\)");				if (tagi > 0) {					store(EOT);					store(tagstk[tagi--]);				}				else					badpat("Unmatched \\)");				break;			case '<':				store(BOW);				break;			case '>':				if (*sp == BOW)					badpat("Null pattern inside \\<\\>");				store(EOW);				break;			case '1':			case '2':			case '3':			case '4':			case '5':			case '6':			case '7':			case '8':			case '9':				n = *p-'0';				if (tagi > 0 && tagstk[tagi] == n)					badpat("Cyclical reference");				if (tagc > n) {					store(REF);					store(n);				}				else					badpat("Undetermined reference");				break;#ifdef EXTEND			case 'b':				store(CHR);				store('\b');				break;			case 'n':				store(CHR);				store('\n');				break;			case 'f':				store(CHR);				store('\f');				break;			case 'r':				store(CHR);				store('\r');				break;			case 't':				store(CHR);				store('\t');				break;#endif			default:				store(CHR);				store(*p);			}			break;		default :               /* an ordinary char  */			store(CHR);			store(*p);			break;		}		sp = lp;	}	if (tagi > 0)		badpat("Unmatched \\(");	store(END);	sta = OKP;	return 0;}static const char *bol;static const char *bopat[MAXTAG];static const char *eopat[MAXTAG];static const char *pmatch P((const char *, CHAR *));/* * re_exec: * 	execute nfa to find a match. * *	special cases: (nfa[0])	 *		BOL *			Match only once, starting from the *			beginning. *		CHR *			First locate the character without *			calling pmatch, and if found, call *			pmatch for the remaining string. *		END *			re_comp failed, poor luser did not *			check for it. Fail fast. * *	If a match is found, bopat[0] and eopat[0] are set *	to the beginning and the end of the matched fragment, *	respectively. * */intre_exec(lp)register const char *lp;{	register char c;	register const char *ep = 0;	register CHAR *ap = nfa;	bol = lp;	bopat[0] = 0;	bopat[1] = 0;	bopat[2] = 0;	bopat[3] = 0;	bopat[4] = 0;	bopat[5] = 0;	bopat[6] = 0;	bopat[7] = 0;	bopat[8] = 0;	bopat[9] = 0;	switch(*ap) {	case BOL:			/* anchored: match from BOL only */		ep = pmatch(lp,ap);		break;	case CHR:			/* ordinary char: locate it fast */		c = *(ap+1);		while (*lp && *lp != c)			lp++;		if (!*lp)		/* if EOS, fail, else fall thru. */			return 0;	default:			/* regular matching all the way. */		while (*lp) {			if ((ep = pmatch(lp,ap)))				break;			lp++;		}		break;	case END:			/* munged automaton. fail always */		return 0;	}	if (!ep)		return 0;	if (internal_error)		return -1;	bopat[0] = lp;	eopat[0] = ep;	return 1;}/*  * pmatch:  *	internal routine for the hard part * * 	This code is mostly snarfed from an early * 	grep written by David Conroy. The backref and * 	tag stuff, and various other mods are by oZ. * *	special cases: (nfa[n], nfa[n+1]) *		CLO ANY *			We KNOW ".*" will match ANYTHING *			upto the end of line. Thus, go to *			the end of line straight, without *			calling pmatch recursively. As in *			the other closure cases, the remaining *			pattern must be matched by moving *			backwards on the string recursively, *			to find a match for xy (x is ".*" and  *			y is the remaining pattern) where *			the match satisfies the LONGEST match *			for x followed by a match for y. *		CLO CHR *			We can again scan the string forward *			for the single char without recursion,  *			and at the point of failure, we execute  *			the remaining nfa recursively, as *			described above. * *	At the end of a successful match, bopat[n] and eopat[n] *	are set to the beginning and end of subpatterns matched *	by tagged expressions (n = 1 to 9).	 * *//* * character classification table for word boundary * operators BOW and EOW. the reason for not using  * ctype macros is that we can let the user add into  * our own table. see re_modw. This table is not in * the bitset form, since we may wish to extend it * in the future for other character classifications.  * *	TRUE for 0-9 A-Z a-z _ */static char chrtyp[MAXCHR] = {	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 	0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 	1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 	0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 	1, 1, 1, 0, 0, 0, 0, 0	};#define inascii(x)	(0177&(x))#define iswordc(x) 	chrtyp[inascii(x)]#define isinset(x,y) 	((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])/* * skip values for CLO XXX to skip past the closure * */#define ANYSKIP	2 	/* [CLO] ANY END ...	     */#define CHRSKIP	3	/* [CLO] CHR chr END ...     */#define CCLSKIP 18	/* [CLO] CCL 16bytes END ... */static const char *pmatch(lp, ap)register const char *lp;register CHAR *ap;{	register int op, c, n;	register const char *e;		/* extra pointer for CLO */	register const char *bp;	/* beginning of subpat.. */	register const char *ep;	/* ending of subpat..	 */	const char *are;		/* to save the line ptr. */	while ((op = *ap++) != END)		switch(op) {		case CHR:			if (*lp++ != *ap++)				return 0;			break;		case ANY:			if (!*lp++)				return 0;			break;		case CCL:			c = *lp++;			if (!isinset(ap,c))				return 0;			ap += BITBLK;			break;		case BOL:			if (lp != bol)				return 0;			break;		case EOL:			if (*lp)				return 0;			break;		case BOT:			bopat[(int)(*ap++)] = lp;			break;		case EOT:			eopat[(int)(*ap++)] = lp;			break; 		case BOW:			if ((lp!=bol && iswordc(lp[-1])) || !iswordc(*lp))				return 0;			break;		case EOW:			if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp))				return 0;			break;		case REF:			n = *ap++;			bp = bopat[n];			ep = eopat[n];			while (bp < ep)				if (*bp++ != *lp++)					return 0;			break;		case CLO:			are = lp;			switch(*ap) {			case ANY:				while (*lp)					lp++;				n = ANYSKIP;				break;			case CHR:				c = *(ap+1);				while (*lp && c == *lp)					lp++;				n = CHRSKIP;				break;			case CCL:				while ((c = *lp) && isinset(ap+1,c))					lp++;				n = CCLSKIP;				break;			default:				internal_error++;				return 0;			}			ap += n;			while (lp >= are) {			    if ((e = pmatch(lp, ap)))					return e;				--lp;			}			return 0;		default:			internal_error++;			return 0;		}	return lp;}#endif /* Need regex libraries? Compile to nothing if not.  */
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -