⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.c

📁 sock5代理服务器源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <pmachine.h>#ifndef HAVE_RE_COMP/* * These routines are BSD regex(3)/ed(1) compatible regular-expression * routines written by Ozan S. Yigit, Computer Science, York University. * Parts of the code that are not needed by Prospero have been removed, * but most of the accompanying information has been left intact.  * This file is to be included on those operating systems that do not * support re_comp and re_exec. *//* * regex - Regular expression pattern matching *         and replacement * * by:  Ozan S. Yigit (oz@nexus.yorku.ca) *	Dept. of Computing Services *      York University * * These routines are the PUBLIC DOMAIN equivalents  * of regex routines as found in 4.nBSD UN*X, with minor * extensions. * * Modification history: * * $Log: regex.c,v $ * Revision 1.4  1996/04/25 20:06:29  blob * Added const's so that it will compile on OSs that have const in the prototype in * unistd.h * * Revision 1.3  1996/04/11  06:52:27  blob * *** empty log message *** * * Revision 1.2  1996/04/11  06:51:34  blob * Cleaned up warnings... * * Revision 1.1.1.2  1996/03/14  22:06:31  blob * Try 1000000... * * Revision 1.1.1.1  1996/03/13  20:34:40  blob * Initial Socks5 Beta import. * * Revision 1.1  1991/11/20  02:32:13  brendan * entered into RCS * * Revision 1.1  1991/11/20  02:32:13  brendan * entered into RCS * * Revision 1.3  89/04/01  14:18:09  oz * Change all references to a dfa: this is actually an nfa. *  * Revision 1.2  88/08/28  15:36:04  oz * Use a complement bitmap to represent NCL. * This removes the need to have seperate  * code in the pmatch case block - it is  * just CCL code now. *  * Use the actual CCL code in the CLO * section of pmatch. No need for a recursive * pmatch call. *  * Use a bitmap table to set char bits in an * 8-bit chunk. *  * Routines: *      re_comp:        compile a regular expression into *                      a NFA. * *			char *re_comp(s) *			char *s; * *      re_exec:        execute the NFA to match a pattern. 
* *			int re_exec(s) *			char *s; * * Regular Expressions: * *      [1]     char    matches itself, unless it is a special *                      character (metachar): . \ [ ] * + ^ $ * *      [2]     .       matches any character. * *      [3]     \       matches the character following it, except *			when followed by a left or right round bracket, *			a digit 1 to 9 or a left or right angle bracket.  *			(see [7], [8] and [9]) *			It is used as an escape character for all  *			other meta-characters, and itself. When used *			in a set ([4]), it is treated as an ordinary *			character. * *      [4]     [set]   matches one of the characters in the set. *                      If the first character in the set is "^", *                      it matches a character NOT in the set, i.e.  *			complements the set. A shorthand S-E is  *			used to specify a set of characters S up to  *			E, inclusive. The special characters "]" and  *			"-" have no special meaning if they appear  *			as the first chars in the set. *                      examples:        match: * *                              [a-z]    any lowercase alpha * *                              [^]-]    any char except ] and - * *                              [^A-Z]   any char except uppercase *                                       alpha * *                              [a-zA-Z] any alpha * *      [5]     *       any regular expression form [1] to [4], followed by *                      closure char (*) matches zero or more matches of *                      that form. * *      [6]     +       same as [5], except it matches one or more. * *      [7]             a regular expression in the form [1] to [10], enclosed *                      as \(form\) matches what form matches. The enclosure *                      creates a set of tags, used for [8] and for *                      pattern substitution. The tagged forms are numbered *			starting from 1. 
* *      [8]             a \ followed by a digit 1 to 9 matches whatever a *                      previously tagged regular expression ([7]) matched. * *	[9]	\<	a regular expression starting with a \< construct *		\>	and/or ending with a \> construct, restricts the *			pattern matching to the beginning of a word, and/or *			the end of a word. A word is defined to be a character *			string beginning and/or ending with the characters *			A-Z a-z 0-9 and _. It must also be preceded and/or *			followed by any character outside those mentioned. * *      [10]            a composite regular expression xy where x and y *                      are in the form [1] to [10] matches the longest *                      match of x followed by a match for y. * *      [11]	^	a regular expression starting with a ^ character *		$	and/or ending with a $ character, restricts the *                      pattern matching to the beginning of the line, *                      or the end of line. [anchors] Elsewhere in the *			pattern, ^ and $ are treated as ordinary characters. * * * Acknowledgements: * *	HCR's Hugh Redelmeier has been most helpful in various *	stages of development. He convinced me to include BOW *	and EOW constructs, originally invented by Rob Pike at *	the University of Toronto. * * References: *              Software tools			Kernighan & Plauger *              Software tools in Pascal        Kernighan & Plauger *              Grep [rsx-11 C dist]            David Conroy *		ed - text editor		Un*x Programmer's Manual *		Advanced editing on Un*x	B. W. Kernighan *		regexp routines			Henry Spencer * * Notes: * *	This implementation uses a bit-set representation for character *	classes for speed and compactness. Each character is represented  *	by one bit in a 128-bit block. Thus, CCL always takes a  *	constant 16 bytes in the internal nfa, and re_exec does a single *	bit comparison to locate the character in the set. 
* * Examples: * *	pattern:	foo*.* *	compile:	CHR f CHR o CLO CHR o END CLO ANY END END *	matches:	fo foo fooo foobar fobar foxx ... * *	pattern:	fo[ob]a[rz]	 *	compile:	CHR f CHR o CCL bitset CHR a CCL bitset END *	matches:	fobar fooar fobaz fooaz * *	pattern:	foo\\+ *	compile:	CHR f CHR o CHR o CHR \ CLO CHR \ END END *	matches:	foo\ foo\\ foo\\\  ... * *	pattern:	\(foo\)[1-3]\1	(same as foo[1-3]foo) *	compile:	BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END *	matches:	foo1foo foo2foo foo3foo * *	pattern:	\(fo.*\)-\1 *	compile:	BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END *	matches:	foo-foo fo-fo fob-fob foobar-foobar ... *  */#define MAXNFA  1024#define MAXTAG  10#define OKP     1#define NOP     0#define CHR     1#define ANY     2#define CCL     3#define BOL     4#define EOL     5#define BOT     6#define EOT     7#define BOW	8#define EOW	9#define REF     10#define CLO     11#define END     0/* * The following defines are not meant * to be changeable. They are for readability * only. * */#define MAXCHR	128#define CHRBIT	8#define BITBLK	MAXCHR/CHRBIT#define BLKIND	0170#define BITIND	07#define ASCIIB	0177typedef /*unsigned*/ char CHAR;static int  tagstk[MAXTAG];             /* subpat tag stack..*/static CHAR nfa[MAXNFA];		/* automaton..       */static int  sta = NOP;               	/* status of lastpat */static CHAR bittab[BITBLK];		/* bit table for CCL */					/* pre-set bits...   */static CHAR bitarr[] = {1,2,4,8,16,32,64,128};static int internal_error;static voidchset(c)register CHAR c;{	bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];}#define badpat(x)	return (*nfa = END, x)#define store(x)	*mp++ = x char *   re_comp(pat)const char *pat;{	register const char *p;               /* pattern pointer   */	register CHAR *mp = nfa;        /* nfa pointer       */	register CHAR *lp;              /* saved pointer..   */	register CHAR *sp = nfa;        /* another one..     
*/	register int tagi = 0;          /* tag stack index   */	register int tagc = 1;          /* actual tag count  */	register int n;	register CHAR mask;		/* xor mask -CCL/NCL */	int c1, c2;			if (!pat || !*pat)		if (sta)			return 0;		else			badpat("No previous regular expression");	sta = NOP;	for (p = pat; *p; p++) {		lp = mp;		switch(*p) {		case '.':               /* match any char..  */			store(ANY);			break;		case '^':               /* match beginning.. */			if (p == pat)				store(BOL);			else {				store(CHR);				store(*p);			}			break;		case '$':               /* match endofline.. */			if (!*(p+1))				store(EOL);			else {				store(CHR);				store(*p);			}			break;		case '[':               /* match char class..*/			store(CCL);			if (*++p == '^') {				mask = 0377;					p++;			}			else				mask = 0;			if (*p == '-')		/* real dash */				chset(*p++);			if (*p == ']')		/* real brac */				chset(*p++);			while (*p && *p != ']') {				if (*p == '-' && *(p+1) && *(p+1) != ']') {					p++;					c1 = *(p-2) + 1;					c2 = *p++;					while (c1 <= c2)						chset(c1++);				}#ifdef EXTEND				else if (*p == '\\' && *(p+1)) {					p++;					chset(*p++);				}#endif				else					chset(*p++);			}			if (!*p)				badpat("Missing ]");			for (n = 0; n < BITBLK; bittab[n++] = (char) 0)				store(mask ^ bittab[n]);				break;		case '*':               /* match 0 or more.. */		case '+':               /* match 1 or more.. */			if (p == pat)				badpat("Empty closure");			lp = sp;		/* previous opcode */			if (*lp == CLO)		/* equivalence..   */				break;			switch(*lp) {			case BOL:			case BOT:			case EOT:			case BOW:			case EOW:			case REF:				badpat("Illegal closure");			default:				break;			}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -