⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.c

📁 Boost provides free peer-reviewed portable C++ source libraries. We emphasize libraries that work
💻 C
📖 第 1 页 / 共 3 页
字号:
		regoptail(ret, ret);			/* back */		regtail(ret, regnode(BRANCH));		/* or */		regtail(ret, regnode(NOTHING));		/* null. */	} else if (op == '+' && (flags&SIMPLE))		reginsert(PLUS, ret);	else if (op == '+') {		/* Emit x+ as x(&|), where & means "self". */		next = regnode(BRANCH);			/* Either */		regtail(ret, next);		regtail(regnode(BACK), ret);		/* loop back */		regtail(next, regnode(BRANCH));		/* or */		regtail(ret, regnode(NOTHING));		/* null. */	} else if (op == '?') {		/* Emit x? as (x|) */		reginsert(BRANCH, ret);			/* Either x */		regtail(ret, regnode(BRANCH));		/* or */		next = regnode(NOTHING);		/* null. */		regtail(ret, next);		regoptail(ret, next);	}	regparse++;	if (ISMULT(*regparse))		FAIL("nested *?+");	return(ret);}/* - regatom - the lowest level * * Optimization:  gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run.  Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */static char *regatom( int *flagp ){	register char *ret;	int flags;	*flagp = WORST;		/* Tentatively. */	switch (*regparse++) {	/* FIXME: these chars only have meaning at beg/end of pat? */	case '^':		ret = regnode(BOL);		break;	case '$':		ret = regnode(EOL);		break;	case '.':		ret = regnode(ANY);		*flagp |= HASWIDTH|SIMPLE;		break;	case '[': {			register int classr;			register int classend;			if (*regparse == '^') {	/* Complement of range. */				ret = regnode(ANYBUT);				regparse++;			} else				ret = regnode(ANYOF);			if (*regparse == ']' || *regparse == '-')				regc(*regparse++);			while (*regparse != '\0' && *regparse != ']') {				if (*regparse == '-') {					regparse++;					if (*regparse == ']' || *regparse == '\0')						regc('-');					else {						classr = UCHARAT(regparse-2)+1;						classend = UCHARAT(regparse);						if (classr > classend+1)							FAIL("invalid [] range");						for (; classr <= classend; classr++)							regc(classr);						regparse++;					}				} else					regc(*regparse++);			}			regc('\0');			if (*regparse != ']')				FAIL("unmatched []");			regparse++;			*flagp |= HASWIDTH|SIMPLE;		}		break;	case '(':		ret = reg(1, &flags);		if (ret == NULL)			return(NULL);		*flagp |= flags&(HASWIDTH|SPSTART);		break;	case '\0':	case '|':	case '\n':	case ')':		FAIL("internal urp");	/* Supposed to be caught earlier. */		break;	case '?':	case '+':	case '*':		FAIL("?+* follows nothing");		break;	case '\\':		switch (*regparse++) {		case '\0':			FAIL("trailing \\");			break;		case '<':			ret = regnode(WORDA);			break;		case '>':			ret = regnode(WORDZ);			break;		/* FIXME: Someday handle \1, \2, ... */		default:			/* Handle general quoted chars in exact-match routine */			goto de_fault;		}		break;	de_fault:	default:		/*		 * Encode a string of characters to be matched exactly.		 *		 * This is a bit tricky due to quoted chars and due to		 * '*', '+', and '?' taking the SINGLE char previous		 * as their operand.		 *		 * On entry, the char at regparse[-1] is going to go		 * into the string, no matter what it is.  (It could be		 * following a \ if we are entered from the '\' case.)		 * 		 * Basic idea is to pick up a good char in  ch  and		 * examine the next char.  If it's *+? then we twiddle.		 * If it's \ then we frozzle.  If it's other magic char		 * we push  ch  and terminate the string.  If none of the		 * above, we push  ch  on the string and go around again.		 *		 *  regprev  is used to remember where "the current char"		 * starts in the string, if due to a *+? we need to back		 * up and put the current char in a separate, 1-char, string.		 * When  regprev  is NULL,  ch  is the only char in the		 * string; this is used in *+? handling, and in setting		 * flags |= SIMPLE at the end.		 */		{			char *regprev;			register char ch;			regparse--;			/* Look at cur char */			ret = regnode(EXACTLY);			for ( regprev = 0 ; ; ) {				ch = *regparse++;	/* Get current char */				switch (*regparse) {	/* look at next one */				default:					regc(ch);	/* Add cur to string */					break;				case '.': case '[': case '(':				case ')': case '|': case '\n':				case '$': case '^':				case '\0':				/* FIXME, $ and ^ should not always be magic */				magic:					regc(ch);	/* dump cur char */					goto done;	/* and we are done */				case '?': case '+': case '*':					if (!regprev) 	/* If just ch in str, */						goto magic;	/* use it */					/* End mult-char string one early */					regparse = regprev; /* Back up parse */					goto done;				case '\\':					regc(ch);	/* Cur char OK */					switch (regparse[1]){ /* Look after \ */					case '\0':					case '<':					case '>':					/* FIXME: Someday handle \1, \2, ... */						goto done; /* Not quoted */					default:						/* Backup point is \, scan							 * point is after it. */						regprev = regparse;						regparse++; 						continue;	/* NOT break; */					}				}				regprev = regparse;	/* Set backup point */			}		done:			regc('\0');			*flagp |= HASWIDTH;			if (!regprev)		/* One char? */				*flagp |= SIMPLE;		}		break;	}	return(ret);}/* - regnode - emit a node */static char *			/* Location. */regnode( int op ){	register char *ret;	register char *ptr;	ret = regcode;	if (ret == &regdummy) {		regsize += 3;		return(ret);	}	ptr = ret;	*ptr++ = op;	*ptr++ = '\0';		/* Null "next" pointer. */	*ptr++ = '\0';	regcode = ptr;	return(ret);}/* - regc - emit (if appropriate) a byte of code */static voidregc( int b ){	if (regcode != &regdummy)		*regcode++ = b;	else		regsize++;}/* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */static voidreginsert(	char op,	char *opnd ){	register char *src;	register char *dst;	register char *place;	if (regcode == &regdummy) {		regsize += 3;		return;	}	src = regcode;	regcode += 3;	dst = regcode;	while (src > opnd)		*--dst = *--src;	place = opnd;		/* Op node, where operand used to be. */	*place++ = op;	*place++ = '\0';	*place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */static voidregtail(	char *p,	char *val ){	register char *scan;	register char *temp;	register int offset;	if (p == &regdummy)		return;	/* Find last node. */	scan = p;	for (;;) {		temp = regnext(scan);		if (temp == NULL)			break;		scan = temp;	}	if (OP(scan) == BACK)		offset = scan - val;	else		offset = val - scan;	*(scan+1) = (offset>>8)&0377;	*(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */static voidregoptail(	char *p,	char *val ){	/* "Operandless" and "op != BRANCH" are synonymous in practice. */	if (p == NULL || p == &regdummy || OP(p) != BRANCH)		return;	regtail(OPERAND(p), val);}/* * regexec and friends *//* * Global work variables for regexec(). */static char *reginput;		/* String-input pointer. */static char *regbol;		/* Beginning of input, for ^ check. */static char **regstartp;	/* Pointer to startp array. */static char **regendp;		/* Ditto for endp. *//* * Forwards. */STATIC int regtry( regexp *prog, char *string );STATIC int regmatch( char *prog );STATIC int regrepeat( char *p );#ifdef DEBUGint regnarrate = 0;void regdump();STATIC char *regprop();#endif/* - regexec - match a regexp against a string */intregexec(	register regexp *prog,	register char *string ){	register char *s;	/* Be paranoid... */	if (prog == NULL || string == NULL) {		regerror("NULL parameter");		return(0);	}	/* Check validity of program. */	if (UCHARAT(prog->program) != MAGIC) {		regerror("corrupted program");		return(0);	}	/* If there is a "must appear" string, look for it. */	if (prog->regmust != NULL) {		s = (char *)string;		while ((s = strchr(s, prog->regmust[0])) != NULL) {			if (strncmp(s, prog->regmust, prog->regmlen) == 0)				break;	/* Found it. */			s++;		}		if (s == NULL)	/* Not present. */			return(0);	}	/* Mark beginning of line for ^ . */	regbol = (char *)string;	/* Simplest case:  anchored match need be tried only once. */	if (prog->reganch)		return(regtry(prog, string));	/* Messy cases:  unanchored match. */	s = (char *)string;	if (prog->regstart != '\0')		/* We know what char it must start with. */		while ((s = strchr(s, prog->regstart)) != NULL) {			if (regtry(prog, s))				return(1);			s++;		}	else		/* We don't -- general case. */		do {			if (regtry(prog, s))				return(1);		} while (*s++ != '\0');	/* Failure. */	return(0);}/* - regtry - try match at specific point */static int			/* 0 failure, 1 success */regtry(	regexp *prog,	char *string ){	register int i;	register char **sp;	register char **ep;	reginput = string;	regstartp = prog->startp;	regendp = prog->endp;	sp = prog->startp;	ep = prog->endp;	for (i = NSUBEXP; i > 0; i--) {		*sp++ = NULL;		*ep++ = NULL;	}	if (regmatch(prog->program + 1)) {		prog->startp[0] = string;		prog->endp[0] = reginput;		return(1);	} else		return(0);}/*

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -