⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.c

📁 操作系统源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
reginsert(iop, opnd)int iop;unsigned char *opnd;{	register unsigned char *src;	register unsigned char *dst;	register unsigned char *place;	unsigned char op;	op = (unsigned char) iop;	if (regcode == &regdummy) {		regsize += 3;		return;	}	src = regcode;	regcode += 3;	dst = regcode;	while (src > opnd)		*--dst = *--src;	place = opnd;		/* Op node, where operand used to be. */	*place++ = op;	*place++ = '\0';	*place++ = '\0';}/* - regtail - set the next-pointer at the end of a node chain */STATIC voidregtail(p, val)unsigned char *p;unsigned char *val;{	register unsigned char *scan;	register unsigned char *temp;	register int offset;	if (p == &regdummy)		return;	/* Find last node. */	scan = p;	for (;;) {		temp = regnext(scan);		if (temp == NULL)			break;		scan = temp;	}	if (OP(scan) == BACK)		offset = scan - val;	else		offset = val - scan;	*(scan+1) = (offset>>8)&0377;	*(scan+2) = offset&0377;}/* - regoptail - regtail on operand of first argument; nop if operandless */STATIC voidregoptail(p, val)unsigned char *p;unsigned char *val;{	/* "Operandless" and "op != BRANCH" are synonymous in practice. */	if (p == NULL || p == &regdummy || OP(p) != BRANCH)		return;	regtail(OPERAND(p), val);}/* * regexec and friends *//* * Global work variables for regexec(). */STATIC unsigned char *reginput;		/* String-input pointer. */STATIC unsigned char *regbol;		/* Beginning of input, for ^ check. */STATIC unsigned char **regstartp;	/* Pointer to startp array. */STATIC unsigned char **regendp;		/* Ditto for endp. */#ifdef DEBUGint regnarrate = 0;void regdump();STATIC unsigned char *regprop();#endif/* - regexec - match a regexp against a string */intregexec(prog, string)register regexp *prog;register unsigned char *string;{	register unsigned char *s;#ifndef	STDLIB	extern char *strchr();#endif	/* Be paranoid... */	if (prog == NULL || string == NULL) {		regerror("NULL parameter");		return(0);	}	/* Check validity of program. */	if (UCHARAT(prog->program) != MAGIC) {		regerror("corrupted program");		return(0);	}	/* If there is a "must appear" string, look for it. */	if (prog->regmust != NULL) {		s = string;		while ((s = (unsigned char *)strchr((char *)s,prog->regmust[0]))		!= NULL) {			if (strncmp((char *)s, (char *)prog->regmust,			    prog->regmlen)			== 0)				break;	/* Found it. */			s++;		}		if (s == NULL)	/* Not present. */			return(0);	}	/* Mark beginning of line for ^ . */	regbol = string;	/* Simplest case:  anchored match need be tried only once. */	if (prog->reganch)		return(regtry(prog, string));	/* Messy cases:  unanchored match. */	s = string;	if (prog->regstart != '\0')		/* We know what char it must start with. */		while ((s = (unsigned char *)strchr((char *)s, prog->regstart))		!= NULL) {			if (regtry(prog, s))				return(1);			s++;		}	else		/* We don't -- general case. */		do {			if (regtry(prog, s))				return(1);		} while (*s++ != '\0');	/* Failure. */	return(0);}/* - regtry - try match at specific point */STATIC int			/* 0 failure, 1 success */regtry(prog, string)regexp *prog;unsigned char *string;{	register int i;	register unsigned char **sp;	register unsigned char **ep;	reginput = string;	regstartp = prog->startp;	regendp = prog->endp;	sp = prog->startp;	ep = prog->endp;	for (i = NSUBEXP; i > 0; i--) {		*sp++ = NULL;		*ep++ = NULL;	}	if (regmatch(prog->program + 1)) {		prog->startp[0] = string;		prog->endp[0] = reginput;		return(1);	} else		return(0);}/* - regmatch - main matching routine * * Conceptually the strategy is simple:  check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly.  In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */STATIC int			/* 0 failure, 1 success */regmatch(prog)unsigned char *prog;{	register unsigned char *scan;	/* Current node. */	unsigned char *next;		/* Next node. */#ifndef	STDLIB	extern char *strchr();#endif	scan = prog;#ifdef DEBUG	if (scan != NULL && regnarrate)		fprintf(stderr, "%s(\n", regprop(scan));#endif	while (scan != NULL) {#ifdef DEBUG		if (regnarrate)			fprintf(stderr, "%s...\n", regprop(scan));#endif		next = regnext(scan);		switch (OP(scan)) {		case BOL:			if (reginput != regbol)				return(0);			break;		case EOL:			if (*reginput != '\0')				return(0);			break;		case ANY:			if (*reginput == '\0')				return(0);			reginput++;			break;		case EXACTLY: {				register int len;				register unsigned char *opnd;				opnd = OPERAND(scan);				/* Inline the first character, for speed. */				if (*opnd != *reginput)					return(0);				len = strlen((char *)opnd);				if (len > 1 && strncmp((char *)opnd,				   (char *)reginput, len)				!= 0)					return(0);				reginput += len;			}			break;		case ANYOF:			if (*reginput == '\0'			|| strchr((char *)OPERAND(scan), *reginput) == NULL)				return(0);			reginput++;			break;		case ANYBUT:			if (*reginput == '\0'			|| strchr((char *)OPERAND(scan), *reginput) != NULL)				return(0);			reginput++;			break;		case NOTHING:			break;		case BACK:			break;		case OPEN+1:		case OPEN+2:		case OPEN+3:		case OPEN+4:		case OPEN+5:		case OPEN+6:		case OPEN+7:		case OPEN+8:		case OPEN+9: {				register int no;				register unsigned char *save;				no = OP(scan) - OPEN;				save = reginput;				if (regmatch(next)) {					/*					 * Don't set startp if some later					 * invocation of the same parentheses					 * already has.					 */					if (regstartp[no] == NULL)						regstartp[no] = save;					return(1);				} else					return(0);			}			break;		case CLOSE+1:		case CLOSE+2:		case CLOSE+3:		case CLOSE+4:		case CLOSE+5:		case CLOSE+6:		case CLOSE+7:		case CLOSE+8:		case CLOSE+9: {				register int no;				register unsigned char *save;				no = OP(scan) - CLOSE;				save = reginput;				if (regmatch(next)) {					/*					 * Don't set endp if some later					 * invocation of the same parentheses					 * already has.					 */					if (regendp[no] == NULL)						regendp[no] = save;					return(1);				} else					return(0);			}			break;		case BRANCH: {				register unsigned char *save;				if (OP(next) != BRANCH)		/* No choice. */					next = OPERAND(scan);	/* Avoid recursion. */				else {					do {						save = reginput;						if (regmatch(OPERAND(scan)))							return(1);						reginput = save;						scan = regnext(scan);					} while (scan != NULL && OP(scan) == BRANCH);					return(0);					/* NOTREACHED */				}			}			break;		case STAR:		case PLUS: {				register unsigned char nextch;				register int no;				register unsigned char *save;				register int min;				/*				 * Lookahead to avoid useless match attempts				 * when we know what character comes next.				 */				nextch = '\0';				if (OP(next) == EXACTLY)					nextch = *OPERAND(next);				min = (OP(scan) == STAR) ? 0 : 1;				save = reginput;				no = regrepeat(OPERAND(scan));				while (no >= min) {					/* If it could work, try it. */					if (nextch == '\0' || *reginput == nextch)						if (regmatch(next))							return(1);					/* Couldn't or didn't -- back up. */					no--;					reginput = save + no;				}				return(0);			}			break;		case END:			return(1);	/* Success! */			break;		default:			regerror("memory corruption");			return(0);			break;		}		scan = next;	}	/*	 * We get here only if there's trouble -- normally "case END" is	 * the terminating point.	 */	regerror("corrupted pointers");	return(0);}/* - regrepeat - repeatedly match something simple, report how many */STATIC intregrepeat(p)unsigned char *p;{	register int count = 0;	register unsigned char *scan;	register unsigned char *opnd;	scan = reginput;	opnd = OPERAND(p);	switch (OP(p)) {	case ANY:		count = strlen((char *)scan);		scan += count;		break;	case EXACTLY:		while (*opnd == *scan) {			count++;			scan++;		}		break;	case ANYOF:		while (*scan != '\0' && strchr((char *)opnd, *scan) != NULL) {			count++;			scan++;		}		break;	case ANYBUT:		while (*scan != '\0' && strchr((char *)opnd, *scan) == NULL) {			count++;			scan++;		}		break;	default:		/* Oh dear.  Called inappropriately. */		regerror("internal foulup");		count = 0;	/* Best compromise. */		break;	}	reginput = scan;	return(count);}/* - regnext - dig the "next" pointer out of a node */STATIC unsigned char *regnext(p)register unsigned char *p;{	register int offset;	if (p == &regdummy)		return(NULL);	offset = NEXT(p);	if (offset == 0)		return(NULL);	if (OP(p) == BACK)		return(p-offset);	else		return(p+offset);}#ifdef DEBUGSTATIC unsigned char *regprop();/* - regdump - dump a regexp onto stdout in vaguely comprehensible form */voidregdump(r)regexp *r;{	register unsigned char *s;	register unsigned char op = EXACTLY;	/* Arbitrary non-END op. */	register unsigned char *next;	extern char *strchr();	s = r->program + 1;	while (op != END) {	/* While that wasn't END last time... */		op = OP(s);		printf("%2d%s", s-r->program, regprop(s));	/* Where, what. */		next = regnext(s);		if (next == NULL)		/* Next ptr. */			printf("(0)");		else 			printf("(%d)", (s-r->program)+(next-s));		s += 3;		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {			/* Literal string, where present. */			while (*s != '\0') {				putchar(*s);				s++;			}			s++;		}		putchar('\n');	}	/* Header fields of interest. */	if (r->regstart != '\0')		printf("start `%c' ", r->regstart);	if (r->reganch)		printf("anchored ");	if (r->regmust != NULL)		printf("must have \"%s\"", r->regmust);	printf("\n");}/* - regprop - printable representation of opcode */STATIC unsigned char *regprop(op)unsigned char *op;{	register unsigned char *p;	STATIC unsigned char buf[50];	(void) strcpy(buf, ":");	switch (OP(op)) {	case BOL:		p = "BOL";		break;	case EOL:		p = "EOL";		break;	case ANY:		p = "ANY";		break;	case ANYOF:		p = "ANYOF";		break;	case ANYBUT:		p = "ANYBUT";		break;	case BRANCH:		p = "BRANCH";		break;	case EXACTLY:		p = "EXACTLY";		break;	case NOTHING:		p = "NOTHING";		break;	case BACK:		p = "BACK";		break;	case END:		p = "END";		break;	case OPEN+1:	case OPEN+2:	case OPEN+3:	case OPEN+4:	case OPEN+5:	case OPEN+6:	case OPEN+7:	case OPEN+8:	case OPEN+9:		sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);		p = NULL;		break;	case CLOSE+1:	case CLOSE+2:	case CLOSE+3:	case CLOSE+4:	case CLOSE+5:	case CLOSE+6:	case CLOSE+7:	case CLOSE+8:	case CLOSE+9:		sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);		p = NULL;		break;	case STAR:		p = "STAR";		break;	case PLUS:		p = "PLUS";		break;	default:		regerror("corrupted opcode");		break;	}	if (p != NULL)		(void) strcat(buf, p);	return(buf);}#endif/* * The following is provided for those people who do not have strcspn() in * their C libraries.  They should get off their butts and do something * about it; at least one public-domain implementation of those (highly * useful) string routines has been published on Usenet. */#ifdef STRCSPN/* * strcspn - find length of initial segment of s1 consisting entirely * of characters not from s2 */STATIC intstrcspn(s1, s2)unsigned char *s1;unsigned char *s2;{	register unsigned char *scan1;	register unsigned char *scan2;	register int count;	count = 0;	for (scan1 = s1; *scan1 != '\0'; scan1++) {		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */			if (*scan1 == *scan2++)				return(count);		count++;	}	return(count);}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -