regexp.h

来自「这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易」· C头文件 代码 · 共 411 行

H
411
字号
/*
 * Simple regular-expression compile and match routines.
 *
 * compile() uses the macros INIT, GETC(), PEEKC(), UNGETC(c), RETURN(ptr)
 * and ERROR(code).  None of them is defined in this file; they must be
 * defined by whoever includes it, before this point.
 */

/* Opcodes stored in the compiled expression. */
#define	CBRA	2	/* \( ; followed by the group number */
#define	CCHR	4	/* literal character; followed by that character */
#define	CDOT	8	/* . */
#define	CCL	12	/* [...] ; followed by a 16-byte bitmap */
#define	CDOL	20	/* $ as the last pattern character */
#define	CEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) ; followed by the group number */
#define	CBACK	36	/* \1 .. \9 ; followed by the zero-based group index */

/* Modifiers OR-ed into the opcode of the preceding element. */
#define	STAR	01	/* element followed by * */
#define RNGE	03	/* element followed by \{m\}, \{m,\} or \{m,n\} */

#define	NBRA	9	/* maximum number of \( \) groups */

/*
 * Set / test the bit for character c in the 16-byte class bitmap at ep.
 * The index is reduced to 7 bits so that a character outside 0..127
 * (including a negative value coming from a signed char) can never
 * address memory outside the bitmap; advance() masks subject characters
 * with 0177 in the same way.
 */
#define PLACE(c)	ep[((c) & 0177) >> 3] |= bittab[(c) & 07]
#define ISTHERE(c)	(ep[((c) & 0177) >> 3] & bittab[(c) & 07])

char	*braslist[NBRA];	/* subject position where each group started */
char	*braelist[NBRA];	/* subject position where each group ended */
int	nbra, ebra;		/* groups opened / closed by compile() */
char *loc1, *loc2, *locs;	/* match start, match end, backtracking stop */
int	sed;			/* when non-zero, compile() accepts an empty pattern */
int	circf;			/* non-zero when the pattern began with ^ */
int	low;			/* set by getrnge(): mandatory repetitions */
int	size;			/* set by getrnge(): optional extra repetitions */

char	bittab[] = {
	1,
	2,
	4,
	8,
	16,
	32,
	64,
	128
};

char *compile(char *instring, char *ep, char *endbuf, int seof);
int step(char *p1, char *p2);
int advance(char *lp, char *ep);
int getrnge(char *str);
int ecmp(char *a, char *b, int count);

/*
 * Translate the pattern delivered by GETC()/PEEKC()/UNGETC() into opcodes
 * stored at ep.
 *
 *   instring  handed to INIT; not otherwise interpreted here
 *   ep        start of the output buffer
 *   endbuf    limit of the output buffer
 *   seof      character that terminates the pattern
 *
 * On success RETURN() is invoked with the position just past the CEOF
 * opcode.  Problems are reported through ERROR(code):
 *   11  \{ \} count larger than 255        16  non-digit inside \{ \}
 *   25  \digit refers to an unclosed group 36  newline inside the pattern
 *   41  empty pattern, *ep == 0 and sed is zero
 *   42  \) without matching \(             43  more than NBRA groups
 *   44  more than two numbers in \{ \}     45  missing } after \{
 *   46  first number larger than second    49  unterminated [ ]
 *   50  output buffer exhausted
 *
 * NOTE(review): the ep >= endbuf test runs once per pattern element while
 * an element may emit more than one byte, so callers should keep a few
 * bytes of slack behind endbuf.
 */
char *
compile(char *instring, char *ep, char *endbuf, int seof)
{
	INIT	/* Dependent declarations and initializations */
	register int c;
	register int eof = seof;
	char *lastep = instring;
	int cclcnt;
	char bracket[NBRA], *bracketp;
	int closed;
	char neg;
	int lc;
	int i, cflg;

	lastep = 0;
	if ((c = GETC()) == eof) {
		if (*ep == 0 && !sed)
			ERROR(41);
		RETURN(ep);
	}
	bracketp = bracket;
	circf = closed = nbra = ebra = 0;
	if (c == '^')
		circf++;
	else
		UNGETC(c);
	for (;;) {
		if (ep >= endbuf)
			ERROR(50);
		/* lastep keeps pointing at the previous element for * and \{ */
		if ((c = GETC()) != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == eof) {
			*ep++ = CEOF;
			RETURN(ep);
		}
		switch (c) {

		case '.':
			*ep++ = CDOT;
			continue;

		case '\n':
			ERROR(36);

		case '*':
			/* a * with nothing repeatable before it is a literal */
			if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			/* $ is special only as the last pattern character */
			if (PEEKC() != eof)
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			if (&ep[17] >= endbuf)
				ERROR(50);

			*ep++ = CCL;
			lc = 0;
			for (i = 0; i < 16; i++)
				ep[i] = 0;

			neg = 0;
			if ((c = GETC()) == '^') {
				neg = 1;
				c = GETC();
			}

			do {
				if (c == '\0' || c == '\n')
					ERROR(49);
				if (c == '-' && lc != 0) {
					if ((c = GETC()) == ']') {
						PLACE('-');
						break;
					}
					/*
					 * The range end needs the same check as
					 * any other class member, otherwise
					 * "[a-" runs off the end of the pattern.
					 */
					if (c == '\0' || c == '\n')
						ERROR(49);
					while (lc < c) {
						PLACE(lc);
						lc++;
					}
				}
				lc = c;
				PLACE(c);
			} while ((c = GETC()) != ']');
			if (neg) {
				for (cclcnt = 0; cclcnt < 16; cclcnt++)
					ep[cclcnt] ^= -1;
			}
			/*
			 * A class must never contain NUL, or advance() would
			 * match the string terminator.  Cleared unconditionally
			 * because a masked high-bit character can map onto
			 * bit 0 as well.
			 */
			ep[0] &= 0376;

			ep += 16;

			continue;

		case '\\':
			switch (c = GETC()) {

			case '(':
				if (nbra >= NBRA)
					ERROR(43);
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket || ++ebra != nbra)
					ERROR(42);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '{':
				if (lastep == (char *) (0))
					goto defchar;
				*lastep |= RNGE;
				cflg = 0;
			nlim:
				c = GETC();
				i = 0;
				do {
					if ('0' <= c && c <= '9')
						i = 10 * i + c - '0';
					else
						ERROR(16);
				} while (((c = GETC()) != '\\') && (c != ','));
				if (i > 255)
					ERROR(11);
				*ep++ = i;
				if (c == ',') {
					if (cflg++)
						ERROR(44);
					/* "\{m,\}": 255 stands for "no upper limit" */
					if ((c = GETC()) == '\\')
						*ep++ = 255;
					else {
						UNGETC(c);
						goto nlim; /* get 2'nd number */
					}
				}
				if (GETC() != '}')
					ERROR(45);
				if (!cflg)	/* one number */
					*ep++ = i;
				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
					ERROR(46);
				continue;

			case '\n':
				ERROR(36);

			case 'n':
				c = '\n';
				goto defchar;

			default:
				if (c >= '1' && c <= '9') {
					if ((c -= '1') >= closed)
						ERROR(25);
					*ep++ = CBACK;
					*ep++ = c;
					continue;
				}
			}
			/* Drop through to default to use \ to turn off special chars */

		defchar:
		default:
			lastep = ep;
			*ep++ = CCHR;
			*ep++ = c;
		}
	}
}

/*
 * Search the NUL-terminated string p1 for the compiled expression p2.
 * With circf set only the start of p1 is tried; otherwise every position
 * up to and including the terminator is tried in turn.
 *
 * Returns 1 on a match, with loc1 at its first character (loc2 is set by
 * advance()), and 0 otherwise.
 */
int
step(char *p1, char *p2)
{
	register int c;

	if (circf) {
		loc1 = p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			/* continue jumps to the loop test, i.e. the next position */
			if (*p1 != c)
				continue;
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

/*
 * Match the compiled expression ep against the string starting exactly
 * at lp.  Returns 1 and sets loc2 to the first unmatched character when
 * CEOF is reached, 0 when the match fails.  braslist[]/braelist[] record
 * group boundaries along the way.  Backtracking over a repeated element
 * stops when it reaches the position held in locs.
 */
int
advance(char *lp, char *ep)
{
	register char *curlp;
	char c;
	char *bbeg;
	int ct;

	for (;;) switch (*ep++) {

	case CCHR:
		if (*ep++ == *lp++)
			continue;
		return (0);

	case CDOT:
		if (*lp++)
			continue;
		return (0);

	case CDOL:
		if (*lp == 0)
			continue;
		return (0);

	case CEOF:
		loc2 = lp;
		return (1);

	case CCL:
		c = *lp++ & 0177;
		if (ISTHERE(c)) {
			ep += 16;
			continue;
		}
		return (0);

	case CBRA:
		braslist[*ep++] = lp;
		continue;

	case CKET:
		braelist[*ep++] = lp;
		continue;

	case CCHR|RNGE:
		c = *ep++;
		getrnge(ep);
		while (low--)
			if (*lp++ != c)
				return (0);
		curlp = lp;
		while (size--)
			if (*lp++ != c)
				break;
		/* loop ran to completion: step past so that star's --lp lands right */
		if (size < 0)
			lp++;
		ep += 2;
		goto star;

	case CDOT|RNGE:
		getrnge(ep);
		while (low--)
			if (*lp++ == '\0')
				return (0);
		curlp = lp;
		while (size--)
			if (*lp++ == '\0')
				break;
		if (size < 0)
			lp++;
		ep += 2;
		goto star;

	case CCL|RNGE:
		getrnge(ep + 16);
		while (low--) {
			c = *lp++ & 0177;
			if (!ISTHERE(c))
				return (0);
		}
		curlp = lp;
		while (size--) {
			c = *lp++ & 0177;
			if (!ISTHERE(c))
				break;
		}
		if (size < 0)
			lp++;
		ep += 18;		/* 16 + 2 */
		goto star;

	case CBACK:
		bbeg = braslist[*ep];
		ct = braelist[*ep++] - bbeg;
		if (ecmp(bbeg, lp, ct)) {
			lp += ct;
			continue;
		}
		return (0);

	case CBACK|STAR:
		bbeg = braslist[*ep];
		ct = braelist[*ep++] - bbeg;
		/*
		 * An empty group repeated any number of times is still empty.
		 * Without this guard ecmp() succeeds forever with ct == 0 and
		 * the loop below never advances.
		 */
		if (ct <= 0)
			continue;
		curlp = lp;
		while (ecmp(bbeg, lp, ct))
			lp += ct;
		while (lp >= curlp) {
			if (advance(lp, ep))
				return (1);
			lp -= ct;
		}
		return (0);

	case CDOT|STAR:
		curlp = lp;
		while (*lp++)
			;
		goto star;

	case CCHR|STAR:
		curlp = lp;
		while (*lp++ == *ep)
			;
		ep++;
		goto star;

	case CCL|STAR:
		curlp = lp;
		do {
			c = *lp++ & 0177;
		} while (ISTHERE(c));
		ep += 16;
		goto star;

	star:
		/* give back one character at a time until the rest matches */
		do {
			if (--lp == locs)
				break;
			if (advance(lp, ep))
				return (1);
		} while (lp > curlp);
		return (0);
	}
}

/*
 * Decode the two count bytes that follow a RNGE element: low receives the
 * first byte, size the number of additional repetitions allowed.  A second
 * byte of 255 means "no upper limit" and yields size 20000.  The bytes are
 * masked before use so the result does not depend on whether plain char
 * is signed.  Always returns 0; callers ignore the value.
 */
int
getrnge(char *str)
{
	low = *str++ & 0377;
	size = (*str & 0377) == 255 ? 20000 : (*str & 0377) - low;
	return (0);
}

/*
 * Compare count bytes at a and b.  Returns 1 when they are all equal
 * (also when count is 0), 0 otherwise.
 */
int
ecmp(char *a, char *b, int count)
{
	while (count--)
		if (*a++ != *b++)
			return (0);
	return (1);
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?