⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regcomp.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
#include <u.h>#include <libc.h>#include "regexp.h"#include "regcomp.h"#define	TRUE	1#define	FALSE	0/* * Parser Information */typedefstruct Node{	Reinst*	first;	Reinst*	last;}Node;#define	NSTACK	20static	Node	andstack[NSTACK];static	Node	*andp;static	int	atorstack[NSTACK];static	int*	atorp;static	int	cursubid;		/* id of current subexpression */static	int	subidstack[NSTACK];	/* parallel to atorstack */static	int*	subidp;static	int	lastwasand;	/* Last token was operand */static	int	nbra;static	char*	exprp;		/* pointer to next character in source expression */static	int	lexdone;static	int	nclass;static	Reclass*classp;static	Reinst*	freep;static	int	errors;static	Rune	yyrune;		/* last lex'd rune */static	Reclass*yyclassp;	/* last lex'd class *//* predeclared crap */static	void	operator(int);static	void	pushand(Reinst*, Reinst*);static	void	pushator(int);static	void	evaluntil(int);static	int	bldcclass(void);static jmp_buf regkaboom;static	voidrcerror(char *s){	errors++;	regerror(s);	longjmp(regkaboom, 1);}static	Reinst*newinst(int t){	freep->type = t;	freep->left = 0;	freep->right = 0;	return freep++;}static	voidoperand(int t){	Reinst *i;	if(lastwasand)		operator(CAT);	/* catenate is implicit */	i = newinst(t);	if(t == CCLASS || t == NCCLASS)		i->cp = yyclassp;	if(t == RUNE)		i->r = yyrune;	pushand(i, i);	lastwasand = TRUE;}static	voidoperator(int t){	if(t==RBRA && --nbra<0)		rcerror("unmatched right paren");	if(t==LBRA){		if(++cursubid >= NSUBEXP)			rcerror ("too many subexpressions");		nbra++;		if(lastwasand)			operator(CAT);	} else		evaluntil(t);	if(t != RBRA)		pushator(t);	lastwasand = FALSE;	if(t==STAR || t==QUEST || t==PLUS || t==RBRA)		lastwasand = TRUE;	/* these look like operands */}static	voidregerr2(char *s, int c){	char buf[100];	char *cp = buf;	while(*s)		*cp++ = *s++;	*cp++ = c;	*cp = '\0'; 	rcerror(buf);}static	voidcant(char *s){	char buf[100];	strcpy(buf, "can't happen: ");	strcat(buf, s);	rcerror(buf);}static	voidpushand(Reinst *f, Reinst *l){	if(andp >= &andstack[NSTACK])		cant("operand stack overflow");	andp->first = f;	andp->last = l;	andp++;}static	voidpushator(int t){	if(atorp >= &atorstack[NSTACK])		cant("operator stack overflow");	*atorp++ = t;	*subidp++ = cursubid;}static	Node*popand(int op){	Reinst *inst;	if(andp <= &andstack[0]){		regerr2("missing operand for ", op);		inst = newinst(NOP);		pushand(inst,inst);	}	return --andp;}static	intpopator(void){	if(atorp <= &atorstack[0])		cant("operator stack underflow");	--subidp;	return *--atorp;}static	voidevaluntil(int pri){	Node *op1, *op2;	Reinst *inst1, *inst2;	while(pri==RBRA || atorp[-1]>=pri){		switch(popator()){		default:			rcerror("unknown operator in evaluntil");			break;		case LBRA:		/* must have been RBRA */			op1 = popand('(');			inst2 = newinst(RBRA);			inst2->subid = *subidp;			op1->last->next = inst2;			inst1 = newinst(LBRA);			inst1->subid = *subidp;			inst1->next = op1->first;			pushand(inst1, inst2);			return;		case OR:			op2 = popand('|');			op1 = popand('|');			inst2 = newinst(NOP);			op2->last->next = inst2;			op1->last->next = inst2;			inst1 = newinst(OR);			inst1->right = op1->first;			inst1->left = op2->first;			pushand(inst1, inst2);			break;		case CAT:			op2 = popand(0);			op1 = popand(0);			op1->last->next = op2->first;			pushand(op1->first, op2->last);			break;		case STAR:			op2 = popand('*');			inst1 = newinst(OR);			op2->last->next = inst1;			inst1->right = op2->first;			pushand(inst1, inst1);			break;		case PLUS:			op2 = popand('+');			inst1 = newinst(OR);			op2->last->next = inst1;			inst1->right = op2->first;			pushand(op2->first, inst1);			break;		case QUEST:			op2 = popand('?');			inst1 = newinst(OR);			inst2 = newinst(NOP);			inst1->left = inst2;			inst1->right = op2->first;			op2->last->next = inst2;			pushand(inst1, inst2);			break;		}	}}static	Reprog*optimize(Reprog *pp){	Reinst *inst, *target;	int size;	Reprog *npp;	Reclass *cl;	int diff;	/*	 *  get rid of NOOP chains	 */	for(inst=pp->firstinst; inst->type!=END; inst++){		target = inst->next;		while(target->type == NOP)			target = target->next;		inst->next = target;	}	/*	 *  The original allocation is for an area larger than	 *  necessary.  Reallocate to the actual space used	 *  and then relocate the code.	 */	size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);	npp = realloc(pp, size);	if(npp==0 || npp==pp)		return pp;	diff = (char *)npp - (char *)pp;	freep = (Reinst *)((char *)freep + diff);	for(inst=npp->firstinst; inst<freep; inst++){		switch(inst->type){		case OR:		case STAR:		case PLUS:		case QUEST:			*(char **)&inst->right += diff;			break;		case CCLASS:		case NCCLASS:			*(char **)&inst->right += diff;			cl = inst->cp;			*(char **)&cl->end += diff;			break;		}		*(char **)&inst->left += diff;	}	*(char **)&npp->startinst += diff;	return npp;}#ifdef	DEBUGstatic	voiddumpstack(void){	Node *stk;	int *ip;	print("operators\n");	for(ip=atorstack; ip<atorp; ip++)		print("0%o\n", *ip);	print("operands\n");	for(stk=andstack; stk<andp; stk++)		print("0%o\t0%o\n", stk->first->type, stk->last->type);}static	voiddump(Reprog *pp){	Reinst *l;	Rune *p;	l = pp->firstinst;	do{		print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,			l->left-pp->firstinst, l->right-pp->firstinst);		if(l->type == RUNE)			print("\t%C\n", l->r);		else if(l->type == CCLASS || l->type == NCCLASS){			print("\t[");			if(l->type == NCCLASS)				print("^");			for(p = l->cp->spans; p < l->cp->end; p += 2)				if(p[0] == p[1])					print("%C", p[0]);				else					print("%C-%C", p[0], p[1]);			print("]\n");		} else			print("\n");	}while(l++->type);}#endifstatic	Reclass*newclass(void){	if(nclass >= NCLASS)		regerr2("too many character classes; limit", NCLASS+'0');	return &(classp[nclass++]);}static	intnextc(Rune *rp){	if(lexdone){		*rp = 0;		return 1;	}	exprp += chartorune(rp, exprp);	if(*rp == L'\\'){		exprp += chartorune(rp, exprp);		return 1;	}	if(*rp == 0)		lexdone = 1;	return 0;}static	intlex(int literal, int dot_type){	int quoted;	quoted = nextc(&yyrune);	if(literal || quoted){		if(yyrune == 0)			return END;		return RUNE;	}	switch(yyrune){	case 0:		return END;	case L'*':		return STAR;	case L'?':		return QUEST;	case L'+':		return PLUS;	case L'|':		return OR;	case L'.':		return dot_type;	case L'(':		return LBRA;	case L')':		return RBRA;	case L'^':		return BOL;	case L'$':		return EOL;	case L'[':		return bldcclass();	}	return RUNE;}static intbldcclass(void){	int type;	Rune r[NCCRUNE];	Rune *p, *ep, *np;	Rune rune;	int quoted;	/* we have already seen the '[' */	type = CCLASS;	yyclassp = newclass();	/* look ahead for negation */	/* SPECIAL CASE!!! negated classes don't match \n */	ep = r;	quoted = nextc(&rune);	if(!quoted && rune == L'^'){		type = NCCLASS;		quoted = nextc(&rune);		*ep++ = L'\n';		*ep++ = L'\n';	}	/* parse class into a set of spans */	for(; ep<&r[NCCRUNE];){		if(rune == 0){			rcerror("malformed '[]'");			return 0;		}		if(!quoted && rune == L']')			break;		if(!quoted && rune == L'-'){			if(ep == r){				rcerror("malformed '[]'");				return 0;			}			quoted = nextc(&rune);			if((!quoted && rune == L']') || rune == 0){				rcerror("malformed '[]'");				return 0;			}			*(ep-1) = rune;		} else {			*ep++ = rune;			*ep++ = rune;		}		quoted = nextc(&rune);	}	/* sort on span start */	for(p = r; p < ep; p += 2){		for(np = p; np < ep; np += 2)			if(*np < *p){				rune = np[0];				np[0] = p[0];				p[0] = rune;				rune = np[1];				np[1] = p[1];				p[1] = rune;			}	}	/* merge spans */	np = yyclassp->spans;	p = r;	if(r == ep)		yyclassp->end = np;	else {		np[0] = *p++;		np[1] = *p++;		for(; p < ep; p += 2)			if(p[0] <= np[1]){				if(p[1] > np[1])					np[1] = p[1];			} else {				np += 2;				np[0] = p[0];				np[1] = p[1];			}		yyclassp->end = np+2;	}	return type;}static	Reprog*regcomp1(char *s, int literal, int dot_type){	int token;	Reprog *pp;	/* get memory for the program */	pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));	if(pp == 0){		regerror("out of memory");		return 0;	}	freep = pp->firstinst;	classp = pp->class;	errors = 0;	if(setjmp(regkaboom))		goto out;	/* go compile the sucker */	lexdone = 0;	exprp = s;	nclass = 0;	nbra = 0;	atorp = atorstack;	andp = andstack;	subidp = subidstack;	lastwasand = FALSE;	cursubid = 0;	/* Start with a low priority operator to prime parser */	pushator(START-1);	while((token = lex(literal, dot_type)) != END){		if((token&0300) == OPERATOR)			operator(token);		else			operand(token);	}	/* Close with a low priority operator */	evaluntil(START);	/* Force END */	operand(END);	evaluntil(START);#ifdef DEBUG	dumpstack();#endif	if(nbra)		rcerror("unmatched left paren");	--andp;	/* points to first and only operand */	pp->startinst = andp->first;#ifdef DEBUG	dump(pp);#endif	pp = optimize(pp);#ifdef DEBUG	print("start: %d\n", andp->first-pp->firstinst);	dump(pp);#endifout:	if(errors){		free(pp);		pp = 0;	}	return pp;}extern	Reprog*regcomp(char *s){	return regcomp1(s, 0, ANY);}extern	Reprog*regcomplit(char *s){	return regcomp1(s, 1, ANY);}extern	Reprog*regcompnl(char *s){	return regcomp1(s, 0, ANYNL);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -