regc_lex.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,062 行 · 第 1/2 页

C
1,062
字号
			NOTE(REG_UUNSPEC);			RETV(PLAIN, c);		} else {			NOTE(REG_UBOUNDS);			INTOCON(L_EBND);			RET('{');		}		assert(NOTREACHED);		break;	case CHR('('):		/* parenthesis, or advanced extension */		if ((v->cflags&REG_ADVF) && NEXT1('?')) {			NOTE(REG_UNONPOSIX);			v->now++;			switch (*v->now++) {			case CHR(':'):		/* non-capturing paren */				RETV('(', 0);				break;			case CHR('#'):		/* comment */				while (!ATEOS() && *v->now != CHR(')'))					v->now++;				if (!ATEOS())					v->now++;				assert(v->nexttype == v->lasttype);				return next(v);				break;			case CHR('='):		/* positive lookahead */				NOTE(REG_ULOOKAHEAD);				RETV(LACON, 1);				break;			case CHR('!'):		/* negative lookahead */				NOTE(REG_ULOOKAHEAD);				RETV(LACON, 0);				break;			default:				FAILW(REG_BADRPT);				break;			}			assert(NOTREACHED);		}		if (v->cflags&REG_NOSUB)			RETV('(', 0);		/* all parens non-capturing */		else			RETV('(', 1);		break;	case CHR(')'):		if (LASTTYPE('(')) {			NOTE(REG_UUNSPEC);		}		RETV(')', c);		break;	case CHR('['):		/* easy except for [[:<:]] and [[:>:]] */		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&				*(v->now+1) == CHR(':') &&				(*(v->now+2) == CHR('<') ||						*(v->now+2) == CHR('>')) &&				*(v->now+3) == CHR(':') &&				*(v->now+4) == CHR(']') &&				*(v->now+5) == CHR(']')) {			c = *(v->now+2);			v->now += 6;			NOTE(REG_UNONPOSIX);			RET((c == CHR('<')) ? '<' : '>');		}		INTOCON(L_BRACK);		if (NEXT1('^')) {			v->now++;			RETV('[', 0);		}		RETV('[', 1);		break;	case CHR('.'):		RET('.');		break;	case CHR('^'):		RET('^');		break;	case CHR('$'):		RET('$');		break;	case CHR('\\'):		/* mostly punt backslashes to code below */		if (ATEOS())			FAILW(REG_EESCAPE);		break;	default:		/* ordinary character */		RETV(PLAIN, c);		break;	}	/* ERE/ARE backslash handling; backslash already eaten */	assert(!ATEOS());	if (!(v->cflags&REG_ADVF)) {	/* only AREs have non-trivial escapes */		if (iscalnum(*v->now)) {			NOTE(REG_UBSALNUM);			NOTE(REG_UUNSPEC);		}		RETV(PLAIN, *v->now++);	}	(DISCARD)lexescape(v);	if (ISERR())		FAILW(REG_EESCAPE);	if (v->nexttype == CCLASS) {	/* fudge at lexical level */		switch (v->nextvalue) {		case 'd':	lexnest(v, backd, ENDOF(backd)); break;		case 'D':	lexnest(v, backD, ENDOF(backD)); break;		case 's':	lexnest(v, backs, ENDOF(backs)); break;		case 'S':	lexnest(v, backS, ENDOF(backS)); break;		case 'w':	lexnest(v, backw, ENDOF(backw)); break;		case 'W':	lexnest(v, backW, ENDOF(backW)); break;		default:			assert(NOTREACHED);			FAILW(REG_ASSERT);			break;		}		/* lexnest done, back up and try again */		v->nexttype = v->lasttype;		return next(v);	}	/* otherwise, lexescape has already done the work */	return !ISERR();}/* - lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of the CCLASS type code. ^ static int lexescape(struct vars *); */static int			/* not actually used, but convenient for RETV */lexescape(v)struct vars *v;{	chr c;	static chr alert[] = {		CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')	};	static chr esc[] = {		CHR('E'), CHR('S'), CHR('C')	};	chr *save;	assert(v->cflags&REG_ADVF);	assert(!ATEOS());	c = *v->now++;	if (!iscalnum(c))		RETV(PLAIN, c);	NOTE(REG_UNONPOSIX);	switch (c) {	case CHR('a'):		RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));		break;	case CHR('A'):		RETV(SBEGIN, 0);		break;	case CHR('b'):		RETV(PLAIN, CHR('\b'));		break;	case CHR('B'):		RETV(PLAIN, CHR('\\'));		break;	case CHR('c'):		NOTE(REG_UUNPORT);		if (ATEOS())			FAILW(REG_EESCAPE);		RETV(PLAIN, (chr)(*v->now++ & 037));		break;	case CHR('d'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 'd');		break;	case CHR('D'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 'D');		break;	case CHR('e'):		NOTE(REG_UUNPORT);		RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));		break;	case CHR('f'):		RETV(PLAIN, CHR('\f'));		break;	case CHR('m'):		RET('<');		break;	case CHR('M'):		RET('>');		break;	case CHR('n'):		RETV(PLAIN, CHR('\n'));		break;	case CHR('r'):		RETV(PLAIN, CHR('\r'));		break;	case CHR('s'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 's');		break;	case CHR('S'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 'S');		break;	case CHR('t'):		RETV(PLAIN, CHR('\t'));		break;	case CHR('u'):		c = lexdigits(v, 16, 4, 4);		if (ISERR())			FAILW(REG_EESCAPE);		RETV(PLAIN, c);		break;	case CHR('U'):		c = lexdigits(v, 16, 8, 8);		if (ISERR())			FAILW(REG_EESCAPE);		RETV(PLAIN, c);		break;	case CHR('v'):		RETV(PLAIN, CHR('\v'));		break;	case CHR('w'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 'w');		break;	case CHR('W'):		NOTE(REG_ULOCALE);		RETV(CCLASS, 'W');		break;	case CHR('x'):		NOTE(REG_UUNPORT);		c = lexdigits(v, 16, 1, 255);	/* REs >255 long outside spec */		if (ISERR())			FAILW(REG_EESCAPE);		RETV(PLAIN, c);		break;	case CHR('y'):		NOTE(REG_ULOCALE);		RETV(WBDRY, 0);		break;	case CHR('Y'):		NOTE(REG_ULOCALE);		RETV(NWBDRY, 0);		break;	case CHR('Z'):		RETV(SEND, 0);		break;	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):	case CHR('9'):		save = v->now;		v->now--;	/* put first digit back */		c = lexdigits(v, 10, 1, 255);	/* REs >255 long outside spec */		if (ISERR())			FAILW(REG_EESCAPE);		/* ugly heuristic (first test is "exactly 1 digit?") */		if (v->now - save == 0 || (int)c <= v->nsubexp) {			NOTE(REG_UBACKREF);			RETV(BACKREF, (chr)c);		}		/* oops, doesn't look like it's a backref after all... */		v->now = save;		/* and fall through into octal number */	case CHR('0'):		NOTE(REG_UUNPORT);		v->now--;	/* put first digit back */		c = lexdigits(v, 8, 1, 3);		if (ISERR())			FAILW(REG_EESCAPE);		RETV(PLAIN, c);		break;	default:		assert(iscalpha(c));		FAILW(REG_EESCAPE);	/* unknown alphabetic escape */		break;	}	assert(NOTREACHED);}/* - lexdigits - slurp up digits and return chr value ^ static chr lexdigits(struct vars *, int, int, int); */static chr			/* chr value; errors signalled via ERR */lexdigits(v, base, minlen, maxlen)struct vars *v;int base;int minlen;int maxlen;{	uchr n;			/* unsigned to avoid overflow misbehavior */	int len;	chr c;	int d;	CONST uchr ub = (uchr) base;	n = 0;	for (len = 0; len < maxlen && !ATEOS(); len++) {		c = *v->now++;		switch (c) {		case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):		case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):		case CHR('8'): case CHR('9'):			d = DIGITVAL(c);			break;		case CHR('a'): case CHR('A'): d = 10; break;		case CHR('b'): case CHR('B'): d = 11; break;		case CHR('c'): case CHR('C'): d = 12; break;		case CHR('d'): case CHR('D'): d = 13; break;		case CHR('e'): case CHR('E'): d = 14; break;		case CHR('f'): case CHR('F'): d = 15; break;		default:			v->now--;	/* oops, not a digit at all */			d = -1;			break;		}		if (d >= base) {	/* not a plausible digit */			v->now--;			d = -1;		}		if (d < 0)			break;		/* NOTE BREAK OUT */		n = n*ub + (uchr)d;	}	if (len < minlen)		ERR(REG_EESCAPE);	return (chr)n;}/* - brenext - get next BRE token * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. ^ static int brenext(struct vars *, pchr); */static int			/* 1 normal, 0 failure */brenext(v, pc)struct vars *v;pchr pc;{	chr c = (chr)pc;	switch (c) {	case CHR('*'):		if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))			RETV(PLAIN, c);		RET('*');		break;	case CHR('['):		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&				*(v->now+1) == CHR(':') &&				(*(v->now+2) == CHR('<') ||						*(v->now+2) == CHR('>')) &&				*(v->now+3) == CHR(':') &&				*(v->now+4) == CHR(']') &&				*(v->now+5) == CHR(']')) {			c = *(v->now+2);			v->now += 6;			NOTE(REG_UNONPOSIX);			RET((c == CHR('<')) ? '<' : '>');		}		INTOCON(L_BRACK);		if (NEXT1('^')) {			v->now++;			RETV('[', 0);		}		RETV('[', 1);		break;	case CHR('.'):		RET('.');		break;	case CHR('^'):		if (LASTTYPE(EMPTY))			RET('^');		if (LASTTYPE('(')) {			NOTE(REG_UUNSPEC);			RET('^');		}		RETV(PLAIN, c);		break;	case CHR('$'):		if (v->cflags&REG_EXPANDED)			skip(v);		if (ATEOS())			RET('$');		if (NEXT2('\\', ')')) {			NOTE(REG_UUNSPEC);			RET('$');		}		RETV(PLAIN, c);		break;	case CHR('\\'):		break;		/* see below */	default:		RETV(PLAIN, c);		break;	}	assert(c == CHR('\\'));	if (ATEOS())		FAILW(REG_EESCAPE);	c = *v->now++;	switch (c) {	case CHR('{'):		INTOCON(L_BBND);		NOTE(REG_UBOUNDS);		RET('{');		break;	case CHR('('):		RETV('(', 1);		break;	case CHR(')'):		RETV(')', c);		break;	case CHR('<'):		NOTE(REG_UNONPOSIX);		RET('<');		break;	case CHR('>'):		NOTE(REG_UNONPOSIX);		RET('>');		break;	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):	case CHR('9'):		NOTE(REG_UBACKREF);		RETV(BACKREF, (chr)DIGITVAL(c));		break;	default:		if (iscalnum(c)) {			NOTE(REG_UBSALNUM);			NOTE(REG_UUNSPEC);		}		RETV(PLAIN, c);		break;	}	assert(NOTREACHED);}/* - skip - skip white space and comments in expanded form ^ static VOID skip(struct vars *); */static VOIDskip(v)struct vars *v;{	chr *start = v->now;	assert(v->cflags&REG_EXPANDED);	for (;;) {		while (!ATEOS() && iscspace(*v->now))			v->now++;		if (ATEOS() || *v->now != CHR('#'))			break;				/* NOTE BREAK OUT */		assert(NEXT1('#'));		while (!ATEOS() && *v->now != CHR('\n'))			v->now++;		/* leave the newline to be picked up by the iscspace loop */	}	if (v->now != start)		NOTE(REG_UNONPOSIX);}/* - newline - return the chr for a newline * This helps confine use of CHR to this source file. ^ static chr newline(NOPARMS); */static chrnewline(){	return CHR('\n');}/* - ch - return the chr sequence for regc_locale.c's fake collating element ch * This helps confine use of CHR to this source file.  Beware that the caller * knows how long the sequence is. ^ #ifdef REG_DEBUG ^ static chr *ch(NOPARMS); ^ #endif */#ifdef REG_DEBUGstatic chr *ch(){	static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };	return chstr;}#endif/* - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); */static chrchrnamed(v, startp, endp, lastresort)struct vars *v;chr *startp;			/* start of name */chr *endp;			/* just past end of name */pchr lastresort;		/* what to return if name lookup fails */{	celt c;	int errsave;	int e;	struct cvec *cv;	errsave = v->err;	v->err = 0;	c = element(v, startp, endp);	e = v->err;	v->err = errsave;	if (e != 0)		return (chr)lastresort;	cv = range(v, c, c, 0);	if (cv->nchrs == 0)		return (chr)lastresort;	return cv->chrs[0];}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?