⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lex.c

📁 unix环境下实现的cmm语言编译器
💻 C
字号:
/*
 * lex.c - lexical analyser for the cmm compiler.
 *
 * gettoken() scans the input buffer starting at cp and returns the code of
 * the next token.  The input-buffer variables (cp, limit, line, lineno,
 * file), the helpers (nexline, string, stringd, array, error, warning) and
 * the token codes are declared elsewhere - presumably in cmm.h.
 */
#include "cmm.h"
#include <limits.h>

int t;				/* current token */
char *token;			/* text of the current token */
Coordinate src;			/* source position of the current token */
Symbol tsym;			/* symbol describing the current constant, if any */
static struct symbol tval;	/* backing storage that tsym points at for constants */

/* Character classes; each map[] entry is one of these bits or 0. */
enum {
	BLANK = 01, NEWLINE = 02, LETTER = 04,
	DIGIT = 010, OTHER = 020
};

/*
 * Character-class table indexed by unsigned char value.  Only entries
 * 000..176 are listed; the remaining entries are implicitly zero, so those
 * characters are reported as illegal by gettoken().
 */
static unsigned char map[256] = {
	/* 000 null	*/	0,
	/* 001 soh	*/	0,
	/* 002 stx	*/	0,
	/* 003 etx	*/	0,
	/* 004 eot	*/	0,
	/* 005 enq	*/	0,
	/* 006 ack	*/	0,
	/* 007 bel	*/	0,
	/* 010 bs	*/	0,
	/* 011 ht	*/	BLANK,
	/* 012 lf	*/	NEWLINE,
	/* 013 vt	*/	BLANK,
	/* 014 ff	*/	BLANK,
	/* 015 cr	*/	0,
	/* 016 so	*/	0,
	/* 017 si	*/	0,
	/* 020 dle	*/	0,
	/* 021 dc1	*/	0,
	/* 022 dc2	*/	0,
	/* 023 dc3	*/	0,
	/* 024 dc4	*/	0,
	/* 025 nak	*/	0,
	/* 026 syn	*/	0,
	/* 027 etb	*/	0,
	/* 030 can	*/	0,
	/* 031 em	*/	0,
	/* 032 sub	*/	0,
	/* 033 esc	*/	0,
	/* 034 fs	*/	0,
	/* 035 gs	*/	0,
	/* 036 rs	*/	0,
	/* 037 us	*/	0,
	/* 040 sp	*/	BLANK,
	/* 041 !	*/	OTHER,
	/* 042 "	*/	OTHER,
	/* 043 #	*/	0,
	/* 044 $	*/	0,
	/* 045 %	*/	OTHER,
	/* 046 &	*/	OTHER,
	/* 047 '	*/	OTHER,
	/* 050 (	*/	OTHER,
	/* 051 )	*/	OTHER,
	/* 052 *	*/	OTHER,
	/* 053 +	*/	OTHER,
	/* 054 ,	*/	OTHER,
	/* 055 -	*/	OTHER,
	/* 056 .	*/	0,
	/* 057 /	*/	OTHER,
	/* 060 0	*/	DIGIT,
	/* 061 1	*/	DIGIT,
	/* 062 2	*/	DIGIT,
	/* 063 3	*/	DIGIT,
	/* 064 4	*/	DIGIT,
	/* 065 5	*/	DIGIT,
	/* 066 6	*/	DIGIT,
	/* 067 7	*/	DIGIT,
	/* 070 8	*/	DIGIT,
	/* 071 9	*/	DIGIT,
	/* 072 :	*/	0,
	/* 073 ;	*/	OTHER,
	/* 074 <	*/	OTHER,
	/* 075 =	*/	OTHER,
	/* 076 >	*/	OTHER,
	/* 077 ?	*/	0,
	/* 100 @	*/	0,
	/* 101 A	*/	LETTER,
	/* 102 B	*/	LETTER,
	/* 103 C	*/	LETTER,
	/* 104 D	*/	LETTER,
	/* 105 E	*/	LETTER,
	/* 106 F	*/	LETTER,
	/* 107 G	*/	LETTER,
	/* 110 H	*/	LETTER,
	/* 111 I	*/	LETTER,
	/* 112 J	*/	LETTER,
	/* 113 K	*/	LETTER,
	/* 114 L	*/	LETTER,
	/* 115 M	*/	LETTER,
	/* 116 N	*/	LETTER,
	/* 117 O	*/	LETTER,
	/* 120 P	*/	LETTER,
	/* 121 Q	*/	LETTER,
	/* 122 R	*/	LETTER,
	/* 123 S	*/	LETTER,
	/* 124 T	*/	LETTER,
	/* 125 U	*/	LETTER,
	/* 126 V	*/	LETTER,
	/* 127 W	*/	LETTER,
	/* 130 X	*/	LETTER,
	/* 131 Y	*/	LETTER,
	/* 132 Z	*/	LETTER,
	/* 133 [	*/	OTHER,
	/* 134 backslash */	0,
	/* 135 ]	*/	OTHER,
	/* 136 ^	*/	0,
	/* 137 _	*/	LETTER,
	/* 140 `	*/	0,
	/* 141 a	*/	LETTER,
	/* 142 b	*/	LETTER,
	/* 143 c	*/	LETTER,
	/* 144 d	*/	LETTER,
	/* 145 e	*/	LETTER,
	/* 146 f	*/	LETTER,
	/* 147 g	*/	LETTER,
	/* 150 h	*/	LETTER,
	/* 151 i	*/	LETTER,
	/* 152 j	*/	LETTER,
	/* 153 k	*/	LETTER,
	/* 154 l	*/	LETTER,
	/* 155 m	*/	LETTER,
	/* 156 n	*/	LETTER,
	/* 157 o	*/	LETTER,
	/* 160 p	*/	LETTER,
	/* 161 q	*/	LETTER,
	/* 162 r	*/	LETTER,
	/* 163 s	*/	LETTER,
	/* 164 t	*/	LETTER,
	/* 165 u	*/	LETTER,
	/* 166 v	*/	LETTER,
	/* 167 w	*/	LETTER,
	/* 170 x	*/	LETTER,
	/* 171 y	*/	LETTER,
	/* 172 z	*/	LETTER,
	/* 173 {	*/	OTHER,
	/* 174 |	*/	0,
	/* 175 }	*/	OTHER,
	/* 176 ~	*/	0,
};

/*
 * escape - translate the escape letter at *rcp (the character following a
 * backslash) and store the resulting character in *p.
 *
 * An unrecognised letter is stored unchanged and a warning is issued.
 * Returns p + 1, i.e. the position just past the stored character.
 */
static char *escape(char *p, unsigned char *rcp)
{
	switch (*rcp) {
	case 'n':		/* LF */
		*p = 10;
		break;
	case 't':		/* HT */
		*p = 9;
		break;
	case 'b':		/* BS */
		*p = 8;
		break;
	case 'r':		/* CR */
		*p = 13;
		break;
	case 'f':		/* FF */
		*p = 12;
		break;
	case 'v':		/* VT */
		*p = 11;
		break;
	case '\\':		/* backslash */
		*p = 92;
		break;
	case '\'':
		*p = 39;
		break;
	case '\"':
		*p = 34;
		break;
	case 'a':		/* BEL */
		*p = 7;
		break;
	case '?':
		*p = 63;
		break;
	default:
		*p = (char)*rcp;
		warning("unknown escape character: '\\%c'\n", *rcp);
		break;
	}
	return p + 1;
}

/*
 * gettoken - scan and return the next token.
 *
 * Skips blanks, comments and line ends, records the token position in src,
 * and advances cp past the token.  Returns:
 *   - the character itself for single-character operators and punctuation;
 *   - NEQ, EQL, LEQ, GEQ for "!=", "==", "<=", ">=";
 *   - a keyword code (BREAK, CHAR, CONTINUE, ELSE, IF, INT, RETURN, VOID,
 *     WHILE, SIZEOF);
 *   - ID, with token set to the result of string() on the identifier text;
 *   - ICON, CCON or SCON for integer, character and string constants, with
 *     tsym pointing at tval, which holds the constant's type and value;
 *   - EOI, with tsym set to NULL, once nexline() leaves cp at or past limit.
 * Illegal characters are reported with error() and skipped.
 */
int gettoken(void)
{
	for (;;) {
		register unsigned char *rcp = cp;

		while (map[*rcp] & BLANK)
			rcp++;
		src.file = file;
		src.x = (char *)rcp - line + 1;
		src.y = lineno + 1;
		cp = rcp + 1;
		switch (*rcp++) {
		case '/':
			if (*rcp == '*') {
				int c = 0;	/* previous character inside the comment */

				for (rcp++; *rcp != '/' || c != '*'; ) {
					if (map[*rcp] & NEWLINE) {
						if (rcp < limit)
							c = *rcp;
						cp = rcp + 1;
						nexline();
						rcp = cp;
						if (rcp == limit)
							break;
					} else {
						c = *rcp++;
					}
				}
				if (rcp < limit)
					rcp++;	/* step over the closing '/' */
				else
					error("unclosed comment\n");
				cp = rcp;
				continue;
			}
			return '/';
		case '\n': case '\v': case '\r': case '\f':
			cp = rcp;
			nexline();
			rcp = cp;
			if (rcp >= limit) {
				tsym = NULL;
				return EOI;
			}
			continue;
		case '+': case '-': case '*': case ';':
		case '%': case ',': case '(': case ')':
		case '{': case '}': case '[': case ']':
		case '&':
			return rcp[-1];
		/* cp already points just past the first character here, so one
		 * more increment consumes the trailing '='. */
		case '!':
			return *rcp == '=' ? cp++, NEQ : '!';
		case '=':
			return *rcp == '=' ? cp++, EQL : '=';
		case '<':
			return *rcp == '=' ? cp++, LEQ : '<';
		case '>':
			return *rcp == '=' ? cp++, GEQ : '>';
		/* Keywords: match the remaining letters and require that the
		 * following character cannot continue an identifier. */
		case 'b':
			if (rcp[0] == 'r'
			&& rcp[1] == 'e'
			&& rcp[2] == 'a'
			&& rcp[3] == 'k'
			&& !(map[rcp[4]] & (DIGIT | LETTER))) {
				cp = rcp + 4;
				return BREAK;
			}
			goto id;
		case 'c':
			if (rcp[0] == 'h'
			&& rcp[1] == 'a'
			&& rcp[2] == 'r'
			&& !(map[rcp[3]] & (DIGIT | LETTER))) {
				cp = rcp + 3;
				return CHAR;
			}
			if (rcp[0] == 'o'
			&& rcp[1] == 'n'
			&& rcp[2] == 't'
			&& rcp[3] == 'i'
			&& rcp[4] == 'n'
			&& rcp[5] == 'u'
			&& rcp[6] == 'e'
			&& !(map[rcp[7]] & (DIGIT | LETTER))) {
				cp = rcp + 7;
				return CONTINUE;
			}
			goto id;
		case 'e':
			if (rcp[0] == 'l'
			&& rcp[1] == 's'
			&& rcp[2] == 'e'
			&& !(map[rcp[3]] & (DIGIT | LETTER))) {
				cp = rcp + 3;
				return ELSE;
			}
			goto id;
		case 'i':
			if (rcp[0] == 'f'
			&& !(map[rcp[1]] & (DIGIT | LETTER))) {
				cp = rcp + 1;
				return IF;
			}
			if (rcp[0] == 'n'
			&& rcp[1] == 't'
			&& !(map[rcp[2]] & (DIGIT | LETTER))) {
				cp = rcp + 2;
				return INT;
			}
			goto id;
		case 'r':
			if (rcp[0] == 'e'
			&& rcp[1] == 't'
			&& rcp[2] == 'u'
			&& rcp[3] == 'r'
			&& rcp[4] == 'n'
			&& !(map[rcp[5]] & (DIGIT | LETTER))) {
				cp = rcp + 5;
				return RETURN;
			}
			goto id;
		case 'v':
			if (rcp[0] == 'o'
			&& rcp[1] == 'i'
			&& rcp[2] == 'd'
			&& !(map[rcp[3]] & (DIGIT | LETTER))) {
				cp = rcp + 3;
				return VOID;
			}
			goto id;
		case 'w':
			if (rcp[0] == 'h'
			&& rcp[1] == 'i'
			&& rcp[2] == 'l'
			&& rcp[3] == 'e'
			&& !(map[rcp[4]] & (DIGIT | LETTER))) {
				cp = rcp + 4;
				return WHILE;
			}
			goto id;
		case 's':
			if (rcp[0] == 'i'
			&& rcp[1] == 'z'
			&& rcp[2] == 'e'
			&& rcp[3] == 'o'
			&& rcp[4] == 'f'
			&& !(map[rcp[5]] & (DIGIT | LETTER))) {
				cp = rcp + 5;
				return SIZEOF;
			}
			goto id;
		case 'a': case 'd': case 'f': case 'g':
		case 'h': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p':
		case 'q': case 't': case 'u':
		case 'x': case 'y': case 'z':
		case 'A': case 'B': case 'C': case 'D':
		case 'E': case 'F': case 'G': case 'H':
		case 'I': case 'J': case 'K': case 'L':
		case 'M': case 'N': case 'O': case 'P':
		case 'Q': case 'R': case 'S': case 'T':
		case 'U': case 'V': case 'W': case 'X':
		case 'Y': case 'Z': case '_':
		id: {
			token = (char *)rcp - 1;
			while (map[*rcp] & (DIGIT | LETTER))
				rcp++;
			cp = rcp;
			/* string() presumably returns a saved copy of the
			 * identifier text - confirm against its definition. */
			token = string(token, (char *)rcp - token);
			return ID;
		}
		case '0': case '1': case '2': case '3':
		case '4': case '5': case '6': case '7':
		case '8': case '9': {
			int n = 0;
			int overflow = 0;

			rcp--;
			while (map[*rcp] & DIGIT) {
				int d = *rcp - '0';

				/* n * 10 + d fits in an int exactly when
				 * n <= (INT_MAX - d) / 10; testing first avoids
				 * undefined signed overflow on long literals. */
				if (overflow || n > (INT_MAX - d) / 10)
					overflow = 1;
				else
					n = n * 10 + d;
				rcp++;
			}
			cp = rcp;
			if (overflow) {
				error("integer constant is too large\n");
				n = INT_MAX;
			}
			tsym = &tval;
			tsym->name = token = stringd(n);
			tsym->type = inttype;
			tsym->u.c.v.i = n;
			return ICON;
		}
		case '\'': {		/* character constant */
			char temp;

			token = &temp;
			if ('\\' == *rcp)
				escape(token, ++rcp);
			else
				temp = (char)*rcp;
			tsym = &tval;
			tsym->name = token = string(&temp, 1);
			tsym->type = chartype;
			tsym->u.c.v.c = temp;
			if (*++rcp != '\'')
				error("missing symbol '\n");
			else
				rcp++;
			cp = rcp;
			return CCON;
		}
		case '"': {		/* string constant */
			char buffer[512];
			int n;

			token = buffer;
			/* Copy until the closing quote or until buffer is full;
			 * characters beyond the buffer are not stored. */
			for (; *rcp != '"' && token < buffer + sizeof buffer; rcp++) {
				if (*rcp == '\n' || *rcp == '\v' || *rcp == '\r' || *rcp == '\f') {
					if (rcp == limit) {
						error("missing symbol '\"'\n");
						break;
					}
					*token++ = *rcp;
					lineno++;
				} else if ('\\' == *rcp) {	/* handle an escape sequence */
					token = escape(token, ++rcp);
				} else {
					*token++ = *rcp;
				}
			}
			n = token - buffer;
			token = string(buffer, n);
			tsym = &tval;
			tsym->u.c.v.p = token;
			tsym->type = array(chartype, n, 0);
			if (*rcp == '"')
				rcp++;
			cp = rcp;
			return SCON;
		}
		default:
			/* Characters with no class are illegal; report them and
			 * resume scanning at the next character. */
			if (map[rcp[-1]] == 0) {
				if (rcp[-1] < ' ' || rcp[-1] >= 0177)
					error("illegal character '\\0%o'\n", (unsigned)rcp[-1]);
				else
					error("illegal character '%c'\n", rcp[-1]);
			}
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -