📄 lex.c
字号:
#include "cmm.h"
#include <limits.h>

/*
 * lex.c - lexical analyzer.
 *
 * gettoken() scans the input buffer delimited by the externally maintained
 * cp / limit pointers and returns one token code per call.
 */

int t;                      /* current token */
char *token;                /* text of the current identifier or constant */
Coordinate src;             /* position of the current token */
Symbol tsym;                /* points at the temporary symbol storage */
static struct symbol tval;  /* storage for the temporary symbol */

/* Character classes; each entry of map[] holds one of these (or 0). */
enum { BLANK = 01, NEWLINE = 02, LETTER = 04, DIGIT = 010, OTHER = 020 };

/*
 * Classification of the character codes 0..0177, indexed by unsigned char.
 * Codes above 0177 are implicitly 0, i.e. not part of the language.
 */
static const unsigned char map[256] = {
    /* 000 null */ 0,
    /* 001 soh  */ 0,
    /* 002 stx  */ 0,
    /* 003 etx  */ 0,
    /* 004 eot  */ 0,
    /* 005 enq  */ 0,
    /* 006 ack  */ 0,
    /* 007 bel  */ 0,
    /* 010 bs   */ 0,
    /* 011 ht   */ BLANK,
    /* 012 lf   */ NEWLINE,
    /* 013 vt   */ BLANK,
    /* 014 ff   */ BLANK,
    /* 015 cr   */ 0,
    /* 016 so   */ 0,
    /* 017 si   */ 0,
    /* 020 dle  */ 0,
    /* 021 dc1  */ 0,
    /* 022 dc2  */ 0,
    /* 023 dc3  */ 0,
    /* 024 dc4  */ 0,
    /* 025 nak  */ 0,
    /* 026 syn  */ 0,
    /* 027 etb  */ 0,
    /* 030 can  */ 0,
    /* 031 em   */ 0,
    /* 032 sub  */ 0,
    /* 033 esc  */ 0,
    /* 034 fs   */ 0,
    /* 035 gs   */ 0,
    /* 036 rs   */ 0,
    /* 037 us   */ 0,
    /* 040 sp   */ BLANK,
    /* 041 !    */ OTHER,
    /* 042 "    */ OTHER,
    /* 043 #    */ 0,
    /* 044 $    */ 0,
    /* 045 %    */ OTHER,
    /* 046 &    */ OTHER,
    /* 047 '    */ OTHER,
    /* 050 (    */ OTHER,
    /* 051 )    */ OTHER,
    /* 052 *    */ OTHER,
    /* 053 +    */ OTHER,
    /* 054 ,    */ OTHER,
    /* 055 -    */ OTHER,
    /* 056 .    */ 0,
    /* 057 /    */ OTHER,
    /* 060 0    */ DIGIT,
    /* 061 1    */ DIGIT,
    /* 062 2    */ DIGIT,
    /* 063 3    */ DIGIT,
    /* 064 4    */ DIGIT,
    /* 065 5    */ DIGIT,
    /* 066 6    */ DIGIT,
    /* 067 7    */ DIGIT,
    /* 070 8    */ DIGIT,
    /* 071 9    */ DIGIT,
    /* 072 :    */ 0,
    /* 073 ;    */ OTHER,
    /* 074 <    */ OTHER,
    /* 075 =    */ OTHER,
    /* 076 >    */ OTHER,
    /* 077 ?    */ 0,
    /* 100 @    */ 0,
    /* 101 A    */ LETTER,
    /* 102 B    */ LETTER,
    /* 103 C    */ LETTER,
    /* 104 D    */ LETTER,
    /* 105 E    */ LETTER,
    /* 106 F    */ LETTER,
    /* 107 G    */ LETTER,
    /* 110 H    */ LETTER,
    /* 111 I    */ LETTER,
    /* 112 J    */ LETTER,
    /* 113 K    */ LETTER,
    /* 114 L    */ LETTER,
    /* 115 M    */ LETTER,
    /* 116 N    */ LETTER,
    /* 117 O    */ LETTER,
    /* 120 P    */ LETTER,
    /* 121 Q    */ LETTER,
    /* 122 R    */ LETTER,
    /* 123 S    */ LETTER,
    /* 124 T    */ LETTER,
    /* 125 U    */ LETTER,
    /* 126 V    */ LETTER,
    /* 127 W    */ LETTER,
    /* 130 X    */ LETTER,
    /* 131 Y    */ LETTER,
    /* 132 Z    */ LETTER,
    /* 133 [    */ OTHER,
    /* 134 \    */ 0,
    /* 135 ]    */ OTHER,
    /* 136 ^    */ 0,
    /* 137 _    */ LETTER,
    /* 140 `    */ 0,
    /* 141 a    */ LETTER,
    /* 142 b    */ LETTER,
    /* 143 c    */ LETTER,
    /* 144 d    */ LETTER,
    /* 145 e    */ LETTER,
    /* 146 f    */ LETTER,
    /* 147 g    */ LETTER,
    /* 150 h    */ LETTER,
    /* 151 i    */ LETTER,
    /* 152 j    */ LETTER,
    /* 153 k    */ LETTER,
    /* 154 l    */ LETTER,
    /* 155 m    */ LETTER,
    /* 156 n    */ LETTER,
    /* 157 o    */ LETTER,
    /* 160 p    */ LETTER,
    /* 161 q    */ LETTER,
    /* 162 r    */ LETTER,
    /* 163 s    */ LETTER,
    /* 164 t    */ LETTER,
    /* 165 u    */ LETTER,
    /* 166 v    */ LETTER,
    /* 167 w    */ LETTER,
    /* 170 x    */ LETTER,
    /* 171 y    */ LETTER,
    /* 172 z    */ LETTER,
    /* 173 {    */ OTHER,
    /* 174 |    */ 0,
    /* 175 }    */ OTHER,
    /* 176 ~    */ 0,
};

/*
 * escape - translate the character following a backslash.
 *
 * rcp points at the character after the backslash; the translated value is
 * stored in *p. An unrecognized escape yields the character itself and a
 * warning. Returns p + 1 so the caller can keep appending.
 */
static char *escape(char *p, const unsigned char *rcp)
{
    switch (*rcp) {
    case 'n':  *p = 10; break;  /* LF  */
    case 't':  *p = 9;  break;  /* HT  */
    case 'b':  *p = 8;  break;  /* BS  */
    case 'r':  *p = 13; break;  /* CR  */
    case 'f':  *p = 12; break;  /* FF  */
    case 'v':  *p = 11; break;  /* VT  */
    case '\\': *p = 92; break;  /* backslash */
    case '\'': *p = 39; break;  /* single quote */
    case '\"': *p = 34; break;  /* double quote */
    case 'a':  *p = 7;  break;  /* BEL */
    case '?':  *p = 63; break;  /* question mark */
    default:
        *p = (char)*rcp;
        warning("unknown escape character: '\\%c'\n", *rcp);
        break;
    }
    return p + 1;
}

/*
 * keyword_tail - check whether the text at rest spells tail and is not
 * followed by another identifier character.
 *
 * Returns the length of tail on a match and 0 otherwise. Characters are
 * compared one at a time and the scan stops at the first mismatch, so no
 * input beyond the first differing character is read.
 */
static int keyword_tail(const unsigned char *rest, const char *tail)
{
    int len = 0;

    while (tail[len] != '\0') {
        if (rest[len] != (unsigned char)tail[len])
            return 0;
        len++;
    }
    if (map[rest[len]] & (DIGIT | LETTER))
        return 0;
    return len;
}

/*
 * gettoken - scan and return the next token.
 *
 * Skips blanks and comments, records the token position in src, advances cp
 * past the token and returns its code: the character itself for single
 * character operators, or NEQ/EQL/LEQ/GEQ, a keyword code, ID, ICON, CCON,
 * SCON or EOI.
 *
 * Side effects: for ID, token is the identifier text; for ICON, CCON and
 * SCON, tsym points at tval, which describes the constant; for EOI, tsym is
 * NULL. Illegal characters are reported with error() and skipped.
 */
int gettoken(void)
{
    for (;;) {
        unsigned char *rcp = cp;
        int len;

        while (map[*rcp] & BLANK)
            rcp++;
        src.file = file;
        src.x = (char *)rcp - line + 1;
        src.y = lineno + 1;
        cp = rcp + 1;

        switch (*rcp++) {
        case '/':
            if (*rcp == '*') {
                int c = 0;  /* previously scanned character */

                /* The comment ends at a '/' that directly follows a '*'. */
                for (rcp++; *rcp != '/' || c != '*'; ) {
                    if (map[*rcp] & NEWLINE) {
                        if (rcp < limit)
                            c = *rcp;
                        cp = rcp + 1;
                        nexline();
                        rcp = cp;
                        if (rcp == limit)
                            break;
                    } else {
                        c = *rcp++;
                    }
                }
                if (rcp < limit)
                    rcp++;
                else
                    error("unclosed comment\n");
                cp = rcp;
                continue;
            }
            return '/';

        case '\n': case '\v': case '\r': case '\f':
            cp = rcp;
            nexline();
            rcp = cp;
            if (rcp >= limit) {
                tsym = NULL;
                return EOI;
            }
            continue;

        case '+': case '-': case '*': case ';': case '%': case ',':
        case '(': case ')': case '{': case '}': case '[': case ']':
        case '&':
            return rcp[-1];

        case '!':
            if (*rcp == '=') {
                cp++;
                return NEQ;
            }
            return '!';
        case '=':
            if (*rcp == '=') {
                cp++;
                return EQL;
            }
            return '=';
        case '<':
            if (*rcp == '=') {
                cp++;
                return LEQ;
            }
            return '<';
        case '>':
            if (*rcp == '=') {
                cp++;
                return GEQ;
            }
            return '>';

        /* Letters that may start a keyword; anything else is an identifier. */
        case 'b':
            if ((len = keyword_tail(rcp, "reak")) != 0) {
                cp = rcp + len;
                return BREAK;
            }
            goto id;
        case 'c':
            if ((len = keyword_tail(rcp, "har")) != 0) {
                cp = rcp + len;
                return CHAR;
            }
            if ((len = keyword_tail(rcp, "ontinue")) != 0) {
                cp = rcp + len;
                return CONTINUE;
            }
            goto id;
        case 'e':
            if ((len = keyword_tail(rcp, "lse")) != 0) {
                cp = rcp + len;
                return ELSE;
            }
            goto id;
        case 'i':
            if ((len = keyword_tail(rcp, "f")) != 0) {
                cp = rcp + len;
                return IF;
            }
            if ((len = keyword_tail(rcp, "nt")) != 0) {
                cp = rcp + len;
                return INT;
            }
            goto id;
        case 'r':
            if ((len = keyword_tail(rcp, "eturn")) != 0) {
                cp = rcp + len;
                return RETURN;
            }
            goto id;
        case 'v':
            if ((len = keyword_tail(rcp, "oid")) != 0) {
                cp = rcp + len;
                return VOID;
            }
            goto id;
        case 'w':
            if ((len = keyword_tail(rcp, "hile")) != 0) {
                cp = rcp + len;
                return WHILE;
            }
            goto id;
        case 's':
            if ((len = keyword_tail(rcp, "izeof")) != 0) {
                cp = rcp + len;
                return SIZEOF;
            }
            goto id;

        case 'a': case 'd': case 'f': case 'g': case 'h': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o': case 'p':
        case 'q': case 't': case 'u': case 'x': case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z': case '_':
        id: {
            token = (char *)rcp - 1;
            while (map[*rcp] & (DIGIT | LETTER))
                rcp++;
            cp = rcp;
            token = string(token, (char *)rcp - token);
            return ID;
        }

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9': {
            int n = 0;
            int overflow = 0;

            for (rcp--; map[*rcp] & DIGIT; rcp++) {
                int digit = *rcp - '0';

                /* Test before multiplying: signed overflow is undefined. */
                if (overflow || n > (INT_MAX - digit) / 10)
                    overflow = 1;
                else
                    n = n * 10 + digit;
            }
            if (overflow) {
                error("integer constant is too large\n");
                n = INT_MAX;
            }
            cp = rcp;
            tsym = &tval;
            tsym->name = token = stringd(n);
            tsym->type = inttype;
            tsym->u.c.v.i = n;
            return ICON;
        }

        case '\'': {    /* character constant */
            char temp = 0;
            int escaped = (*rcp == '\\');

            if (escaped)
                rcp++;
            if (map[*rcp] & NEWLINE) {
                /*
                 * The line ends inside the constant. Leave the newline for
                 * the next call instead of consuming it and reading on.
                 */
                error("missing symbol '\n");
            } else {
                if (escaped)
                    escape(&temp, rcp);
                else
                    temp = (char)*rcp;
                if (*++rcp != '\'')
                    error("missing symbol '\n");
                else
                    rcp++;
            }
            tsym = &tval;
            tsym->name = token = string(&temp, 1);
            tsym->type = chartype;
            tsym->u.c.v.c = temp;
            cp = rcp;
            return CCON;
        }

        case '"': {     /* string constant */
            char buffer[512];
            char *end = buffer + sizeof buffer;
            int truncated = 0;
            int n;

            token = buffer;
            while (*rcp != '"') {
                char ch;

                if (*rcp == '\n' || *rcp == '\v' || *rcp == '\r' || *rcp == '\f') {
                    if (rcp == limit) {
                        error("missing symbol '\"'\n");
                        break;
                    }
                    ch = (char)*rcp;
                    lineno++;
                } else if (*rcp == '\\') {  /* escape sequence */
                    if (rcp + 1 >= limit) {
                        /* A backslash at the very end of the buffer: stop
                         * on the final newline rather than stepping past
                         * limit. */
                        rcp++;
                        error("missing symbol '\"'\n");
                        break;
                    }
                    escape(&ch, ++rcp);
                } else {
                    ch = (char)*rcp;
                }

                /*
                 * Keep scanning to the closing quote even when the buffer
                 * is full, so the rest of the literal is not lexed as code.
                 */
                if (token < end) {
                    *token++ = ch;
                } else if (!truncated) {
                    truncated = 1;
                    error("string constant is too long\n");
                }
                rcp++;
            }
            n = (int)(token - buffer);
            token = string(buffer, n);
            tsym = &tval;
            tsym->u.c.v.p = token;
            tsym->type = array(chartype, n, 0);
            if (*rcp == '"')
                rcp++;
            cp = rcp;
            return SCON;
        }

        default:
            if (map[rcp[-1]] == 0) {
                /* Non-printable codes are shown as an octal escape. */
                if (rcp[-1] < ' ' || rcp[-1] >= 0177)
                    error("illegal character '\\0%o'\n", rcp[-1]);
                else
                    error("illegal character '%c'\n", rcp[-1]);
            }
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -