📄 lexer.c
字号:
/* lexer.c * * (C) Copyright Apr 15 1995, Edmond J. Breen. * ALL RIGHTS RESERVED. * This code may be copied for personal, non-profit use only. * * * History: * First written V1.0 1984 (E.J.B) * Revised V1.2 1994 (E.J.B) * Revised V2.0 1995 (E.J.B) * */ #include <stdio.h>#include <stdlib.h>#include <limits.h>#include <string.h>#include <math.h>#include <float.h>#include "global.h"#include "xalloc.h"#include "error.h"#include "symbol.h"#include "lexer.h"#include "preproc.h"static void retract(char c);static int fail(int ival, int c);static int charliteral(int c);static void EiC_stringliteral(void);static void success(int ival);char nextchar(void);char *nextproline(void);void retract(char c);int fail(int ival, int c);int charliteral(int c);void stringliteral(void);void success(int ival);#define NEWLINE(ch) (ch == '\n' ? TRUE: FALSE)#define LETTER(ch) ((ch >= 'a' && ch <= 'z') ? TRUE :\ ((ch >= 'A' && ch <= 'Z') ? TRUE :\ ((ch == '_' || ch == '$')? TRUE : FALSE)))#define DIGIT(ch) ((ch >= '0' && ch <= '9') ? TRUE : FALSE)#define WHITE(ch) ((ch == ' ' || ch == '\t')? TRUE : FALSE)keyword_t cwords[] ={ {"__eiclongjmp",eiclongjmpsym,}, {"__eicsetjmp",eicsetjmpsym,}, {"auto", autosym,}, {"break", breaksym,}, {"case", casesym,}, {"char", charsym,}, {"const", constsym,}, {"continue", continuesym,}, {"default", defaultsym,}, {"do", dosym,}, {"double", doublesym,}, {"else", elsesym,}, {"enum", enumsym,}, {"extern", externsym,}, {"float", floatsym,}, {"for", forsym,}, {"goto",gotosym,}, {"if", ifsym,}, {"int", intsym,}, {"long", longsym,}, {"register", registersym,}, {"return", returnsym,}, {"safe", safesym,}, {"short", shortsym,}, {"signed", signedsym,}, {"sizeof", sizeofsym,}, {"static", staticsym,}, {"struct", structsym,}, {"switch", switchsym,}, {"typedef", typedefsym,}, {"union", unionsym,}, {"unsafe", unsafesym,}, {"unsigned", unsignedsym,}, {"void", voidsym,}, {"volatile", volatilesym,}, {"while", whilesym,},};#define NOTOKEN 0char EiC_LEXEM[BSIZE+1];static token_t TOK;token_t *token = &TOK;#ifdef ILOOKAHEADtoken_t EiC_TokenArray[MAX_TOKENS];int EiC_TokenI = 0, EiC_TokenP = 0, EiC_TokenR=0;#elseunsigned short STOKEN;#endifunsigned lex_lineno, lex_lineprev, lex_linepos, lex_linelen, lex_curpos = 0, lex_lastpos;char *lex_buff;static int state = 0;static unsigned long lexival;static double lexfval;static char Lseen, /* long seen */ Fseen, /* floating point seen */ Useen, /* Unsigned specifier seen */ Hseen; /* Hex or Octal value seen */#define getoct(x) (((x)>='0'&&(x)<='7')? (x)-'0':-1)static int gethex(int c){ if (c >= '0' && c <= '9') return c - '0'; if (c >= 'a' && c <= 'f') return c - 'a' + 10; if (c >= 'A' && c <= 'F') return c - 'A' + 10; return -1;}void EiC_initlex(char *str){ lex_buff = str; lex_curpos = 0; lex_lineprev = 0; lex_lineno = 1;#ifdef ILOOKAHEAD EiC_TokenR = EiC_TokenP = EiC_TokenI = 0; #else STOKEN = NOTOKEN;#endif }char EiC_nextchar(){ char *EiC_nextproline(); if (lex_buff[lex_curpos] == EOF) { return 0; } else if (lex_buff[lex_curpos] == '\0') { if (lex_curpos != lex_lastpos) return 0; lex_buff = EiC_nextproline(); lex_lastpos = lex_curpos = 0; lex_lineprev = 0; lex_lineno = 1; } if (lex_lineprev != lex_lineno) { lex_linepos = lex_curpos; lex_lineprev = lex_lineno; } return (lex_buff[lex_curpos++]);}static void checkExt(int c){ /* check for unsigned and long suffix */ Lseen = Useen = Fseen = 0; if (c == 'f' || c == 'F') Fseen = 1; else if (c == 'u' || c == 'U') { Useen = 1; if ((c = EiC_nextchar()) == 'l' || c == 'L') Lseen = 1; else retract(c); } else if (c == 'l' || c == 'L') Lseen = 1; else retract(c);}static void retract(char c){ if (c != '\0') { lex_curpos--; if (lex_curpos < lex_lastpos) lex_lastpos = lex_curpos; }}static int fail(int ival, int c){ retract(c); switch (ival) { case RELOP: return (10); case ID: return (20); case FLOAT: case INT: return (100); } return 0;}static void setfloatval(void){ if (Lseen || lexfval > FLT_MAX || !Fseen ) { token->Val.dval = lexfval; token->Tok = DOUBLE; } else { /* mandatory conversion to float */ float f = lexfval; token->Val.dval = f; token->Tok = FLOAT; }}static void setintval(void){ if (Fseen) { lexfval = lexival; setfloatval(); return; } if (Useen) { if (Lseen || lexival > UINT_MAX) { token->Tok = ULONG; token->Val.ulval = lexival; } else { token->Tok = UINT; token->Val.uival = (unsigned) lexival; } } else if (Lseen || lexival > UINT_MAX) { if (lexival > ULONG_MAX) { token->Tok = ULONG; token->Val.ulval = lexival; } else if (lexival >= ULONG_MAX) { token->Tok = ULONG; token->Val.ulval = lexival; } else { token->Val.lval = lexival; token->Tok = LONG; } } else { if (lexival <= INT_MAX) { token->Val.ival = (int) lexival; token->Tok = INT; } else if(Hseen && lexival <= UINT_MAX) { token->Tok = UINT; token->Val.uival = (unsigned) lexival; } else if(lexival <= LONG_MAX) { token->Val.lval = (long) lexival; token->Tok = LONG; } else { token->Tok = ULONG; token->Val.ulval = lexival; } }}static void success(int ival){ int i, size; size = (int) (lex_curpos - lex_lastpos); memcpy(EiC_LEXEM, &lex_buff[lex_lastpos], size); EiC_LEXEM[size] = '\0'; if (Lseen) size--; if (Useen) size--; if (Fseen) size--; Hseen = 0; switch (ival) { case ID: if ((token->Tok = EiC_iskeyword(cwords, EiC_LEXEM, sizeof(cwords) / sizeof(keyword_t))) == 0) { token->Tok = ID; /* search for id in various name spaces */ if ((token->Val.sym = EiC_lookup(EiC_work_tab, EiC_LEXEM)) == NULL) token->Val.sym = EiC_insertLUT(EiC_work_tab, EiC_LEXEM, ID); if (token->Val.sym) if (token->Val.sym->sclass == c_typedef) token->Tok = TYPENAME; } break; case OCTAL: if (Fseen) EiC_error("Declaration syntax error"); for (lexival = 0, i = 0; i < size; i++) lexival = lexival * 8 + getoct(EiC_LEXEM[i]); Hseen = 1; setintval(); break; case HEX: for (lexival = 0, i = 2; i < size; i++) lexival = lexival * 16 + gethex(EiC_LEXEM[i]); Hseen = 1; setintval(); break; case INT: for (lexival = 0, i = 0; i < size; i++) lexival = lexival * 10 + EiC_LEXEM[i] - '0'; setintval(); break; case FLOAT: if (Useen) EiC_error("Declaration syntax error"); lexfval = atof(EiC_LEXEM); setfloatval(); break; case RELOP: case MISC: break; }}static int WASLITERAL;static int charliteral(int c){ if (c == '\\') { switch ((c = EiC_nextchar())) { case 'n': c = '\n'; break; /* newline */ case 't': c = '\t'; break; /* tabspace */ case 'v': c = '\v'; break; /* vertical tab */ case 'b': c = '\b'; break; /* backspace */ case 'r': c = '\r'; break; /* carriage return */ case 'f': c = '\f'; break; /* formfeed */ case 'a': c = '\a'; break; /* bell */ case '\\': c = '\\'; break; /* backslash */ case '\'': c = '\''; break; /* single quote */ case '"': c = '\"'; break; /* double quote */ case '?': c = '\?'; break; /* question mark */ case 'x': /* string of hex characters */ case 'X':{ int i, val = 0; while ((i = gethex((c = EiC_nextchar()))) > -1) { val = val * 16 + i; } retract(c); if (val > 255) EiC_error("Illegal character hex value"); c = val; } break; default: if (getoct(c) > -1) { /* octal characters */ int i, val = 0; while ((i = getoct(c)) > -1) { val = val * 8 + i; c = EiC_nextchar(); } retract(c); if (val > 255) EiC_error("Illegal character octal value"); c = val; } else EiC_error("Illegal character escape sequence `\\%c'", c); break; } WASLITERAL = 1; } else WASLITERAL = 0; return ((signed char )c);}static void EiC_stringliteral(void){ unsigned size, lastsize = 0, c; char *p=NULL; lex_lastpos = lex_curpos; do { for (size = 0; ((c = charliteral(EiC_nextchar())) != '\0' || WASLITERAL) && !(c == '"' && !WASLITERAL) && size < BSIZE; size++) EiC_LEXEM[size] = c; if (lastsize) p = (char *) xrealloc(p, lastsize + size + 1); else p = (char *) xcalloc(size + 1, sizeof(char)); memcpy(&p[lastsize], EiC_LEXEM, size); lastsize += size; if(c != '"' && size == BSIZE) { p[lastsize++] = c; continue; } if (c != '"') EiC_error("String literal error"); do { c = EiC_nextchar(); if (c == '\n') lex_lastpos++, lex_lineno++; } while (WHITE(c) || c == '\n'); lex_lastpos = lex_curpos; if (!c) do c = EiC_nextchar(); while (WHITE(c)); } while (c == '"' || size == BSIZE); retract(c); p[lastsize] = '\0'; token->Val.p.sp = token->Val.p.p = p; token->Val.p.ep = p + lastsize + 1;}extern int EiC_lexan(void){ int t=0, loop; char c=0, EiC_nextchar();#ifdef ILOOKAHEAD token = &EiC_TokenArray[EiC_TokenP]; if(EiC_TokenR > 0) { EiC_TokenR--; EiC_TokenI++; EiC_TokenP=(EiC_TokenP+1)%MAX_TOKENS; return token->Tok; }#else if (STOKEN != NOTOKEN) { STOKEN = NOTOKEN; return token->Tok; }#endif loop = 1; state = 0; while (loop) { switch (state) { case 0: lex_lastpos = lex_curpos; c = EiC_nextchar(); state = (WHITE(c) ? 0 : (c == '\n' ? lex_lineno++, 0 : (c == '<' ? t = LT, 1 : (c == '>' ? t = GT, 2 : (c == '+' ? t = '+', 3 : (c == '-' ? t = '-', 4 : (c == '|' ? t = BOR, 5 : (c == '&' ? t = AND, 6 : (c == '\''? 7 : (c == '"' ? 8 : (c == '.' ? 9 : (c == '/' ? t = '/', c = EiC_nextchar(), 50 : (c == '%' ? t = '%', c = EiC_nextchar(), 50 : (c == '*' ? t = '*', c = EiC_nextchar(), 50 : (c == '=' ? t = ASS, c = EiC_nextchar(), 50 : (c == '!' ? t = NOT, c = EiC_nextchar(), 50 : (c == '^' ? t = XOR, c = EiC_nextchar(), 50 : fail(RELOP, c)))))))))))))))))); break; case 1: /* get <, <= and << */ if ((c = EiC_nextchar()) == '<') t = LSHT; else state = 50; break; case 2: /* get >, >= and >> */ if ((c = EiC_nextchar()) == '>') t = RSHT; else state = 50; break; case 3: c = EiC_nextchar(); /* get +, += or ++ */ if (c == '+') t = INC, state = 60; else state = 50; break; case 4: c = EiC_nextchar(); /* get -, -= -- */ state = 60; if (c == '-') t = DEC; else if (c == '>') t = RARROW; else state = 50; break; case 5: c = EiC_nextchar(); /* get |, |= or || */ if (c == '|') t = LOR, state = 60; else state = 50; break; case 6: c = EiC_nextchar(); /* get &, &= or && */ if (c == '&') t = LAND, state = 60; else state = 50; break; case 7:token->Val.ival = charliteral(EiC_nextchar()); /* char_constants */ t = CHAR; if (EiC_nextchar() != '\'') EiC_error("Missing single quote '"); state = 60; break; case 8: EiC_stringliteral(); /* string literals */ token->Tok = STR; /*return STR;*/ loop = 0; break; case 9: c = EiC_nextchar(); t = '.'; if(DIGIT(c)) state = 22; else state = 60; retract(c); break; case 10: c = EiC_nextchar(); /* identifiers and keywords */ state = (LETTER(c) ? 11 : (c == '_' ? 11 : fail(ID, c))); break; case 11: c = EiC_nextchar(); state = (LETTER(c) ? 11 : (DIGIT(c) ? 11 : (c == '_' ? 11 : 12))); break; case 12: retract(c); success(ID); /*return (token->Tok);*/ loop = 0; break; case 20: c = EiC_nextchar(); /* integers and reals */ state = (c == '0' ? 30 : (DIGIT(c) ? 21 : fail(INT, c))); break; case 21: c = EiC_nextchar(); state = (DIGIT(c) ? 21 : (c == '.' ? 22 : (c == 'e' ? 23 : (c == 'E' ? 23 : 25)))); break; case 22: c = EiC_nextchar(); state = (DIGIT(c) ? 22 : (c == 'e' ? 23 : (c == 'E' ? 23 : 26))); break; case 23: c = EiC_nextchar(); state = (c == '+' ? 24 : (c == '-' ? 24 : (DIGIT(c) ? 24 : fail(FLOAT, c) /* ??? */ ))); break; case 24: c = EiC_nextchar(); state = (DIGIT(c) ? 24 : 26); break; case 25: checkExt(c); success(INT); /*return (token->Tok);*/ loop = 0; break; case 26: checkExt(c); success(FLOAT); /*return (token->Tok);*/ loop = 0; break; case 27: checkExt(c); success(HEX); /*return (token->Tok);*/ loop = 0; break; case 28: checkExt(c); success(OCTAL); /*return (token->Tok);*/ loop = 0; break; case 30: /* check for octal and hex numbers */ if ((c = EiC_nextchar()) == 'x' || c == 'X') { while (gethex((c = EiC_nextchar())) > -1); state = 27; break; } if (c != '.' && c != 'e' && c != 'E') { while (getoct(c) > -1) c = EiC_nextchar(); state = 28; break; } retract(c); state = 21; break; case 50: /* mix with equal's */ if (c == '=') switch (t) { case '+': t = ADDEQ; break; /* += */ case '-': t = SUBEQ; break; /* -= */ case '/': t = DIVEQ; break; /* /= */ case '*': t = MULEQ; break; /* *= */ case '%': t = MODEQ; break; /* %= */ case ASS: t = EQ; break; /* == */ case GT: t = GE; break; /* >= */ case LT: t = LE; break; /* <= */ case NOT: t = NE; break; /* != */ case RSHT:t = RSHTEQ; break; /* >>= */ case LSHT:t = LSHTEQ; break; /* <<= */ case AND: t = ANDEQ; break; /* &= */ case BOR: t = BOREQ; break; /* |= */ case XOR: t = XOREQ; break; /* ^= */ default: retract(c); } else retract(c); state = 60; break; case 60: success(MISC); token->Tok = t; /*return (token->Tok);*/ loop = 0; break; case 100: token->Tok = EiC_nextchar(); /*return (token->Tok);*/ loop = 0; break; } }#ifdef ILOOKAHEAD if(EiC_TokenI<MAX_TOKENS) EiC_TokenI++; EiC_TokenP = (EiC_TokenP +1)%MAX_TOKENS;#endif return token->Tok;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -