📄 scanner.cpp
字号:
#include "compiler.h"
#include "scanner.h"
#include "ctkparser.h"
// Character-class bit flags. Each entry of the Letter table below is an OR of
// these flags describing the classes the corresponding character belongs to.
// NOTE(review): identifiers starting with an underscore followed by an
// uppercase letter are reserved for the implementation in C++.
#define _S 0x01 // whitespace class: ' ' and the control characters 0x01-0x1F (\n \t \r ...)
#define _I 0x02 // identifier character: letter, digit or '_'
#define _O 0x04 // octal digit: 0-7
#define _H 0x08 // hexadecimal digit: 0-9 a-f A-F
#define _D 0x10 // may appear inside a numeric constant: 0-9 a-f A-F . + - x X
// Classification predicates: each one looks the character up in Letter and
// yields a non-zero value when the character has the class flag, 0 otherwise.
// The argument is converted with byte(c) before indexing (presumably an
// unsigned 8-bit type, so characters above 0x7F index the upper half of the
// table instead of going negative — TODO confirm against the project headers).
// NOTE(review): isspace has the same name as the standard <cctype> function;
// this macro replaces it wherever that header is also in scope.
#define isspace( c ) ( Letter[ byte(c) ] & _S )
#define isident( c ) ( Letter[ byte(c) ] & _I )
#define isdig( c ) ( Letter[ byte(c) ] & _D )
#define isoct( c ) ( Letter[ byte(c) ] & _O )
#define ishex( c ) ( Letter[ byte(c) ] & _H )
// Character classification table indexed by character code. Only the first
// 128 entries are listed; the remaining entries (0x80-0xFF) are not given an
// initializer and are therefore zero, i.e. they belong to no class.
static const char Letter [ 256 ] = {
// 0x00-0x0F: NUL has no class, every other control character is whitespace
0,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S,
// 0x10-0x1F: control characters, all whitespace
_S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S, _S,_S,_S,_S,
// 0x20-0x2F: space is whitespace; '+', '-' and '.' may occur in numbers
_S,0,0,0, 0,0,0,0, 0,0,0,_D, 0,_D,_D,0,
// '0'-'3'
_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,
// '4'-'7'
_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,_I|_O|_H|_D,
// '8', '9' (not octal), then ':' through '?'
_I|_H|_D,_I|_H|_D,0,0, 0,0,0,0,
// '@', 'A'-'F' (hex letters), 'G'-'O'
0,_I|_H|_D,_I|_H|_D,_I|_H|_D, _I|_H|_D,_I|_H|_D,_I|_H|_D,_I, _I,_I,_I,_I, _I,_I,_I,_I,
// 'P'-'Z' ('X' may occur in numbers), four punctuation characters, then '_'
_I,_I,_I,_I, _I,_I,_I,_I, _I|_D,_I,_I,0, 0,0,0,_I,
// '`', 'a'-'f' (hex letters), 'g'-'o'
0,_I|_H|_D,_I|_H|_D,_I|_H|_D, _I|_H|_D,_I|_H|_D,_I|_H|_D,_I, _I,_I,_I,_I, _I,_I,_I,_I,
// 'p'-'z' ('x' may occur in numbers), '{' through '~', then DEL
_I,_I,_I,_I, _I,_I,_I,_I, _I|_D,_I,_I,0, 0,0,0,0
} ;
// Definition of the scanner's static instance; yylex() pulls tokens from it.
CtkScanner CtkScanner::instance;
// Definition of the symbol table's static instance; the scanner registers
// every identifier it reads in this table.
CtkSymbolTable CtkSymbolTable::instance;
// Returns the token registered under `name`, creating it on first use.
//
// name - NUL-terminated spelling of the symbol; it is copied, so the caller
//        keeps ownership of its buffer.
// tag  - token code stored in a newly created token. When the name is
//        already registered the existing token is returned unchanged and
//        `tag` is ignored.
//
// New tokens receive the next symbol id and are linked in at the head of
// their hash bucket.
CtkToken* CtkSymbolTable::add(char const* name, int tag) {
    // Multiplicative hash (factor 31) over the bytes of the name.
    unsigned hash = 0;
    for (byte* cursor = (byte*)name; *cursor != 0; cursor++) {
        hash = hash*31 + *cursor;
    }
    int bucket = hash % TOKEN_HASH_TABLE_SIZE;

    // Walk the bucket chain; the stored hash is compared first so that the
    // string comparison only runs for likely matches.
    CtkToken* entry = tokenHashTable[bucket];
    while (entry != NULL) {
        if (entry->hash == hash && strcmp(entry->name, name) == 0) {
            return entry;
        }
        entry = entry->next;
    }

    // Not found: build a new token that owns a private copy of the name.
    CtkToken* created = new CtkToken();
    created->hash = hash;
    created->symId = ++nSymbols;
    created->tag = tag;
    created->name = new char[strlen(name) + 1];
    strcpy(created->name, name);

    // Push the token onto the front of its bucket.
    created->next = tokenHashTable[bucket];
    tokenHashTable[bucket] = created;
    return created;
}
int yylex()
{
return CtkScanner::instance.get();
}
// Points the scanner at a new input stream and loads its first line.
//
// f - stream to read from; it is stored and used by later get() calls.
//
// The line counter restarts at 1 and the scan position is placed at the
// beginning of the line buffer.
void CtkScanner::reset(FILE* f) {
    yyline = 1;
    yyfile = f;
    // Clear the buffer first: if the read below fails, the scanner sees an
    // empty line instead of leftovers from a previous input.
    *yybuf = '\0';
    fgets(yybuf, sizeof yybuf, yyfile);
    yyptr = yybuf;
}
// Scans the next token from the current input and returns its token code.
//
// Return value:
//   - EOF when no further line can be read from the input file;
//   - the character itself for single-character tokens ('+', '(', ';', ...);
//   - the parser's token constant for multi-character operators (INC, EQ,
//     SET_SHL, ...), literals (ILITERAL, RLITERAL, SLITERAL) and for
//     identifiers/keywords (the tag stored in the symbol table token).
// Semantic value:
//   - yylval.tok  for identifiers and keywords,
//   - yylval.ival / yylval.rval for integer / real literals,
//   - yylval.sval for string and character literals.
//
// The input is processed one line at a time out of yybuf. yyptr marks the
// start of the token being scanned while p runs ahead of it; on return yyptr
// is advanced past the token. Lexical errors are reported through error(),
// which terminates the process.
int CtkScanner::get()
{
    int pos;
    int tkn;
    long ival;
    double rval;
    char ch, *p = yyptr, *q;
    while(true) {
        switch (tkn = *p++) {
          // Blanks between tokens. '\r' is skipped as well so that sources
          // with CRLF line endings are accepted.
          case '\f': case ' ': case '\t': case '\r':
            yyptr = p;
            continue;
          // End of the buffered line: fetch the next one.
          case '\0': case '\n':
          readNextLine:
            if (fgets(yybuf, sizeof yybuf, yyfile) == NULL) {
                return EOF;
            }
            yyptr = p = yybuf;
            yyline += 1;
            continue;
          // Identifier or keyword.
          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
          case 'y': case 'z':
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
          case 'Y': case 'Z':
          case '_':
            while (isident(*p)) {
                p += 1;
            }
            // Terminate the lexeme in place for the lookup, then restore the
            // overwritten character.
            ch = *p;
            *p = '\0';
            yylval.tok = CtkSymbolTable::instance.add(yyptr, IDENT);
            // Keywords were pre-registered with their own tags, so the tag of
            // the returned token distinguishes them from plain identifiers.
            tkn = yylval.tok->tag;
            *p = ch;
            break;
          // Numeric constant.
          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            // The numeric class also contains hex letters, '.', '+', '-',
            // 'x' and 'X', so lexemes such as 0x1F or 1e+5 are consumed whole.
            // NOTE(review): for the same reason "1+2" written without spaces
            // is consumed as one lexeme and rejected below.
            while (isdig(*p)) {
                p += 1;
            }
            ch = *p;
            *p = '\0';
            // Try an integer first; it only counts when the conversion
            // consumed the whole lexeme. Otherwise retry as a real number.
            if (sscanf(yyptr, "%li%n", &ival, &pos) != 1
                || pos != (int)(p - yyptr))
            {
                if (sscanf(yyptr, "%lf%n", &rval, &pos) != 1
                    || pos != (int)(p - yyptr))
                {
                    error("Invalid numeric constant");
                } else {
                    yylval.rval = (ctk_real)rval;
                    tkn = RLITERAL;
                }
            } else {
                yylval.ival = (ctk_integer)ival;
                tkn = ILITERAL;
            }
            *p = ch;
            break;
          // Operators: when no longer form matches, tkn keeps the character
          // itself as the token code.
          case '+':
            if (*p == '+') {
                p += 1;
                tkn = INC;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_ADD;
            }
            break;
          case '-':
            if (*p == '-') {
                p += 1;
                tkn = DEC;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_SUB;
            }
            break;
          case '=':
            if (*p == '=') {
                p += 1;
                tkn = EQ;
            }
            break;
          case '*':
            if (*p == '=') {
                p += 1;
                tkn = SET_MUL;
            }
            break;
          case '/':
            if (*p == '=') {
                p += 1;
                tkn = SET_DIV;
            } else if (*p == '/') {
                // Line comment: drop the rest of the line.
                goto readNextLine;
            } else if (*p == '*') {
                // Block comment: advance until "*/", reading further lines
                // as needed. The pre-increment skips the opening '*', so
                // "/*/" does not close the comment. An unterminated comment
                // ends the scan with EOF without a diagnostic.
                do {
                    if (*++p == '\0') {
                        yyline += 1;
                        if (fgets(yybuf, sizeof yybuf, yyfile) == NULL) {
                            return EOF;
                        }
                        p = yybuf;
                    }
                } while (p[0] != '*' || p[1] != '/');
                yyptr = p += 2;
                continue;
            }
            break;
          case '%':
            if (*p == '=') {
                p += 1;
                tkn = SET_MOD;
            }
            break;
          case '&':
            if (*p == '&') {
                p += 1;
                tkn = LAND;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_AND;
            }
            break;
          case '|':
            if (*p == '|') {
                p += 1;
                tkn = LOR;
            } else if (*p == '=') {
                p += 1;
                tkn = SET_OR;
            }
            break;
          case '^':
            if (*p == '=') {
                p += 1;
                tkn = SET_XOR;
            }
            break;
          case '!':
            if (*p == '=') {
                p += 1;
                tkn = NE;
            }
            break;
          case '>':
            if (*p == '>') {
                if (*++p == '=') {
                    p += 1;
                    tkn = SET_SHR;
                } else {
                    tkn = SHR;
                }
            } else if (*p == '=') {
                p += 1;
                tkn = GE;
            }
            break;
          case '<':
            if (*p == '<') {
                if (*++p == '=') {
                    p += 1;
                    tkn = SET_SHL;
                } else {
                    tkn = SHL;
                }
            } else if (*p == '=') {
                p += 1;
                tkn = LE;
            }
            break;
          // Single-character tokens returned as themselves.
          case '$':
          case '.':
          case '?':
          case ':':
          case '~':
          case '(':
          case ')':
          case '{':
          case '}':
          case '[':
          case ']':
          case ';':
          case ',':
            break;
          // String or character literal. The decoded text is written back
          // into the line buffer starting at the opening quote, which is
          // safe because decoding never produces more characters than it
          // consumes.
          case '\'':
          case '\"':
            q = p-1;
            // tkn still holds the opening quote: only the same kind of quote
            // closes the literal, so "it's" and '"' are scanned correctly.
            while ((ch = *p++) != tkn) {
                if (ch == '\0') {
                    error("Unterminated character constant");
                }
                if (ch == '\\') {
                    // Any escaped character without a case below (including
                    // quotes and the backslash) stands for itself.
                    switch (ch = *p++) {
                      case '\0':
                        // Backslash at the very end of the buffer: stop here
                        // instead of scanning past the terminator.
                        error("Unterminated character constant");
                        break;
                      case 'r':
                        ch = '\r';
                        break;
                      case 'n':
                        ch = '\n';
                        break;
                      case 't':
                        ch = '\t';
                        break;
                      case 'f':
                        ch = '\f';
                        break;
                      case '0':
                      case '1':
                      case '2':
                      case '3':
                      case '4':
                      case '5':
                      case '6':
                      case '7':
                        // Octal escape: accumulate all following octal digits.
                        ch -= '0';
                        while (isoct(*p)) {
                            ch = ch*8 + *p++ - '0';
                        }
                        break;
                      case 'x':
                      case 'X':
                      {
                        // Hex escape: exactly two hex digits are required.
                        if (!ishex(p[0]) || !ishex(p[1])) {
                            error("Invalid hexademical constant in string");
                        }
                        // Cut the buffer after the two digits so that sscanf
                        // cannot read further, then restore the character.
                        ch = p[2];
                        p[2] = '\0';
                        unsigned value = 0; // %x stores through an unsigned*
                        sscanf(p, "%x", &value);
                        p[2] = ch;
                        ch = (char)value;
                        p += 2;
                        break;
                      }
                    }
                }
                *q++ = ch;
            }
            *q++ = '\0';
            // yyptr points at the former opening quote, i.e. at the start of
            // the decoded text.
            yylval.sval = ctkAllocateStringLiteral(yyptr);
            tkn = SLITERAL;
            break;
          default:
            error("Invalid character");
        }
        yyptr = p;
        return tkn;
    }
}
// Reports a fatal lexical error and terminates the process.
//
// msg - description of the problem.
//
// The message is emitted through ctkTrace as "path:line:offset: msg", where
// path is the path of the module currently being compiled, line is the
// scanner's current line number and offset is the distance of the current
// token start from the beginning of the line buffer. Does not return: the
// process exits with status 1.
void CtkScanner::error(char const* msg) {
    // A pointer difference has type ptrdiff_t, which is wider than int on
    // LP64 targets; convert it explicitly so that the argument matches the
    // %d conversion (assumes ctkTrace consumes its arguments printf-style —
    // TODO confirm against its declaration).
    int offset = (int)(yyptr - yybuf);
    ctkTrace("%s:%d:%d: %s", CtkCompiler::instance.currModule->path,
             yyline, offset, msg);
    exit(1);
}
// Pre-registers the reserved words with their keyword tags. Because add()
// returns the existing token for a known name, an identifier scanned later
// with one of these spellings resolves to the keyword tag instead of IDENT.
// The registration order is significant: it determines the symbol ids.
CtkSymbolTable::CtkSymbolTable()
{
    static const struct {
        char const* spelling;
        int         tag;
    } reserved[] = {
        { "if",           IF },
        { "else",         ELSE },
        { "for",          FOR },
        { "function",     FUNCTION },
        { "do",           DO },
        { "while",        WHILE },
        { "try",          TRY },
        { "catch",        CATCH },
        { "switch",       SWITCH },
        { "case",         CASE },
        { "default",      DEFAULT },
        { "continue",     CONTINUE },
        { "break",        BREAK },
        { "par",          PAR },
        { "return",       RETURN },
        { "throw",        THROW },
        { "import",       IMPORT },
        { "synchronized", SYNCHRONIZED },
        // Both spellings of the null literal share one tag.
        { "null",         NULLLITERAL },
        { "NULL",         NULLLITERAL }
    };
    for (unsigned k = 0; k < sizeof reserved / sizeof reserved[0]; k++) {
        add(reserved[k].spelling, reserved[k].tag);
    }
}
// Releases every token in the hash table together with the name copy that
// add() allocated for it.
CtkSymbolTable::~CtkSymbolTable()
{
    for (int bucket = 0; bucket < TOKEN_HASH_TABLE_SIZE; bucket++) {
        for (CtkToken* entry = tokenHashTable[bucket]; entry != NULL; ) {
            CtkToken* doomed = entry;
            // Step to the successor before the current node is destroyed.
            entry = entry->next;
            delete[] doomed->name;
            delete doomed;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -