📄 lexer.cpp
字号:
#include "lexer.h"
// A freshly constructed token is classified as "bad" until the lexer
// assigns it a real type; the numeric payload starts at zero.
Token::Token()
{
    type = TOK_BAD;
    arg = 0;
}
// A new Line has no file attached yet, has not seen end-of-file,
// and defaults to line number 1.
Line::Line()
{
    _fileName = NULL;
    _eof = false;
    _lineNum = 1;
}
// Opens `fileName` for reading and resets the buffer bookkeeping.
// Terminates the process when the file cannot be opened.
// NOTE(review): `_fileName` is copied with an unchecked strcpy — its
// capacity is declared in lexer.h; confirm it covers the longest path
// this tool is given.
File::File(const char* fileName)
{
    _curPos = 0;
    _curLine = 0;
    _numElems = 0;
    strcpy(_fileName, fileName);
    _file = fopen(fileName, "r");
    if (_file == NULL)
    {
        // FIX: diagnostics belong on stderr, and a failed open must not
        // exit with status 0 (the original signalled success on error).
        fprintf(stderr, "Source file open error: %s\n", fileName);
        exit(EXIT_FAILURE);
    }
}
// Reads one source line from the file into `line._buffer`. The '\n'
// terminator is consumed but not stored; the buffer is always
// NUL-terminated. Stamps the Line with its 1-based line number and the
// owning file name, and sets `line._eof` as soon as the EOF sentinel
// planted by FillBuffer() is seen.
// NOTE(review): `line._buffer` is written without a bounds check — a
// source line longer than the Line buffer overflows it; confirm the
// buffer size in lexer.h against the longest line that must be accepted.
void File::GetLine(Line& line)
{
int i = 0;
++_curLine;
char ch = GetChar();
// GetChar() hands back the (char)EOF sentinel once the underlying
// fread() is exhausted; comparing a plain char against EOF assumes
// char is signed on this platform — TODO confirm for the target.
if (ch == EOF)
{
line._eof = true;
}
while ((ch != '\n') && (ch != EOF))
{
line._buffer[i++] = ch;
ch = GetChar();
if (ch == EOF)
{
line._eof = true;
}
}
line._buffer[i] = '\0';
line._lineNum = _curLine;
line._fileName = _fileName;
}
// Returns the next raw character from the block buffer, refilling it
// when the read cursor has run off the end (or the buffer has never
// been filled, which is the initial `_numElems == 0` state).
char File::GetChar()
{
    const bool exhausted = (_curPos == BUFFER_SIZE) || (_numElems == 0);
    if (exhausted)
    {
        FillBuffer();
    }
    return _buffer[_curPos++];
}
void File::FillBuffer()
{
int cb = fread(_buffer, sizeof(char), BUFFER_SIZE, _file);
if (cb < BUFFER_SIZE)
{
if (!feof(_file))
{
printf("Source file reading error%s\n", _fileName);
exit(0);
}
else
{
_numElems = cb;
_buffer[cb] = EOF;
}
}
_numElems = BUFFER_SIZE;
_curPos = 0;
}
// Builds a lexer over `file`/`line` and seeds the keyword table with
// the built-in type names (mapped to their type indices) plus the set
// of recognized directives.
Lexer::Lexer(File& file, Line& line)
    :_curFile(file), _curLine(line), _curPos(0), _lastTokPos(0)
{
    MapBucket b;
    b._str = "s8";   b._index = Int8;    _keywords.Insert(b);
    b._str = "s16";  b._index = Int16;   _keywords.Insert(b);
    b._str = "s32";  b._index = Int32;   _keywords.Insert(b);
    b._str = "s64";  b._index = Int64;   _keywords.Insert(b);
    b._str = "u8";   b._index = UInt8;   _keywords.Insert(b);
    b._str = "u16";  b._index = UInt16;  _keywords.Insert(b);
    b._str = "u32";  b._index = UInt32;  _keywords.Insert(b);
    b._str = "u64";  b._index = UInt64;  _keywords.Insert(b);
    b._str = "f32";  b._index = Float32; _keywords.Insert(b);
    b._str = "f64";  b._index = Float64; _keywords.Insert(b);
    b._str = "void"; b._index = Void;    _keywords.Insert(b);
    _directives.Insert("entry");
}
// Advances the cursor one character within the current line.
void Lexer::Forward()
{
    _curPos += 1;
}
// Un-reads one character: moves the cursor one position backwards.
void Lexer::Back()
{
    _curPos -= 1;
}
// Returns the character under the cursor and advances past it.
char Lexer::GetNextChar()
{
    char ch = _curLine._buffer[_curPos];
    ++_curPos;
    return ch;
}
// Returns true when `tok` carries the expected token type.
// (Idiom fix: return the comparison directly instead of the
// if-return-false/return-true ladder.)
bool Lexer::Match(Token& tok, TokType type)
{
    return tok.type == type;
}
// Produces the next token from the current line, transparently pulling
// the next line from the file when the line's terminating '\0' is hit.
// Single-character punctuation is classified inline; directives,
// character/string/numeric constants and identifiers are dispatched to
// the Process* helpers. Unrecognized characters yield TOK_BAD and
// report ER_ELF0006.
void Lexer::GetNextToken(Token& tok)
{
    char ch = SkipWhiteSpace();
    _lastTokPos = _curPos;
    switch (ch)
    {
    // Single-character punctuation tokens share the epilogue below.
    case '(': tok.type = TOK_LP_S;      break;
    case ')': tok.type = TOK_RP_S;      break;
    case '[': tok.type = TOK_LP_M;      break;
    case ']': tok.type = TOK_RP_M;      break;
    case '{': tok.type = TOK_LP_B;      break;
    case '}': tok.type = TOK_RP_B;      break;
    case ',': tok.type = TOK_COMMA;     break;
    case ';': tok.type = TOK_SEMICOLON; break;
    case '.':
        ProcessDirective(tok);
        return;
    case '\'':
        ProcessCharConst(tok);
        return;
    case '\"':
        ProcessStrConst(tok);
        return;
    case '\0':
        if (_curLine._eof)
        {   // True end of input.
            tok.type = TOK_EOF;
            strcpy(tok.text, "EOF");
        }
        else
        {   // End of line only: refill and try again.
            _curFile.GetLine(_curLine);
            _curPos = 0;
            GetNextToken(tok);
        }
        return;
    default:
        if (((ch >= 'a') && (ch <= 'z')) ||
            ((ch >= 'A') && (ch <= 'Z')) || (ch == '_'))
        {
            ProcessIdentifier(tok);
        }
        // BUG FIX: the original tested `(ch == '-') && (ch == '+')`,
        // which is always false, so signed literals like "-5" never
        // reached ProcessNumConst and fell through to TOK_BAD.
        else if (((ch >= '0') && (ch <= '9')) ||
                 (ch == '-') || (ch == '+'))
        {
            ProcessNumConst(tok);
        }
        else
        {
            tok.text[0] = ch;
            tok.text[1] = '\0';
            tok.type = TOK_BAD;
            OutputError(_curLine, ER_ELF0006);
        }
        return;
    }
    // Epilogue for the punctuation cases: the lexeme is the single char.
    tok.text[0] = ch;
    tok.text[1] = '\0';
}
// Consumes spaces, tabs and newlines and returns the first character
// that is none of those (which may be the line's terminating '\0').
char Lexer::SkipWhiteSpace()
{
    for (;;)
    {
        char ch = GetNextChar();
        if (ch != ' ' && ch != '\t' && ch != '\n')
        {
            return ch;
        }
    }
}
// Debug helper: prints a human-readable description of `tok` to stdout.
void Lexer::PrintToken(Token& tok)
{
    switch (tok.type)
    {
    case TOK_BAD:
        printf("token is bad: %s\n\n", tok.text);
        break;
    case TOK_IDENTIFIER:
        printf("Identifier: %s\n", tok.text);
        break;
    case TOK_CHAR_CONST:
        // FIX: %c expects an int-promoted char; pass arg explicitly
        // narrowed instead of the raw 64-bit payload.
        printf("Char const: %c\n", (int)(char)tok.arg);
        break;
    case TOK_NOMORE:
        printf("No more\n");
        break;
    case TOK_INT_CONST:
        // FIX: arg is 64-bit (set via _atoi64); %d with a 64-bit vararg
        // is undefined — print it as long long.
        printf("int const: %s, arg %lld\n", tok.text, (long long)tok.arg);
        break;
    case TOK_FLT_CONST:
    {
        // arg holds the bit pattern of a double (see ProcessNumConst).
        // FIX: recover it with memcpy; the original pointer cast
        // (*(double*)&tok.arg) violated strict aliasing.
        double d;
        memcpy(&d, &tok.arg, sizeof(d));
        printf("float const: %s, arg %lf\n", tok.text, d);
        break;
    }
    case TOK_COMMA:
        printf("comma\n");
        break;
    case TOK_STR_CONST:
        printf("string: %s\n", tok.text);
        break;
    default:
        printf("error not a token\n");
    }
}
// Scans a character constant; the opening quote is already consumed.
// Accepts exactly one printable ASCII character (32..126) followed by
// the closing quote; anything else yields TOK_BAD.
void Lexer::ProcessCharConst(Token& tok)
{
    char ch = GetNextChar();
    if (ch >= 32 && ch <= 126)
    {
        tok.type = TOK_CHAR_CONST;
        tok.text[0] = ch;
        tok.text[1] = '\0';
        tok.arg = ch;
        char next = GetNextChar();
        if (next != '\'')
        {   // Missing closing quote: report the offending span.
            tok.type = TOK_BAD;
            tok.text[0] = '\'';
            tok.text[1] = ch;
            tok.text[2] = next;
            tok.text[3] = '\0';
        }
    }
    else
    {
        // BUG FIX: the original never set tok.type on this path, so a
        // non-printable character after the quote left whatever type the
        // (possibly reused) Token already carried.
        tok.type = TOK_BAD;
        tok.text[0] = '\'';
        tok.text[1] = ch;
        tok.text[2] = '\0';
    }
}
// Scans a directive name (letters only) following the leading '.'.
// An empty name (a bare '.') reports ER_ELF0005.
void Lexer::ProcessDirective(Token& tok)
{
    int i = 0;
    tok.type = TOK_DIRECTIVE;
    char ch = GetNextChar();
    while ( ((ch >= 'a') && (ch <= 'z')) ||
            ((ch >= 'A') && (ch <= 'Z')) )
    {
        tok.text[i++] = ch;
        ch = GetNextChar();
    }
    // BUG FIX: un-read the delimiter that ended the name. The original
    // swallowed it (unlike ProcessIdentifier, which calls Back()), so
    // e.g. a '(' right after the directive was lost, and a directive at
    // end of line walked the cursor past the terminating '\0'.
    Back();
    tok.text[i] = '\0';
    if (i == 0)
    {   // '.' with nothing after it.
        OutputError(_curLine, ER_ELF0005);
    }
}
// Scans a string constant; the opening '"' is already consumed.
// Recognizes the escapes \n, \t, \r and (fixed here) \\ and \".
// An unterminated string — the line ends first — yields TOK_BAD; an
// unknown escape aborts the process, as in the original design.
void Lexer::ProcessStrConst(Token& tok)
{
    int i = 0;
    char ch = GetNextChar();
    while (ch != '\"')
    {
        if (ch == '\\')
        {
            switch (GetNextChar())
            {
            case 'n':  ch = '\n'; break;
            case 't':  ch = '\t'; break;
            case 'r':  ch = '\r'; break;
            case '\\': ch = '\\'; break; // FIX: "\\" aborted the lexer
            case '\"': ch = '\"'; break; // FIX: embedded \" aborted too
            default:
                // FIX: stderr + non-zero exit (was stdout/exit(0)).
                fprintf(stderr, "Error: String not recognized\n");
                exit(EXIT_FAILURE);
            }
        }
        else if (ch == '\0')
        {
            // Line ended before the closing quote.
            tok.type = TOK_BAD;
            tok.text[i] = '\0'; // FIX: terminate the partial lexeme
            Back();             // FIX: stay on the '\0' so the caller can refill
            return;
        }
        tok.text[i++] = ch;
        ch = GetNextChar();
    }
    tok.type = TOK_STR_CONST;
    tok.text[i] = '\0';
}
// Scans a numeric literal (integer or float) whose first character was
// already consumed by the dispatcher (hence the initial Back()).
// Grammar, as sketched by the inline comments:
//   num -> (+|-)? digits ('.' digits)? ((e|E) (+|-)? digits)?
// On success tok.text holds the lexeme and tok.arg the value — a 64-bit
// integer for TOK_INT_CONST, or the raw bit pattern of a double for
// TOK_FLT_CONST. Malformed literals become TOK_BAD. Every exit path
// calls Back() so the delimiter is re-read by the caller, and accepted
// lexemes are length-checked by CheckNumeric().
// NOTE(review): tok.text is written without bounds checks; CheckNumeric
// rejects lexemes over 11 chars only AFTER they were written — confirm
// the text buffer size in lexer.h.
void Lexer::ProcessNumConst(Token& tok)
{
int i = 0;
Back();
char ch = GetNextChar();
if (ch == '+' || ch == '-')
{//num -> (+|-)?digits;
tok.text[i++] = ch;
ch = GetNextChar();
}
if (ch >= '0' && ch <= '9')
{
// NOTE(review): dead code — ch is within '0'..'9' here, so `ch == 0`
// (the NUL character) can never be true; this almost certainly meant
// `ch == '0'` to special-case leading zeros. The branch body is itself
// suspect (it stores the lookahead character instead of '0' and does
// not rewind before returning), so merely fixing the comparison would
// activate further bugs. Left untouched; needs a deliberate rework.
if (ch == 0)
{
ch = GetNextChar();
if (ch == '.')
{//0.digits;
ch = '0';
Back();
}
else
{//zero
tok.text[i++] = ch;
tok.text[i] = '\0';
tok.type = TOK_INT_CONST;
return;
}
}
// Collect the integer part.
while (ch >='0' && ch <= '9')
{//digits -> 0|1|2....9;
tok.text[i++] = ch;
ch = GetNextChar();
}
if (ch == '.')
{
tok.text[i++] = '.';
ch = GetNextChar();
if (ch >= '0' && ch <= '9')
{//num -> (+|-)?(digits)*.(digits)*
while (ch >= '0' && ch <= '9')
{
tok.text[i++] = ch;
ch = GetNextChar();
}
}
else
{//(+|-)?(digits)*.x (x not digits)
tok.text[i++] = '\0';
tok.type = TOK_BAD;
Back();
return;
}
if (ch == 'e' || ch == 'E')
{//(+|-)?(digits)*.(digits)*(e|E)?
tok.text[i++] = ch;
ch = GetNextChar();
if (ch == '+' || ch == '-')
{//(+|-)?(digits)*.(digits)*(e|E)?(+|-)?
tok.text[i++] = ch;
ch = GetNextChar();
}
if (ch >= '0' && ch <= '9')
{//(+|-)?(digits)*.(digits)*(e|E)?(+|-)?(digits)*
while (ch >= '0' && ch <= '9')
{
tok.text[i++] = ch;
ch = GetNextChar();
}
tok.text[i] = '\0';
tok.type = TOK_FLT_CONST;
// Store the double's bit pattern in the 64-bit arg.
// NOTE(review): pointer-cast type punning violates strict
// aliasing; memcpy would be the safe spelling.
double n = atof(tok.text);
tok.arg = (*(S64*)(&n));
Back();
CheckNumeric(tok);
return;
}
else
{//(+|-)?(digits)*.(digits)*(e|E)?(+|-)?x (x not a digit)
tok.text[i] = '\0';
tok.type = TOK_BAD;
Back();
}
}
else
{//(+|-)?(digits)*.(digits)*
tok.text[i++] = '\0';
tok.type = TOK_FLT_CONST;
double n = atof(tok.text);
tok.arg = (*(S64*)(&n));
Back();
CheckNumeric(tok);
return;
}
}
else if (ch == 'e' || ch == 'E')
{//(+|-)?(digits)*(e|E)?
tok.text[i++] = ch;
ch = GetNextChar();
if (ch == '+' || ch == '-')
{//(+|-)?(digits)*(e|E)?(+|-)?
tok.text[i++] = ch;
ch = GetNextChar();
}
if (ch >= '0' && ch <= '9')
{//(+|-)?(digits)*(e|E)?(+|-)?(digits)*
while (ch >= '0' && ch <= '9')
{
tok.text[i++] = ch;
ch = GetNextChar();
}
tok.text[i] = '\0';
tok.type = TOK_FLT_CONST;
double n = atof(tok.text);
tok.arg = (*(S64*)(&n));
Back();
CheckNumeric(tok);
return;
}
else
{// exponent marker with no digits after it
tok.text[i] = '\0';
tok.type = TOK_BAD;
Back();
return;
}
}
else
{//(+|-)?(digits)*
tok.text[i] = '\0';
tok.type = TOK_INT_CONST;
tok.arg = _atoi64(tok.text);
Back();
CheckNumeric(tok);
return;
}
}
else
{// sign with no digit after it, or dispatcher error
tok.text[i] = '\0';
tok.type = TOK_BAD;
Back();
}
}
// Rejects numeric lexemes longer than 11 characters by flagging the
// token bad and replacing its text with a diagnostic message.
void Lexer::CheckNumeric(Token& tok)
{
    if (strlen(tok.text) > 11)
    {
        tok.type = TOK_BAD;
        strcpy(tok.text, "Too long numeric digits");
    }
}
// Scans an identifier or keyword. The dispatcher already consumed the
// first character, so rewind once and collect [A-Za-z0-9._]+. If the
// lexeme is in the keyword table, retype the token as TOK_KEYWORD and
// stash the keyword's index in arg.
void Lexer::ProcessIdentifier(Token& tok)
{
    tok.type = TOK_IDENTIFIER;
    Back(); // re-read the character the dispatcher consumed
    int len = 0;
    for (;;)
    {
        char ch = GetNextChar();
        bool isWordChar = ((ch >= 'a') && (ch <= 'z')) ||
                          ((ch >= 'A') && (ch <= 'Z')) ||
                          ((ch >= '0') && (ch <= '9')) ||
                          (ch == '.') || (ch == '_');
        if (!isWordChar)
        {
            break;
        }
        tok.text[len++] = ch;
    }
    Back(); // un-read the delimiter that ended the identifier
    tok.text[len] = '\0';
    MapBucket b = _keywords.Exsits(MapBucket(tok.text));
    if (b._index != -1)
    {
        tok.type = TOK_KEYWORD;
        tok.arg = b._index;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -