// File: scanner.cpp
// (Code-viewer residue: a "Font size:" control label appeared here.)
#include "scanner.h"
/**
 * Constructs a scanner over the source file fn_in.
 *
 * The whole input file is read character by character into buf, the three
 * output files are created/truncated and then opened in append mode, the
 * position state is reset, and the first character and first token are
 * scanned.
 *
 * If the input file cannot be opened the constructor returns early: only
 * token (-1), count_line (0) and count_file (0) have been assigned and no
 * output file is touched.
 *
 * @param fn_in  path of the source file to scan
 * @param fn_out path of the file opened as outFile
 * @param fn_err path of the file opened as errFile
 * @param fn_lex path of the file opened as lexFile
 */
Scanner::Scanner(char * fn_in, char * fn_out, char * fn_err, char * fn_lex)
{
    token = -1;
    count_line = 0;
    count_file = 0;

    // Read the source file into the buffer.
    int ptr = 0;
    inFile.open(fn_in);
    if (!inFile.is_open())
    {
        return;
    }
    // NOTE(review): nothing bounds ptr against the capacity of buf, which is
    // declared outside this file; an input larger than buf would overflow it.
    // Confirm the capacity and add a limit here.
    while (inFile.get(buf[ptr]))
    {
        ptr++;
    }
    inFile.close();

    // Create (or truncate) the output, error and lexeme files. fopen can fail
    // (bad path, missing permission); fclose must not be handed a null
    // pointer, so each close is guarded.
    FILE * fp;
    fp = fopen(fn_out, "w");
    if (fp)
    {
        fclose(fp);
    }
    fp = fopen(fn_err, "w");
    if (fp)
    {
        fclose(fp);
    }
    fp = fopen(fn_lex, "w");
    if (fp)
    {
        fclose(fp);
    }
    outFile.open(fn_out, ios::out | ios::app);
    errFile.open(fn_err, ios::out | ios::app);
    lexFile.open(fn_lex, ios::out | ios::app);

    // Terminate the buffer with the end-of-input sentinel and remember how
    // many real characters precede it.
    buf[ptr] = C_EOI;
    buflen = ptr;

    // bp starts at -1 so that the first ScanChar() lands on buf[0].
    line = 1;
    col = 0;
    bp = -1;
    endPos.Reset();
    ScanChar();
    NextToken();
}
/**
 * Closes the output, error and lexeme streams if they are still open.
 *
 * The original conditions were inverted (close was only attempted on streams
 * that were NOT open), so the explicit close never ran on an open stream.
 */
Scanner::~Scanner()
{
    if (outFile.is_open())
    {
        outFile.close();
    }
    if (errFile.is_open())
    {
        errFile.close();
    }
    if (lexFile.is_open())
    {
        lexFile.close();
    }
}
/**
 * Advances to the next character of buf, storing it in ch, and updates the
 * line/column counters and endPos.
 *
 * - C_CR always starts a new line.
 * - C_LF starts a new line unless it directly follows a C_CR (so a CR LF
 *   pair counts as a single line break).
 * - C_TAB moves col to the next multiple of TabInc.
 * - Any other character advances col by one.
 *
 * endPos is set to (line, col - 1). When that column would be -1 (col is 0,
 * i.e. a line break was just consumed), endPos is placed on the previous
 * line instead, at the column saved in oldcol.
 */
void Scanner::ScanChar()
{
    // Seed oldcol with the column of the previous end position. For the LF of
    // a CR LF pair no branch below assigns oldcol while col is still 0 from
    // the CR, so the fallback at the bottom would otherwise read an
    // uninitialised value; with this seed endPos simply stays where the CR
    // left it.
    int oldcol = endPos.col;
    bp++;
    ch = buf[bp];
    switch (ch)
    {
    case C_CR:
        oldcol = col;
        col = 0;
        line++;
        break;
    case C_LF:
        // An LF that directly follows a CR belongs to the same line break.
        if (bp == 0 || buf[bp - 1] != C_CR)
        {
            oldcol = col;
            col = 0;
            line++;
        }
        break;
    case C_TAB:
        col = (col / TabInc * TabInc) + TabInc;
        break;
    default:
        col++;
        break;
    }
    endPos.Set(line, col - 1);
    if (endPos.col == -1)
    {
        endPos.Set(line - 1, oldcol);
    }
}
/**
 * Scans the remainder of a numeric literal and sets token accordingly.
 *
 * Digits are accumulated with PutChar while Digit() accepts the current
 * character; radixes up to 10 are scanned with digit radix 10, larger ones
 * with digit radix 16. For radixes up to 10, a '.' (which is stored too) or
 * one of e/E/f/F/d/D hands over to ScanFractionAndSuffix(). Otherwise the
 * literal is integral: a trailing l/L is consumed (not stored) and yields
 * P_LONGLITERAL, anything else yields P_INTLITERAL.
 *
 * @param radix radix of the literal; also stored in this->radix
 */
void Scanner::ScanNumber(int radix)
{
    this->radix = radix;
    const bool mayBeFloating = (radix <= 10);
    const int digitRadix = mayBeFloating ? 10 : 16;

    // Collect the leading run of digits.
    for (; Digit(digitRadix) >= 0; ScanChar())
    {
        PutChar(ch);
    }

    if (mayBeFloating)
    {
        switch (ch)
        {
        case '.':
            // The decimal point is part of the literal text.
            PutChar(ch);
            ScanChar();
            ScanFractionAndSuffix();
            return;
        case 'e': case 'E':
        case 'f': case 'F':
        case 'd': case 'D':
            // Exponent or floating suffix without a fraction part.
            ScanFractionAndSuffix();
            return;
        default:
            break;
        }
    }

    // Integral literal, optionally with a long suffix.
    const bool hasLongSuffix = (ch == 'l' || ch == 'L');
    if (hasLongSuffix)
    {
        ScanChar();
    }
    token = hasLongSuffix ? P_LONGLITERAL : P_INTLITERAL;
}
/**
 * Appends one character to the token text buffer sbuf and advances the
 * write index sp.
 *
 * @param ch character to store (the parameter shadows the member ch)
 */
void Scanner::PutChar(char ch)
{
    sbuf[sp] = ch;
    ++sp;
}
void Scanner::NextToken()
{
int start;
memset(sbuf, '\0', MAX_FILE);
sp = 0;
count_line++;
count_file++;
while (true)
{
int oldline = pos.line;
pos.Set(line, col);
if (pos.line > oldline)
{
count_line = 1;
}
start = bp;
switch (ch)
{
case ' ':
case C_TAB:
case C_FF:
case C_CR:
case C_LF:
ScanChar();
break;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
case '$':
case '_':
ScanIdent();
return;
case '0':
ScanChar();
if (ch == 'x' || ch == 'X')
{
ScanChar();
if (Digit(16) < 0)
{
LexError("Invalid hexadecimal number");
}
ScanNumber(16);
}
else
{
PutChar('0');
ScanNumber(8);
}
return;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
ScanNumber(10);
return;
case '.':
ScanChar();
if ('0' <= ch && ch <= '9')
{
PutChar('.');
ScanFractionAndSuffix();
}
else
{
token = P_DOT;
}
return;
case ',':
ScanChar();
token = P_COMMA;
return;
case ';':
ScanChar();
token = P_SEMI;
return;
case '(':
ScanChar();
token = P_LPAREN;
return;
case ')':
ScanChar();
token = P_RPAREN;
return;
case '[':
ScanChar();
token = P_LBRACKET;
return;
case ']':
ScanChar();
token = P_RBRACKET;
return;
case '{':
ScanChar();
token = P_LBRACE;
return;
case '}':
ScanChar();
token = P_RBRACE;
return;
case '/':
ScanChar();
if (ch == '/')
{
do
{
ScanCommentChar();
}
while (ch != C_CR && ch != C_LF && bp < buflen);
break;
}
else if (ch == '*')
{
ScanChar();
SkipComment();
if (ch == '/')
{
ScanChar();
break;
}
else
{
LexError("Unclosed comment");
return;
}
}
else if (ch == '=')
{
strcpy(name, "/=");
token = P_SLASHEQ;
ScanChar();
}
else
{
strcpy(name, "/");
token = P_SLASH;
}
return;
case '\'':
ScanChar();
if (ch == '\'')
{
LexError("Empty character");
}
else
{
if (ch == C_CR || ch == C_LF)
{
LexError(pos, "Illegal line end in character");
}
ScanLitChar();
if (ch == '\'')
{
ScanChar();
token = P_CHARLITERAL;
}
else
{
LexError(pos, "Unclosed character quote mark");
}
}
return;
case '\"':
ScanChar();
while (ch != '\"' && ch != C_CR && ch != C_LF && bp < buflen)
{
ScanLitChar();
}
if (ch == '\"')
{
token = P_STRINGLITERAL;
ScanChar();
}
else
{
LexError(pos, "Unclosed string quote mark");
}
return;
default:
if (IsSpecial(ch))
{
ScanOperator();
}
else if (IsJavaIdentifierStart(ch))
{
ScanIdent();
}
else if (bp == buflen || ch == C_EOI && bp + 1 == buflen)
{
token = P_EOF;
}
else
{
LexError("Illegal character");
ScanChar();
}
return;
}
}
}
void Scanner::ScanLitChar()
{
if (ch == '\\')
{
if (buf[bp + 1] == '\\')
{
bp++;
col++;
PutChar('\\');
ScanChar();
}
else
{
ScanChar();
char leadch = ch;
int oct = Digit(8);
int hex = 0;
switch (ch)
{
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
leadch = ch;
oct = Digit(8);
ScanChar();
if ('0' <= ch && ch <= '7')
{
oct = oct * 8 + Digit(8);
ScanChar();
if (leadch <= '3' && '0' <= ch && ch <= '7')
{
oct = oct * 8 + Digit(8);
ScanChar();
}
}
PutChar((char)oct);
break;
case 'u':
ScanChar();
if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
{
hex = hex * 16 + Digit(16);
ScanChar();
if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
{
hex = hex * 16 + Digit(16);
ScanChar();
if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
{
hex = hex * 16 + Digit(16);
ScanChar();
if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'))
{
hex = hex * 16 + Digit(16);
ScanChar();
}
}
}
}
else
{
tmpPos.Set(line, col);
LexError(tmpPos, "Illegal unicode character");
break;
}
PutChar((char)hex);
break;
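// Keyboard shortcuts (code-viewer help text, not part of the program):
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -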