📄 scan.cpp
字号:
// Lexer of the compiler
// function: translate the source into tokens
//
// Written by bood, boodweb@163.com, http://boodweb.126.com
// 2004-08-06
//
// 2004.5.21
// fixed: the return type of '!=' should be NEQ rather than LTEQ
//
#pragma warning(disable:4786)
#include <string>
#include <sstream>
#include <list>
#include "global.h"
#include "util.h"
#include "scan.h"
using namespace std;
string TokenString; // Text (lexeme) of the current token; rewritten by every GetToken call
TokenType token; // Type of the current token; Restore() assigns it from GetToken
// Token history: every token produced by a fresh scan in GetToken is
// appended here so it can be replayed later without re-reading the source
list<TokenUnit> tokenHist;
// Replay cursor into tokenHist, used for look-ahead while parsing.
// While it equals tokenHist.end(), GetToken scans new input; otherwise
// GetToken returns the entry it points to and advances it by one.
// Restore() is what moves it back into the history.
list<TokenUnit>::iterator tokenIt=tokenHist.end();
// Reserved words and their token types. The table is terminated by an
// entry whose word is the empty string; LookupReserved stops at that
// sentinel, so it must stay the last initialized entry.
ReserveWordsUnit ReservedWords[10]={
{"if",IF},{"else",ELSE},{"while",WHILE},
{"return",RETURN},{"void",VOID},{"int",INT},{""}
};
// Classify an identifier-shaped lexeme.
//
// token: the lexeme to look up (not modified)
// Returns the reserved word's token type when 'token' matches an entry of
// ReservedWords, otherwise ID.
TokenType LookupReserved(string &token)
{
	// The table ends with an entry whose word is the empty string
	for(int idx=0; ReservedWords[idx].word[0]!='\0'; ++idx)
	{
		if(token==ReservedWords[idx].word)
			return ReservedWords[idx].type;
	}
	return ID;
}
//
// Main function of the lexer
//
// Return one token each time, meanwhile we save each
// token into a TokenUnit list, this makes no need to
// do repeat scan while do forward looking. However,
// it also costs memory, so improvements are needed
//
TokenType GetToken()
{
TokenType rettype;
// If the tokenIt is not NULL, i.e. a restoring is going on,
// then just reads from that point in the token list
if(tokenIt!=tokenHist.end()){
rettype=(*tokenIt).token;
TokenString=(*tokenIt).tokenstring;
lineno=(*tokenIt).lineno;
++tokenIt;
return rettype;
}
// A DFA below, codes are almost trivial:)
char ch;
int save=1;
ScanState state=START;
TokenString="";
while(state!=DONE)
{
istream *is = &fsource;
ch=is->get();
if(ch=='\n') lineno++;
switch(state)
{
case START:
if(isalpha(ch)) state=INID;
else if(isdigit(ch)) state=INNUM;
else if(ch=='<') state=INLTEQ;
else if(ch=='>') state=INGTEQ;
else if(ch=='=') state=INEQ;
else if(ch=='!') state=INNEQ;
else if(ch=='/') {
if((ch=is->get())!='*') {
is->putback(ch);
rettype=DIV;
state=DONE;
}
else{
state=INCOMMENT;
save=0;
}
}
else if(ch==' ' || ch=='\n' || ch=='\t') {
save=0;
}
else{
switch(ch){
case '+':
rettype=PLUS;
break;
case '-':
rettype=MINUS;
break;
case '*':
rettype=MUL;
break;
case ';':
rettype=SEMI;
break;
case ',':
rettype=COMMA;
break;
case '(':
rettype=LPAREN;
break;
case ')':
rettype=RPAREN;
break;
case '[':
rettype=LSQUAR;
break;
case ']':
rettype=RSQUAR;
break;
case '{':
rettype=LBRACE;
break;
case '}':
rettype=RBRACE;
break;
case EOF:
save=0;
rettype=ENDFILE;
break;
default:
save=0;
rettype=ERROR;
break;
}
state=DONE;
}
break;
case INID:
if(!isalpha(ch)) {
is->putback(ch);
rettype=ID;state=DONE;
save=0;
}
break;
case INNUM:
if(!isdigit(ch)) {
is->putback(ch);
rettype=NUMBER;state=DONE;
save=0;
}
break;
case INLTEQ:
if(ch=='=') {rettype=LTEQ;state=DONE;}
else{is->putback(ch);rettype=LT;state=DONE;save=0;}
break;
case INGTEQ:
if(ch=='=') {rettype=GTEQ;state=DONE;}
else{is->putback(ch);rettype=GT;state=DONE;save=0;}
break;
case INEQ:
if(ch=='=') {rettype=EQ;state=DONE;}
else{is->putback(ch);rettype=ASSIGN;state=DONE;save=0;}
break;
case INNEQ:
if(ch=='=') {rettype=NEQ;state=DONE;}
else{is->putback(ch);rettype=ERROR;state=DONE;save=0;}
break;
case INCOMMENT:
if(ch=='*'){
if(is->get()=='/')
state=START;
}
save=0;
break;
}
if(save==1) TokenString+=ch;
else save=1;
}
if(rettype==ID){
rettype=LookupReserved(TokenString);
}
// Save the token just identified
TokenUnit tu;
tu.lineno=lineno;
tu.token=rettype;
tu.tokenstring=TokenString;
tokenHist.push_back(tu);
ftoken<<TokenString<<endl;
return rettype;
}
//
// Save a backup point for a later Restore
//
// This function is essential for look-ahead in the parser.
//
// The backup point is always the current token, i.e. the history entry
// immediately before 'tokenIt':
//  - when no restore is in effect ('tokenIt' is at the end of the
//    history) that is the last token scanned;
//  - while a restore is being replayed it is the token most recently
//    returned from the history.
//
// Note: 'Backup' and 'Restore' may be nested, but each 'Restore' must be
// paired with its 'Backup' in bracket-like order.
//
// At least one token must already be in the history when this is called.
list<TokenUnit>::iterator Backup()
{
	// Work on a copy so the replay cursor itself is left untouched
	list<TokenUnit>::iterator current=tokenIt;
	--current;
	return current;
}
// Return the lexer to a backup point.
//
// it: the value previously returned by 'Backup'
//
// The replay cursor is moved to 'it' and that entry is immediately read
// back through GetToken, which reloads 'TokenString' and 'lineno' from
// the history; the resulting type becomes the current 'token'.
void Restore(list<TokenUnit>::iterator &it)
{
	tokenIt=it;
	TokenType replayed=GetToken();
	token=replayed;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -