📄 scanner.cpp
字号:
/****************************************************/
/* File: scanner.cpp */
/* The scanner implementation for the C- compiler */
/* Xiang Cui (sean) */
/* 230030782 */
/****************************************************/
#include "scanner.h"
#include "map.h"
// Table of reserved words keyed by spelling. It is filled in by the
// Myscanner constructor and consulted by Myscanner::reservedLookup().
map<string,TokenType> keywords;
// Builds a scanner that reads from the file named FileName.
// Registers the reserved words in the file-scope `keywords` table, opens the
// source stream and puts the scanner members into their initial state.
// If the file cannot be opened a message is written to cout and the process
// terminates with exit status 1.
Myscanner::Myscanner(const char *FileName)
{
    // Reserved-word spellings paired with the token each one maps to.
    static const struct
    {
        const char *word;
        TokenType token;
    } reserved[] = {
        {"else", ELSE},
        {"if", IF},
        {"int", INT},
        {"return", RETURN},
        {"void", VOID},
        {"while", WHILE}
    };
    const int reservedCount = sizeof(reserved) / sizeof(reserved[0]);
    for (int i = 0; i < reservedCount; ++i)
    {
        keywords[reserved[i].word] = reserved[i].token;
    }

    sf.open(FileName);
    if (!sf)
    {
        cout << "File " << FileName << " not found" << endl;
        exit(1);
    }

    // Initial scanner state: first line, empty lexeme, automaton at START.
    lineno = 1;
    tokens = "";
    currentToken = ID;
    state = START;
    save = true;
}
// Character-class helpers. The <cctype> functions have undefined behaviour
// when given a negative value, which a plain char can hold, so the argument
// is converted to unsigned char first.
static bool scannerIsDigit(char ch)
{
    return isdigit(static_cast<unsigned char>(ch)) != 0;
}

static bool scannerIsLetter(char ch)
{
    return isalpha(static_cast<unsigned char>(ch)) != 0;
}

// Reads the next token from the source stream `sf`.
//
// Returns the token kind, which is also stored in `currentToken`. The lexeme
// is left in `tokens` ("EOF" for ENDFILE) and `lineno` is kept in step with
// the characters consumed. Comments are skipped: when one closes, scanning
// restarts and the token after it is returned.
//
// Side effect: "Line:<lineno> " is written to cout each time a token or a
// comment is completed; nothing is printed when ENDFILE is returned.
//
// The lexer is a small state machine driven by the `state` member. A
// character that terminates a token but belongs to the next one is pushed
// back with unget() and is not added to the lexeme.
TokenType Myscanner::scan() //sf is a ifstream object,C- sourcefile.
{
    tokens = "";
    state = START;

    while (state != DONE)
    {
        if (!sf.get(c))
        {
            // No more input (or the stream failed). A lexeme that was still
            // being collected is finished here instead of being discarded;
            // the next call then reports ENDFILE.
            switch (state)
            {
            case INNUM:
                currentToken = NUM;
                break;
            case INID:
                currentToken = reservedLookup(tokens);
                break;
            case INEQ:
                currentToken = ASSIGN;
                break;
            case INLT:
                currentToken = LT;
                break;
            case INGT:
                currentToken = GT;
                break;
            case INSLASH:
                currentToken = DIV;
                break;
            case INNEQ:
            case ERR:
                currentToken = ERROR;
                break;
            default:
                // At START, or inside a comment: plain end of file.
                currentToken = ENDFILE;
                tokens = "EOF";
                state = DONE;
                return currentToken;
            }
            state = DONE;
            cout << "Line:" << lineno << " ";
            return currentToken;
        }

        if (c == '\n') // a newline was consumed: advance the line counter
        {
            lineno++;
        }

        save = true;         // append c to the lexeme unless a case says otherwise
        bool backup = false; // set when c must be returned to the stream

        switch (state)
        {
        case START:
            if (scannerIsDigit(c))
                state = INNUM;
            else if (scannerIsLetter(c))
                state = INID;
            else if (c == '=')
                state = INEQ;    // '=' or '==' is decided by the next character
            else if (c == '<')
                state = INLT;    // '<' or '<='
            else if (c == '>')
                state = INGT;    // '>' or '>='
            else if (c == '!')
                state = INNEQ;   // '!=' or an error
            else if (c == '/')
                state = INSLASH; // '/' or the start of a comment
            else if ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))
                save = false;    // skip whitespace
            else
            {
                // Anything else is a single-character token.
                state = DONE;
                switch (c)
                {
                case '+':
                    currentToken = PLUS;
                    break;
                case '-':
                    currentToken = MINUS;
                    break;
                case '*':
                    currentToken = TIMES;
                    break;
                case '(':
                    currentToken = LPAREN;
                    break;
                case ')':
                    currentToken = RPAREN;
                    break;
                case ';':
                    currentToken = SEMI;
                    break;
                case ',':
                    currentToken = COMMA;
                    break;
                case '[':
                    currentToken = LSQR;
                    break;
                case ']':
                    currentToken = RSQR;
                    break;
                case '{':
                    currentToken = LCRLY;
                    break;
                case '}':
                    currentToken = RCRLY;
                    break;
                default:
                    currentToken = ERROR;
                    break;
                }
            }
            break;

        // Inside a comment, looking for the '*' that may begin the closer.
        case INLCMNT:
            save = false;
            if (c == '*')
                state = INRCMNT;
            break;

        // Just saw '*' inside a comment: '/' closes it, another '*' may still
        // be followed by '/', anything else goes back to the comment body.
        case INRCMNT:
            save = false;
            if (c == '/')
            {
                state = DONE;
                currentToken = RCMNT;
            }
            else if (c != '*')
            {
                state = INLCMNT;
            }
            break;

        case INEQ:
            state = DONE;
            if (c == '=')
                currentToken = EQ;
            else
            {
                backup = true;
                currentToken = ASSIGN;
            }
            break;

        case INNEQ:
            state = DONE;
            if (c == '=')
                currentToken = NEQ;
            else
            {
                backup = true;
                currentToken = ERROR; // a lone '!' is not a token
            }
            break;

        case INLT:
            state = DONE;
            if (c == '=')
                currentToken = LTEQ;
            else
            {
                backup = true;
                currentToken = LT;
            }
            break;

        case INGT:
            state = DONE;
            if (c == '=')
                currentToken = GTEQ;
            else
            {
                backup = true;
                currentToken = GT;
            }
            break;

        case INSLASH:
            if (c == '*')
            {
                // Comment opener: its text is not collected.
                currentToken = LCMNT;
                state = INLCMNT;
                save = false;
            }
            else
            {
                backup = true;
                currentToken = DIV;
                state = DONE;
            }
            break;

        // Digits continue the number; a letter makes the lexeme an error.
        case INNUM:
            if (scannerIsLetter(c))
            {
                currentToken = ERROR;
                state = ERR;
            }
            else if (!scannerIsDigit(c))
            {
                backup = true;
                state = DONE;
                currentToken = NUM;
            }
            break;

        // Letters continue the identifier; a digit makes the lexeme an error.
        case INID:
            if (scannerIsDigit(c))
            {
                currentToken = ERROR;
                state = ERR;
            }
            else if (!scannerIsLetter(c))
            {
                backup = true;
                state = DONE;
                currentToken = ID;
            }
            break;

        // Swallow the rest of a malformed letter/digit run.
        case ERR:
            if (!scannerIsLetter(c) && !scannerIsDigit(c))
            {
                backup = true;
                state = DONE;
            }
            break;

        case DONE:
        default:
            // Not reachable through the loop condition; kept as a guard.
            state = DONE;
            currentToken = ERROR;
            break;
        }

        if (backup)
        {
            // c belongs to the next token: undo the line count if it was a
            // newline, return it to the stream and keep it out of the lexeme.
            if (c == '\n')
                lineno--;
            sf.unget();
            c = '\0';
            save = false;
        }

        if (save)
            tokens += c;

        if (state == DONE)
        {
            if (currentToken == ID)
                currentToken = reservedLookup(tokens);
            cout << "Line:" << lineno << " ";
            if (currentToken == RCMNT)
            {
                // A comment is not handed to the caller: start over and
                // return whatever follows it.
                tokens = "";
                state = START;
            }
        }
    }
    return currentToken;
}
// Writes a one-line description of a token to cout.
//   token       - the token kind to describe
//   tokenString - the lexeme; shown for reserved words, NUM, ID and ERROR
// Operators, punctuation and ENDFILE are printed as their fixed spelling.
// A token kind that is not listed prints "ERROR: should never happen".
void Myscanner::printToken(TokenType token, string tokenString)
{
    // Tokens whose text depends on the lexeme print and return at once;
    // every other token only selects a fixed spelling that is printed below.
    const char *spelling = "ERROR: should never happen";

    switch (token)
    {
    case ELSE:
    case INT:
    case IF:
    case RETURN:
    case VOID:
    case WHILE:
        cout << "reserved word: " << tokenString << endl;
        return;
    case NUM:
        cout << "NUM, val=" << tokenString << endl;
        return;
    case ID:
        cout << "ID, name=" << tokenString << endl;
        return;
    case ERROR:
        cout << "ERROR: " << tokenString << endl;
        return;

    case ASSIGN:  spelling = "=";   break;
    case EQ:      spelling = "==";  break;
    case NEQ:     spelling = "!=";  break;
    case LT:      spelling = "<";   break;
    case LTEQ:    spelling = "<=";  break;
    case GT:      spelling = ">";   break;
    case GTEQ:    spelling = ">=";  break;
    case PLUS:    spelling = "+";   break;
    case MINUS:   spelling = "-";   break;
    case TIMES:   spelling = "*";   break;
    case DIV:     spelling = "/";   break;
    case LPAREN:  spelling = "(";   break;
    case RPAREN:  spelling = ")";   break;
    case LSQR:    spelling = "[";   break;
    case RSQR:    spelling = "]";   break;
    case LCRLY:   spelling = "{";   break;
    case RCRLY:   spelling = "}";   break;
    case LCMNT:   spelling = "/*";  break;
    case RCMNT:   spelling = "*/";  break;
    case SEMI:    spelling = ";";   break;
    case COMMA:   spelling = ",";   break;
    case ENDFILE: spelling = "EOF"; break;
    default: /* should never happen */
        break;
    }

    cout << spelling << endl;
}
// Maps a lexeme to its token kind: the reserved-word token when `str` is
// present in the `keywords` table, ID otherwise.
TokenType Myscanner::reservedLookup (string str)
{
    map<string,TokenType>::iterator entry = keywords.find(str);
    return (entry == keywords.end()) ? ID : entry->second;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -