// File: lexer.cpp
// (code-viewer page residue: "Font size:")
#include "Lexer.h"
#include <stdio.h>
#include <string.h>
#define MAXRESERVED 7
#define TOKENBUFSIZE 64
// Forward declarations of the free helper functions used by Lexer::nextToken.

// Fills `token` with the type and name of the single-character operator or
// delimiter `c`; unknown characters yield an ERROR token.
void getSingleOperator(char c, Token &token);
// Compares token.name against the reserved-word table and, on a match,
// replaces token.type with the keyword's type.
void keywordLookup(Token &token);
// Table of reserved words: each entry pairs a keyword token type with the
// spelling that keywordLookup() matches (via strcmp) against an identifier.
// MAXRESERVED must equal the number of entries listed here.
static Token ReservedWords[MAXRESERVED] = {
{IF, "if"},
{THEN, "then"},
{ELSE, "else"},
{WHILE, "while"},
{DO, "do"},
// keywordLookup() renames BEGIN/END tokens to "{" and "}" after matching.
{BEGIN, "begin"},
{END, "end"}
};
/**
 * Builds a lexer over the contents of the file `filename`.
 *
 * The file is read in two passes: the first counts its characters, the
 * second copies them into a freshly allocated, NUL-terminated `src` buffer.
 * `length` is the number of characters copied. If the file cannot be opened
 * (or `filename` is NULL) a banner is printed to stdout, `src` stays NULL and
 * `length` stays 0, so isReady() reports false.
 *
 * @param filename path of the source file to tokenize.
 */
Lexer::Lexer(char *filename){
    buf = new char[TOKENBUFSIZE];
    src = NULL;
    index = 0;
    length = 0;

    FILE *fp = (filename != NULL) ? fopen(filename, "r") : NULL;
    if(fp==NULL){
        printf("\n\n********************************************\n");
        printf("* FATAL ERROR! LEXER COULD NOT OPEN FILE!!!\n");
        printf("* %s : No such file.\n", (filename != NULL) ? filename : "(null)");
        printf("********************************************\n\n");
        return;
    }

    // Pass 1: count the characters so the buffer can be sized exactly.
    int count = 0;
    while(fgetc(fp)!=EOF){
        count++;
    }

    src = new char[count+1];
    rewind(fp);

    // Pass 2: copy the characters. The result of fgetc() is kept in an int so
    // that EOF can be told apart from every valid byte value (storing it in a
    // char first breaks the comparison: it never matches when char is
    // unsigned and matches byte 0xFF when char is signed). The copy is also
    // bounded by `count` so it can never run past the allocation.
    int copied = 0;
    int ch;
    while(copied < count && (ch = fgetc(fp)) != EOF){
        src[copied++] = static_cast<char>(ch);
    }
    src[copied] = '\0';
    length = copied;

    fclose(fp);
}
/**
 * Creates an empty lexer with no source attached.
 *
 * Every member is given a defined value so that isReady() reliably returns
 * false and the destructor's delete[] operates on a null pointer (a no-op)
 * until setSrc() supplies a source buffer.
 */
Lexer::Lexer(){
    src = NULL;
    buf = NULL;
    index = 0;
    length = 0;
}
/**
 * Releases the buffers held by the lexer.
 *
 * `src` is freed with delete[] regardless of whether it came from the file
 * constructor or from setSrc(), so a buffer handed to setSrc() must have been
 * allocated with new[]. The token scratch buffer `buf` is released as well
 * (it was previously leaked).
 */
Lexer::~Lexer(){
    delete[] src;
    delete[] buf;
}
void Lexer::reset(){
index = 0;
}
/**
 * Reports whether the scan position has reached the end of the source.
 *
 * The threshold is `length - 1`, matching the guard at the top of
 * nextToken(). The comparison is `>=` rather than `==` because nextToken()
 * can advance `index` past that position in a single call (for example when
 * skipping trailing whitespace and consuming the terminator), and because
 * `length` is 0 when no file could be loaded; with `==` neither case would
 * ever report "finished".
 *
 * @return true when no further characters remain to be tokenized.
 */
bool Lexer::isFinished(){
    return (index >= length - 1);
}
/**
 * Tells whether a source buffer is attached to the lexer.
 *
 * @return false while `src` is NULL (e.g. the input file could not be
 *         opened), true otherwise.
 */
bool Lexer::isReady(){
    if(src == NULL){
        return false;
    }
    return true;
}
char* Lexer::getSrc(){
return src;
}
void Lexer::setSrc(char *s, int len){
src = s;
index = 0;
length = len;
buf = new char[TOKENBUFSIZE];
}
int Lexer::getIndex(){
return index;
}
/***=======================================================****/
// True for the ASCII letters a-z and A-Z (locale independent).
static bool isAsciiLetter(char c){
    return (c>='a' && c<='z') || (c>='A' && c<='Z');
}

// True for the ASCII digits 0-9.
static bool isAsciiDigit(char c){
    return (c>='0' && c<='9');
}

/**
 * Scans and returns the next token, advancing the scan position past it.
 *
 * Leading blanks, tabs and line breaks are skipped. Identifiers and numbers
 * are accumulated in `buf` and copied into a newly allocated token name;
 * identifiers are then checked against the reserved-word table. The
 * operators '=', '<' and '>' look one character ahead to recognise "==",
 * "<=" and ">=". Every other character is handed to getSingleOperator().
 *
 * Scanning relies on the source being NUL-terminated at position `length`
 * (the file constructor guarantees this), which stops both the whitespace
 * skip and the identifier/number loops at the end of the text.
 *
 * @return the scanned token. An ERROR token named "NO CHAR LEFT." is returned
 *         when no source is attached or the end has been reached, and an
 *         ERROR token named "OUT OF BUFFER!" when an identifier or number
 *         has TOKENBUFSIZE-1 or more characters.
 */
Token Lexer::nextToken(){
    Token token;
    ScannerState state = START;
    int bufindex = 0;
    bool next = true; // false: the last character examined belongs to the next token
    char c;

    // `>=` (not `==`) so that a position already past the end, or an empty /
    // missing source, is rejected instead of being dereferenced.
    if(src==NULL || index>=length-1){
        token.type = ERROR;
        token.name = "NO CHAR LEFT.";
        return token;
    }

    // Skip whitespace in front of the token.
    c = src[index];
    while(c==' ' || c=='\n' || c=='\r' || c=='\t'){
        index ++;
        c = src[index];
    }

    // Classify the first character and pick the scanner state.
    if(isAsciiLetter(c)){
        state = INID;
        token.type = ID;
        buf[bufindex++] = c;
    }else if(isAsciiDigit(c)){
        state = INNUM;
        token.type = NUM;
        buf[bufindex++] = c;
    }else if(c=='='){
        state = INEQ;
    }else if(c=='<'){
        state = INLE;
    }else if(c=='>'){
        state = INGE;
    }else{
        state = DONE;
        getSingleOperator(c, token);
    }
    index ++;

    while(state!=DONE){
        c = src[index];
        switch(state){
        case INEQ:
            if(c=='='){
                token.type = EQ;
                token.name = "==";
            }else{
                token.type = ASSIGN;
                token.name = "=";
                next = false;
            }
            state = DONE;
            break;
        case INLE:
            if(c=='='){
                token.type = LE;
                token.name = "<=";
            }else{
                token.type = LT;
                token.name = "<";
                next = false;
            }
            state = DONE;
            break;
        case INGE:
            if(c=='='){
                token.type = GE;
                token.name = ">=";
            }else{
                token.type = GT;
                token.name = ">";
                next = false;
            }
            state = DONE;
            break;
        case INID:
            if(isAsciiLetter(c) || isAsciiDigit(c)){
                // Never write past the scratch buffer; surplus characters of
                // an over-long lexeme are consumed but dropped, and the
                // token is reported as an error below.
                if(bufindex < TOKENBUFSIZE - 1){
                    buf[bufindex++] = c;
                }
            }else{
                state = DONE;
                next = false;
            }
            break;
        case INNUM:
            if(isAsciiDigit(c)){
                if(bufindex < TOKENBUFSIZE - 1){
                    buf[bufindex++] = c;
                }
            }else{
                state = DONE;
                next = false;
            }
            break;
        default:
            state = DONE;
            token.type = ERROR;
            token.name = "ERROR!";
            printf("Error! Because no state is define! This should never happen! "
                   "Current character is: %c\n", src[index]);
            break;
        }// end scanner state
        index ++;
    }// end while

    // Give back the look-ahead character when it was not part of this token.
    if(next==false){
        index --;
    }

    if(bufindex >= TOKENBUFSIZE - 1){
        // The lexeme filled the scratch buffer: report it and return at once
        // so the error name is not replaced by the truncated text.
        token.type = ERROR;
        token.name = "OUT OF BUFFER!";
        return token;
    }

    if(bufindex!=0){
        buf[bufindex] = '\0';
        // +1 for the terminating NUL that strcpy() writes.
        token.name = new char[bufindex + 1];
        strcpy(token.name, buf);
        if(token.type==ID){ // looking for reserved word and set the right type
            keywordLookup(token);
        }
    }
    return token;
}
// Checks whether token.name spells a reserved word. On the first match the
// token takes over the keyword's type; BEGIN and END tokens are additionally
// renamed to "{" and "}". Tokens that match nothing are left unchanged.
void keywordLookup(Token &token){
    Token *const tableEnd = ReservedWords + MAXRESERVED;
    for(Token *entry = ReservedWords; entry != tableEnd; ++entry){
        if(strcmp(token.name, entry->name) != 0){
            continue; // not this keyword, try the next one
        }
        token.type = entry->type;
        if(token.type == BEGIN){
            token.name = "{";
        }
        if(token.type == END){
            token.name = "}";
        }
        return;
    }
}
// Translates the single character `c` into an operator/delimiter token,
// writing the type and name into `token`. '#' and the NUL terminator both
// mark the end of input (LEXER_DONE). Any character not listed here produces
// an ERROR token and a diagnostic on stdout.
void getSingleOperator(char c, Token &token){
    if(c == '{'){
        token.type = BEGIN;
        token.name = "{";
    }else if(c == '}'){
        token.type = END;
        token.name = "}";
    }else if(c == '+'){
        token.type = PLUS;
        token.name = "+";
    }else if(c == '-'){
        token.type = MINUS;
        token.name = "-";
    }else if(c == '*'){
        token.type = MUL;
        token.name = "*";
    }else if(c == '/'){
        token.type = DIV;
        token.name = "/";
    }else if(c == '#' || c == '\0'){ // end of file
        token.type = LEXER_DONE;
        token.name = "FINISH";
    }else if(c == '&'){
        token.type = AND;
        token.name = "&";
    }else if(c == '|'){
        token.type = OR;
        token.name = "|";
    }else if(c == ';'){
        token.type = SEMI;
        token.name = ";";
    }else{
        token.type = ERROR;
        token.name = "ERROR! Unknown character.";
        printf("Error occured when state=START! Current character is: %c\n", c);
    }
}
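/*
 * Keyboard shortcuts (code-viewer page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Fullscreen mode      F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */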