📄 lexer.cpp

📁 一个上课很好用的课件
💻 CPP
字号:
#include "Lexer.h"
#include <stdio.h>
#include <string.h>
// Number of entries in the ReservedWords table below.
#define MAXRESERVED 7
// Size in bytes of the scratch buffer (buf) in which the lexer accumulates
// the characters of an identifier or number.
#define TOKENBUFSIZE 64
// Fills `token` with the type and name that correspond to the single
// character `c` (defined further down in this file).
void getSingleOperator(char c, Token &token);
// Re-types `token` when its name matches an entry of ReservedWords
// (defined further down in this file).
void keywordLookup(Token &token);
// Keyword table: each entry pairs a token type with the spelling that
// keywordLookup() compares identifier names against.
static Token ReservedWords[MAXRESERVED] = {
	{IF, "if"},
	{THEN, "then"},
	{ELSE, "else"},
	{WHILE, "while"},
	{DO, "do"},
	{BEGIN, "begin"},
	{END, "end"}
};
/**
 * Construct a lexer over the contents of the file `filename`.
 *
 * The file is opened in text mode and read completely into a freshly
 * allocated, NUL-terminated buffer (`src`); `length` is the number of
 * characters stored in front of the terminator. If the file cannot be
 * opened, a diagnostic is printed to stdout, `src` is left NULL and
 * `length` is 0, so isReady() reports false.
 *
 * @param filename path of the source file to tokenize
 */
Lexer::Lexer(char *filename){
	buf = new char[TOKENBUFSIZE];
	src = NULL;
	index = 0;
	length = 0;

	FILE *fp = (filename != NULL) ? fopen(filename, "r") : NULL;
	if(fp==NULL){
		printf("\n\n********************************************\n");
		printf("* FATAL ERROR! LEXER COULD NOT OPEN FILE!!!\n");
		printf("* %s : No such file.\n", (filename != NULL) ? filename : "(null)");
		printf("********************************************\n\n");
		return;
	}

	// First pass: count the characters so the buffer can be sized exactly.
	int count = 0;
	while(fgetc(fp)!=EOF){
		count++;
	}

	src = new char[count+1];
	rewind(fp);

	// Second pass: copy the characters. The result of fgetc() is kept in an
	// int so that EOF can be told apart from a legitimate 0xFF byte (and is
	// detected at all where plain char is unsigned); the copy is also capped
	// at the counted size so it can never run past the allocation.
	int stored = 0;
	int ch;
	while(stored<count && (ch = fgetc(fp))!=EOF){
		src[stored++] = static_cast<char>(ch);
	}
	src[stored] = '\0';
	length = stored;

	fclose(fp);
}
/**
 * Construct an empty lexer with no source attached.
 *
 * Every member is put into a defined state so that isReady() reports false
 * and the destructor's delete[] operates on null pointers until setSrc()
 * supplies a buffer.
 */
Lexer::Lexer(){
	src = NULL;
	buf = NULL;
	index = 0;
	length = 0;
}
/**
 * Release the buffers held by the lexer.
 *
 * `buf` is allocated with new[] by the file constructor and by setSrc(),
 * so it is released here alongside `src`.
 * NOTE(review): `src` may have been handed in through setSrc(); this
 * assumes such buffers were allocated with new[] - confirm against callers.
 */
Lexer::~Lexer(){
	delete[] src;
	delete[] buf;
}
void Lexer::reset(){
	index = 0;
}
/**
 * Report whether the read position has reached the end of the source.
 *
 * The lexer counts the source as exhausted once `index` is at position
 * `length - 1`. nextToken() can move `index` beyond that position (for
 * example when it skips trailing blanks or consumes the final character),
 * so the test is "at or past" rather than strict equality; otherwise a
 * caller looping on isFinished() would never terminate and would keep
 * scanning outside the buffer.
 *
 * @return true when no further token should be requested
 */
bool Lexer::isFinished(){
	return (index >= length - 1);
}
// Tells whether a source buffer is attached (src is non-NULL).
bool Lexer::isReady(){
	if(src == NULL){
		return false;
	}
	return true;
}
char* Lexer::getSrc(){
	return src;
}
void Lexer::setSrc(char *s, int len){
	src = s;
	index = 0;
	length = len;
	buf = new char[TOKENBUFSIZE];
}
int  Lexer::getIndex(){
	return index;
}
/***=======================================================****/
// True for the blank characters skipped between tokens.
static bool lexIsBlank(char c){
	return c==' ' || c=='\n' || c=='\r' || c=='\t';
}

// True for ASCII letters, the characters that may start an identifier.
static bool lexIsLetter(char c){
	return (c>='a' && c<='z') || (c>='A' && c<='Z');
}

// True for ASCII decimal digits.
static bool lexIsDigit(char c){
	return c>='0' && c<='9';
}

/**
 * Scan and return the next token, advancing the read position past it.
 *
 * Blanks (space, tab, CR, LF) in front of the token are skipped. A letter
 * starts an identifier (letters and digits), which is then checked against
 * the reserved words; a digit starts a number; '=', '<' and '>' look one
 * character ahead for a following '='; every other character is handed to
 * getSingleOperator().
 *
 * @return the scanned token. Its type is ERROR with name "NO CHAR LEFT."
 *         when there is no source or the read position is at the last
 *         position or beyond the end, and ERROR with name "OUT OF BUFFER!"
 *         when an identifier or number does not fit into the
 *         TOKENBUFSIZE-byte scratch buffer. For identifiers and numbers the
 *         name is a new[]-allocated copy of the text; all other names are
 *         string literals.
 */
Token Lexer::nextToken(){
	Token token;
	ScannerState state = START;
	int bufindex = 0;
	bool overflow = false;   // token text did not fit into buf
	char c;

	// Refuse to scan without a source, at the final position (the point at
	// which the lexer counts the source as exhausted) or anywhere past the
	// end, where reading would leave the buffer.
	if(src==NULL || index==length-1 || index>length){
		token.type = ERROR;
		token.name = "NO CHAR LEFT.";
		return token;
	}

	// Skip blanks, never walking past the end of the source.
	while(index<length && lexIsBlank(src[index])){
		index ++;
	}
	// Positions at or past the end read as the '\0' terminator.
	c = (index<length) ? src[index] : '\0';

// get started
	if(lexIsLetter(c)){
		state = INID;
		token.type = ID;
		buf[bufindex++] = c;
	}else if(lexIsDigit(c)){
		state = INNUM;
		token.type = NUM;
		buf[bufindex++] = c;
	}else if(c=='='){
		state = INEQ;
	}else if(c=='<'){
		state = INLE;
	}else if(c=='>'){
		state = INGE;
	}else{
		state = DONE;
		getSingleOperator(c, token);
	}
	index ++;

	while(state!=DONE){
		c = (index<length) ? src[index] : '\0';
		// Whether the character just examined belongs to the token; a
		// character that merely ends the token is left for the next call.
		bool consumed = true;
		switch(state){
		case INEQ:
			if(c=='='){
				token.type = EQ;
				token.name = "==";
			}else{
				token.type = ASSIGN;
				token.name = "=";
				consumed = false;
			}
			state = DONE;
			break;
		case INLE:
			if(c=='='){
				token.type = LE;
				token.name = "<=";
			}else{
				token.type = LT;
				token.name = "<";
				consumed = false;
			}
			state = DONE;
			break;
		case INGE:
			if(c=='='){
				token.type = GE;
				token.name = ">=";
			}else{
				token.type = GT;
				token.name = ">";
				consumed = false;
			}
			state = DONE;
			break;
		case INID:
		case INNUM:
			if(lexIsDigit(c) || (state==INID && lexIsLetter(c))){
				// Keep one byte free for the terminator. Characters that
				// do not fit are still consumed so the oversized token is
				// skipped as a whole.
				if(bufindex < TOKENBUFSIZE - 1){
					buf[bufindex++] = c;
				}else{
					overflow = true;
				}
			}else{
				state = DONE;
				consumed = false;
			}
			break;
		default:
			state = DONE;
			token.type = ERROR;
			token.name = "ERROR!";
			printf("Error! Because no state is define! This should never happen! \
				Current character is: %c\n", c);
			break;
		}// end scanner state
		if(consumed){
			index ++;
		}
	}// end while

	if(overflow){
		// The text was truncated, so report the error instead of a name.
		token.type = ERROR;
		token.name = "OUT OF BUFFER!";
		return token;
	}
	if(bufindex!=0){
		buf[bufindex] = '\0';
		// Let the keyword lookup inspect the text while it still lives in
		// the scratch buffer; a heap copy is made only if the lookup did
		// not substitute its own name, so no allocation is orphaned.
		token.name = buf;
		if(token.type==ID){ // looking for reserved word and set the right type
			keywordLookup(token);
		}
		if(token.name==buf){
			char *copy = new char[bufindex+1];   // +1 for the terminator
			strcpy(copy, buf);
			token.name = copy;
		}
	}
	return token;
}
// Searches the reserved-word table for the token's name. On a match the
// token takes the keyword's type; the BEGIN and END keywords additionally
// have their name replaced by "{" and "}". A token whose name is not in the
// table is left untouched.
void keywordLookup(Token &token){
	int slot = 0;
	while(slot<MAXRESERVED && strcmp(token.name, ReservedWords[slot].name)!=0){
		slot++;
	}
	if(slot==MAXRESERVED){
		return;   // not a reserved word
	}
	token.type = ReservedWords[slot].type;
	if(token.type == BEGIN)
		token.name = "{";
	if(token.type == END)
		token.name = "}";
}
// Translates the single character `c` into a token by setting the type and
// name of `token`. '#' and the '\0' terminator both yield LEXER_DONE. Any
// character without a mapping yields an ERROR token and a message on stdout.
void getSingleOperator(char c, Token &token){
	if(c=='{'){
		token.type = BEGIN;
		token.name = "{";
		return;
	}
	if(c=='}'){
		token.type = END;
		token.name = "}";
		return;
	}
	if(c=='+'){
		token.type = PLUS;
		token.name = "+";
		return;
	}
	if(c=='-'){
		token.type = MINUS;
		token.name = "-";
		return;
	}
	if(c=='*'){
		token.type = MUL;
		token.name = "*";
		return;
	}
	if(c=='/'){
		token.type = DIV;
		token.name = "/";
		return;
	}
	if(c=='#' || c=='\0'){  // end of file
		token.type = LEXER_DONE;
		token.name = "FINISH";
		return;
	}
	if(c=='&'){
		token.type = AND;
		token.name = "&";
		return;
	}
	if(c=='|'){
		token.type = OR;
		token.name = "|";
		return;
	}
	if(c==';'){
		token.type = SEMI;
		token.name = ";";
		return;
	}
	// Nothing matched: report the offending character.
	token.type = ERROR;
	token.name = "ERROR! Unknown character.";
	printf("Error occured when state=START! Current character is: %c\n", c);
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -