📄 scan.cpp

📁 编译器模拟程序
💻 CPP
字号:
#include "scan.h"

/* Number of the source line currently held in lineBuf. getNextChar bumps it
   every time it tries to read another line; getToken prints it in front of
   each token written to the listing. */
int lineno; 
/* States of the scanner DFA driven by getToken:
     START        - between tokens, nothing consumed yet
     INASSIGN     - '=' seen; a second '=' makes it "=="
     INNOTASSIGN  - '!' seen; only "!=" is a valid continuation
     INLEQ        - '<' seen; may become "<="
     INBEQ        - '>' seen; may become ">="
     INCOMMENT    - inside a comment opened by the two characters '/' '*'
     INNUM        - inside a run of digits
     INID         - inside a run of letters
     DONE         - a complete token has been recognised */
typedef enum
{ START,INASSIGN,INNOTASSIGN,INLEQ,INBEQ,INCOMMENT,INNUM,INID,DONE }
StateType;

static char lineBuf[BUFLEN]; /* text of the current source line, filled by fgets in getNextChar */
static int linepos = 0; /* index in lineBuf of the next character to hand out */
static int bufsize = 0; /* strlen of the line currently stored in lineBuf */
static int EOF_flag = false; /* set once fgets fails; makes ungetNextChar a no-op after end of input */

/* Build a token of kind t whose text is the single character c,
   stored in tokenValue as a one-character NUL-terminated string. */
tokenForUse::tokenForUse(TokenType t,char c){
	tokenValue[0] = c;
	tokenValue[1] = '\0';
	type = t;
}
/* Create a tree node with a default-constructed content and no children. */
treenode::treenode()
	: leftChild(NULL), rightChild(NULL)
{
}
/* Create a childless tree node whose content is a copy of token t. */
treenode::treenode(tokenForUse &t)
	: leftChild(NULL), rightChild(NULL)
{
	content = t;
}
/* getNextChar returns the next character of the source text.
 *
 * Characters are served from lineBuf. When the buffered line is used up,
 * lineno is incremented and fgets reads the following line from `source`
 * into lineBuf. Blanks are NOT skipped here; every character, including the
 * trailing '\n', is handed to the caller.
 *
 * Returns the character as a non-negative int (the value of the byte viewed
 * as unsigned char), or EOF once fgets can deliver no more input. Going
 * through unsigned char keeps bytes >= 0x80 from being sign-extended into
 * negative values, which would be confused with EOF and are not valid
 * arguments for isdigit()/isalpha().
 *
 * Side effects: advances linepos; may refill lineBuf/bufsize, bump lineno,
 * and set EOF_flag at end of input.
 */
int compiler::getNextChar(void){
	/* fast path: the current line still has characters left */
	if (linepos < bufsize)
		return static_cast<unsigned char>(lineBuf[linepos++]);

	/* buffer exhausted: try to load the next line */
	lineno++;
	if (fgets(lineBuf,BUFLEN-1,source))
	{
		bufsize = static_cast<int>(strlen(lineBuf));
		linepos = 0;
		return static_cast<unsigned char>(lineBuf[linepos++]);
	}

	/* nothing more to read; remember it so ungetNextChar stops backing up */
	EOF_flag = true;
	return EOF;
}

/* printToken writes a one-line description of a token to `listing`.
 *
 * ty  - the token type to describe.
 * str - the token's spelling; only used for reserved words, NUM, ID and
 *       ERROR tokens. Operators and punctuation print a fixed symbol.
 *
 * A token type that is not handled below is reported as
 * "unknown token <n>", where n is the numeric value of ty.
 */
void compiler::printToken(TokenType ty, const char* str){

	switch(ty){

	/* reserved words all share one format */
	case IF:
	case ELSE:
	case INT:
	case RETURN:
	case VOID:
	case WHILE:
		fprintf(listing, "reserved word:  %s\n", str);
		break;

	/* assignment and comparison operators */
	case ASSIGN:
		fprintf(listing, "=\n");
		break;
	case NOTASSIGN:
		fprintf(listing, "!=\n");
		break;
	case EQ:
		fprintf(listing, "==\n");
		break;
	case LT:
		fprintf(listing, "<\n");
		break;
	case BT:
		fprintf(listing, ">\n");
		break;
	case LEQ:
		fprintf(listing, "<=\n");
		break;
	case BEQ:
		fprintf(listing, ">=\n");
		break;

	/* arithmetic operators */
	case PLUS:
		fprintf(listing, "+\n");
		break;
	case MINUS:
		fprintf(listing, "-\n");
		break;
	case TIMES:
		fprintf(listing, "*\n");
		break;
	case OVER:
		fprintf(listing, "/\n");
		break;

	/* brackets and separators */
	case LPAREN:
		fprintf(listing, "(\n");
		break;
	case RPAREN:
		fprintf(listing, ")\n");
		break;
	case LFK:
		fprintf(listing, "[\n");
		break;
	case RFK:
		fprintf(listing, "]\n");
		break;
	case LDK:
		fprintf(listing, "{\n");
		break;
	case RDK:
		fprintf(listing, "}\n");
		break;
	case SEMI:
		fprintf(listing, ";\n");
		break;
	case COMMA:
		fprintf(listing, ",\n");
		break;

	case ENDFILE:
		fprintf(listing, "EOF\n");
		break;

	/* tokens that carry their spelling */
	case NUM:
		fprintf(listing, "NUM val = %s\n", str);
		break;
	case ID:
		fprintf(listing, "ID name = %s\n", str);
		break;
	case ERROR:
		fprintf(listing, "ERROR %s\n", str);
		break;

	default:
		/* explicit int conversion so the argument always matches %d */
		fprintf(listing, "unknown token %d\n", static_cast<int>(ty));
		break;
	}
}

/* ungetNextChar steps back one character in lineBuf so that the character
   most recently returned by getNextChar is delivered again (it is not
   consumed). Once end of input has been reached (EOF_flag set) there is
   nothing to give back and the call does nothing. */
void compiler::ungetNextChar(void){
	if (EOF_flag)
		return;

	--linepos;
}

/* reservedLookup decides whether the spelling s is a reserved word.
   It walks the reservedWords table from front to back and returns the token
   type of the first entry whose text equals s; if no entry matches, s is an
   ordinary identifier and ID is returned. */
TokenType compiler::reservedLookup (char * s){
	for (int idx = 0; idx < MAXRESERVED; ++idx) {
		if (strcmp(s, reservedWords[idx].str) == 0) {
			return reservedWords[idx].tok;
		}
	}
	return ID;
}

/* getToken scans the source text and returns the next token.
 *
 * Characters are pulled one at a time with getNextChar() and run through
 * the StateType DFA, starting in START and stopping in DONE. The token's
 * spelling is collected in tokenString, truncated to MAXTOKENLEN characters.
 * A spelling recognised as ID is passed through reservedLookup() so that
 * reserved words get their own token type.
 *
 * Returns a tokenForUse whose `type` is the recognised TokenType (ENDFILE at
 * end of input, including end of input inside a comment; ERROR for an
 * unrecognised character or a '!' not followed by '=') and whose
 * `tokenValue` is a copy of tokenString.
 *
 * Side effects: consumes input, overwrites tokenString, sets TraceScan to
 * true, and writes "\t<lineno>: " plus the printToken() text to `listing`.
 */
tokenForUse compiler::getToken(void){

	/* plain local object; nothing is allocated on the heap */
	tokenForUse token = tokenForUse();

	/* index for storing into tokenString */
	int tokenStringIndex = 0;

	/* type of the token being built; ERROR until a state decides otherwise */
	TokenType currentToken = ERROR;

	/* current DFA state - always begins at START */
	StateType state = START;

	/* whether the current character is appended to tokenString */
	bool save;

	while (state != DONE){

		int c = getNextChar();
		save = true;

		switch (state){

		case START:
			if (isdigit(c)) {
				state = INNUM;
			}
			else if (isalpha(c)) {
				state = INID;
			}
			else if (c == '=') {
				state = INASSIGN;
			}
			else if ((c == ' ') || (c == '\t') || (c == '\n')) {
				/* white space between tokens is skipped */
				save = false;
			}
			else if (c == '!') {
				state = INNOTASSIGN;
			}
			else if (c == '<') {
				state = INLEQ;
			}
			else if (c == '>') {
				state = INBEQ;
			}
			else if (c == '/') {
				/* one character of lookahead tells a comment opener from division */
				if (getNextChar() == '*') {
					save = false;
					state = INCOMMENT;
				}
				else {
					/* not a comment: hand the lookahead back so it is scanned
					   as the start of the next token instead of being lost */
					ungetNextChar();
					state = DONE;
					currentToken = OVER;
				}
			}
			else {
				/* single-character tokens, end of input, or an illegal character */
				state = DONE;
				switch (c){
				case EOF:
					save = false;
					currentToken = ENDFILE;
					break;
				case '+':
					currentToken = PLUS;
					break;
				case '-':
					currentToken = MINUS;
					break;
				case '*':
					currentToken = TIMES;
					break;
				case '(':
					currentToken = LPAREN;
					break;
				case ')':
					currentToken = RPAREN;
					break;
				case '[':
					currentToken = LFK;
					break;
				case ']':
					currentToken = RFK;
					break;
				case '{':
					currentToken = LDK;
					break;
				case '}':
					currentToken = RDK;
					break;
				case ';':
					currentToken = SEMI;
					break;
				case ',':
					currentToken = COMMA;
					break;
				default:
					/* the state is already DONE, so reporting the error only
					   needs the token type to be set */
					currentToken = ERROR;
					break;
				}
			}
			break;

		case INCOMMENT:
			save = false;
			if (c == EOF){
				/* unterminated comment: report end of input */
				state = DONE;
				currentToken = ENDFILE;
			}
			else if (c == '*'){
				if (getNextChar() == '/') {
					state = START;
				}
				else {
					/* give the lookahead back: it may itself be the '*' that
					   starts the real terminator, as in a comment ending in
					   two stars and a slash */
					ungetNextChar();
				}
			}
			break;

		case INASSIGN:
			state = DONE;
			if (c == '='){
				currentToken = EQ;
			}
			else{
				/* a lone '=': the character just read belongs to the next token */
				ungetNextChar();
				save = false;
				currentToken = ASSIGN;
			}
			break;

		case INNOTASSIGN:
			/* finish the token in both branches; otherwise the same
			   character would be re-read forever */
			state = DONE;
			if (c == '='){
				currentToken = NOTASSIGN;
			}
			else{
				/* '!' on its own is not a token: leave the current character
				   unconsumed and report an error */
				ungetNextChar();
				save = false;
				currentToken = ERROR;
			}
			break;

		case INNUM:
			if (!isdigit(c)){
				/* back up one character in the input */
				ungetNextChar();
				save = false;
				state = DONE;
				currentToken = NUM;
			}
			break;

		case INID:
			if (!isalpha(c)){
				/* back up one character in the input */
				ungetNextChar();
				save = false;
				state = DONE;
				currentToken = ID;
			}
			break;

		case INLEQ:
			state = DONE;
			if (c == '='){
				currentToken = LEQ;
			}
			else{
				ungetNextChar();
				save = false;
				currentToken = LT;
			}
			break;

		case INBEQ:
			state = DONE;
			if (c == '='){
				currentToken = BEQ;
			}
			else{
				ungetNextChar();
				save = false;
				currentToken = BT;
			}
			break;

		case DONE:
		default: /* should never happen */
			fprintf(listing,"Scanner Bug: state= %d\n",static_cast<int>(state));
			state = DONE;
			currentToken = ERROR;
			break;
		}

		/* Keep at most MAXTOKENLEN characters so the terminating '\0' below
		   still fits (assumes tokenString holds MAXTOKENLEN+1 bytes, the same
		   size the memcpy at the end of this function relies on). */
		if ((save) && (tokenStringIndex < MAXTOKENLEN))
			tokenString[tokenStringIndex++] = static_cast<char>(c);

		if (state == DONE){
			tokenString[tokenStringIndex] = '\0';
			if (currentToken == ID)
				currentToken = reservedLookup(tokenString);
		}
	}

	/* NOTE(review): tracing is switched on unconditionally here, so every
	   token is echoed to the listing no matter what TraceScan was before the
	   call. Kept because existing output depends on it - confirm whether the
	   flag should really be left to the caller. */
	TraceScan = true;

	/* when scanner tracing is enabled, print the token to the listing */
	if (TraceScan) {
		fprintf(listing,"\t%d: ",lineno);
		printToken(currentToken,tokenString);
	}

	/* the result is filled in regardless of tracing */
	token.type = currentToken;
	memcpy(token.tokenValue,tokenString,MAXTOKENLEN+1);
	return token;
} /* end getToken */
💿 文件大小 24 K
👤 上传用户 zyz5925629
📂 所属分类编译器/解释器
🏷️ 相关标签

#编译器 #模拟 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -