// scan.cpp
//
// Project: hand-built lexical analyzer (scanner) for the C- language
// Language: C++
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>

using namespace std;

// States of the scanner's finite automaton as driven by getToken.
// The enumerators keep their declaration order, so their numeric values
// (START == 0 ... STATEF == 11) are the ones printed by the "Scanner Bug"
// diagnostic in getToken.
typedef enum StateType
{
	START,       // no character of the next token consumed yet
	INCOMMENT,   // inside a /* ... */ comment
	INCOMMENTA,  // inside a comment, the previous character was '*'
	INNUM,       // reading the digits of a number
	INID,        // reading the letters of an identifier or keyword
	DONE,        // a complete token has been recognised
	STATEA,      // '/' seen: either OVER or the start of a comment
	STATEB,      // '<' seen: either LESS or LESSEQ
	STATEC,      // '>' seen: either MORE or MOREEQ
	STATED,      // '!' seen: NOTEQ if '=' follows, otherwise ERROR
	STATEE,      // '=' seen: either ASSIGN or EQ
	STATEF       // '*' seen: either TIMES or a stray "*/"
} StateType;

// Token categories returned by getToken and printed by printToken.
// Declaration order is preserved, so every enumerator keeps its numeric value.
typedef enum TokenType
{
	// book-keeping tokens
	ERROR,
	ENDFILE,

	// reserved words
	ELSE,
	IF,
	INT,
	RETURN,
	VOID,
	WHILE,

	// multi-character tokens
	ID,
	NUM,

	// operators: + - * / < <= > >= == != =
	PLUS,
	MINUS,
	TIMES,
	OVER,
	LESS,
	LESSEQ,
	MORE,
	MOREEQ,
	EQ,
	NOTEQ,
	ASSIGN,

	// punctuation: ; , ( ) { } [ ]
	SEMI,
	COMMA,
	LP,
	RP,
	LB,
	RB,
	LSB,
	RSB,

	// comment delimiters /* and */
	LCOMMENT,
	RCOMMENT
} TokenType;

// Input stream the scanner reads source text from (opened in main).
FILE* source;
// Output stream for the echoed source lines and the token listing.
FILE* listing;

// Capacity of tokenString, not counting its terminating NUL.
#define MAXTOKENLEN 40
// Number of entries in the reservedWords table.
#define MAXRESERVED 6
// Size in bytes of the line buffer lineBuf.
#define BUFLEN 256

// Integer truth values used for the scanner's flag variables.
#define FALSE 0
#define TRUE 1

// Lexeme of the token most recently recognised by getToken.
char tokenString[MAXTOKENLEN+1];
// Holds the source line currently being scanned.
char lineBuf[BUFLEN];
// Returns the next token from `source`; also prints it to `listing`.
TokenType getToken(void);
// Prints a token and, where relevant, its lexeme to `listing`.
void printToken( TokenType, const char* );


// Index in lineBuf of the next character to hand out.
int linepos = 0;
// Number of characters currently stored in lineBuf.
int bufsize = 0;
// Count of buffer refills attempted so far; printed as the line number.
int lineno = 0;
// Set to TRUE once reading from `source` reports no more input.
int EOF_Flag = FALSE;


// Hands out the next character of the source text.
//
// Characters are served from lineBuf. When the buffer is exhausted, lineno
// is incremented and a new line is read from `source`; a line that was read
// successfully is echoed to `listing` prefixed with its number. When no
// further line can be read, EOF_Flag is set and EOF is returned.
static char getNextChar(void)
{
	// Common case: unread characters remain in the current line.
	if (linepos < bufsize)
		return lineBuf[linepos++];

	// Buffer exhausted: count the refill attempt, then try to read a line.
	++lineno;
	if (!fgets(lineBuf, BUFLEN-1, source))
	{
		EOF_Flag = TRUE;
		return EOF;
	}

	fprintf(listing, "%d:%s", lineno, lineBuf);
	bufsize = strlen(lineBuf);

	// Serve the first character and leave linepos pointing at the second.
	linepos = 1;
	return lineBuf[0];
}

//ungetNextChar backtracks one character in linebuf 
static void ungetNextChar(void) 
{ 
	linepos--; 
} 

//lookup table of reserved words: spelling -> token type.
//The spellings are string literals, so they are held through pointers to
//const (binding a literal to a plain char* is not valid in modern C++).
static struct
{
	const char* str;   // keyword spelling
	TokenType tok;     // token returned for that spelling
}reservedWords[MAXRESERVED]
	= {{"if",IF}, {"else",ELSE}, {"int",INT}, {"return",RETURN},{"void",VOID},{"while",WHILE}};

//Classifies an identifier lexeme: returns the keyword token whose spelling
//in reservedWords equals s, or ID when no entry matches.
//The table is scanned linearly from its first entry.
static TokenType reservedLookup(char* s)
{
	int idx = 0;
	while (idx < MAXRESERVED)
	{
		if (strcmp(s, reservedWords[idx].str) == 0)
			return reservedWords[idx].tok;
		++idx;
	}
	return ID;
}




// The primary function of the scanner.
//
// getToken returns the next token in the source file. It runs a small state
// machine over the characters supplied by getNextChar, stores the token's
// lexeme in the global tokenString (NUL-terminated, at most MAXTOKENLEN
// characters; longer lexemes are truncated), prints the token to `listing`
// preceded by the current value of lineno, and returns its type.
// Identifiers whose spelling appears in reservedWords are returned as the
// corresponding keyword token. Whitespace and /* ... */ comments are skipped.
// A stray "*/" outside a comment is skipped as well.
TokenType getToken(void)
{

	int tokenStringIndex = 0;        // next free slot in tokenString
	TokenType currentToken = ERROR;  // always overwritten before state becomes DONE
	StateType state = START;
	int save;                        // TRUE when c is part of the lexeme

	while (state!=DONE)
	{
		char c = getNextChar();
		// <ctype.h> classifiers require a value representable as unsigned char.
		const unsigned char uc = static_cast<unsigned char>(c);
		// End of input: getNextChar has set EOF_Flag and handed back EOF
		// narrowed to char. Checking the flag too keeps a 0xFF data byte
		// from being mistaken for end of input, and comparing against the
		// narrowed value also works where plain char is unsigned.
		const bool atEof = EOF_Flag && (c == static_cast<char>(EOF));
		save = TRUE;
		switch(state)
		{
		case START:
			if(atEof){
				state = DONE;
				save = FALSE;
				currentToken = ENDFILE;
			}
			else if(isdigit(uc))
				state = INNUM;
			else if(isalpha(uc))
				state = INID;
			else if((c==' ')||(c=='\t')||(c=='\n'))
				save = FALSE;
			else if(c=='/'){
				save = FALSE;
				state = STATEA;
			}
			else if(c=='<')
				state = STATEB;
			else if(c=='>')
				state = STATEC;
			else if(c=='!')
				state = STATED;
			else if(c=='=')
				state = STATEE;
			else if(c=='*')
				state = STATEF;
			else{
				// single-character tokens; anything else is an error
				state = DONE;
				switch(c)
				{
				case ';':
					currentToken = SEMI;
					break;
				case ',':
					currentToken = COMMA;
					break;
				case '+':
					currentToken = PLUS;
					break;
				case '-':
					currentToken = MINUS;
					break;
				case '(':
					currentToken = LP;
					break;
				case ')':
					currentToken = RP;
					break;
				case '[':
					currentToken = LSB;
					break;
				case ']':
					currentToken = RSB;
					break;
				case '{':
					currentToken = LB;
					break;
				case '}':
					currentToken = RB;
					break;
				default:
					currentToken = ERROR;
					break;
				}
			}
			break;
		case STATEA:
			// after '/': "/*" opens a comment, otherwise the token is OVER
			if(c=='*'){
				save = FALSE;
				state = INCOMMENT;
			}
			else{
				state = DONE;
				ungetNextChar();
				save = FALSE;
				currentToken = OVER;
			}
			break;
		case STATEB:
			// after '<'
			state = DONE;
			if(c=='='){
				currentToken = LESSEQ;
			}
			else{
				ungetNextChar();
				save = FALSE;
				currentToken = LESS;
			}
			break;
		case STATEC:
			// after '>'
			state = DONE;
			if(c=='='){
				currentToken = MOREEQ;
			}
			else{
				ungetNextChar();
				save = FALSE;
				currentToken = MORE;
			}
			break;
		case STATED:
			// after '!': only "!=" is a valid token
			state = DONE;
			if(c=='='){
				currentToken = NOTEQ;
			}
			else{
				ungetNextChar();
				save = FALSE;
				currentToken = ERROR;
			}
			break;
		case STATEE:
			// after '='
			state = DONE;
			if(c=='='){
				currentToken = EQ;
			}
			else{
				ungetNextChar();
				save = FALSE;
				currentToken = ASSIGN;
			}
			break;
		case STATEF:
			// after '*': a stray "*/" is skipped, otherwise the token is TIMES
			if(c=='/'){
				save = FALSE;
				state = START;
				// Drop the '*' stored while in START so that it does not
				// become the first character of the next token's lexeme.
				tokenStringIndex = 0;
			}
			else{
				state = DONE;
				ungetNextChar();
				save = FALSE;
				currentToken = TIMES;
			}
			break;
		case INCOMMENT:
			save = FALSE;
			if (atEof) {
				// unterminated comment: report end of file
				state = DONE;
				currentToken = ENDFILE;
			}
			else if(c=='*')
				state = INCOMMENTA;
			break;
		case INCOMMENTA:
			// inside a comment, previous character was '*'
			save = FALSE;
			if (c=='/')
				state = START;
			else if (c=='*')
				state = INCOMMENTA;
			else
				state = INCOMMENT;  // end of input is detected there on the next read
			break;
		case INNUM:
			if(!isdigit(uc))
			{
				//backup in the input
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = NUM;
			}
			break;
		case INID:
			if(!isalpha(uc))
			{
				//backup in the input
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = ID;
			}
			break;
		case DONE:
		default:  //should never happen
			fprintf(listing, "Scanner Bug: state = %d\n",state);
			state = DONE;
			currentToken = ERROR;
			break;
		}

		// tokenString has MAXTOKENLEN+1 slots; keep the last one free for
		// the terminating NUL, so characters beyond MAXTOKENLEN are dropped.
		if((save)&&(tokenStringIndex < MAXTOKENLEN))
			tokenString[tokenStringIndex++] = c;
		if (state == DONE)
		{
			tokenString[tokenStringIndex] = '\0';
			if(currentToken == ID)
				currentToken = reservedLookup(tokenString);
		}
	}

	fprintf(listing,"\t%d:",lineno);
	printToken(currentToken,tokenString);

	return currentToken;
}//end getToken

// Prints one line describing `token` to `listing`.
// Reserved words, numbers, identifiers and errors are printed together with
// their lexeme `tokenString`; operators, punctuation and end of file are
// printed as their fixed spelling; any other token value is reported by its
// numeric code.
void printToken( TokenType token, const char* tokenString)
{
	const char* spelling = "";

	switch (token)
	{
	// tokens printed together with their lexeme
	case IF: case ELSE: case INT: case RETURN: case VOID: case WHILE:
		fprintf(listing, "reserved word: %s\n", tokenString);
		return;
	case NUM:
		fprintf( listing, "NUM, val= %s\n", tokenString);
		return;
	case ID:
		fprintf( listing, "ID, name= %s\n", tokenString);
		return;
	case ERROR:
		fprintf( listing, "ERROR: %s\n", tokenString);
		return;

	// tokens with a fixed spelling
	case PLUS:    spelling = "+\n";   break;
	case MINUS:   spelling = "-\n";   break;
	case TIMES:   spelling = "*\n";   break;
	case OVER:    spelling = "/\n";   break;
	case LESS:    spelling = "<\n";   break;
	case LESSEQ:  spelling = "<=\n";  break;
	case MORE:    spelling = ">\n";   break;
	case MOREEQ:  spelling = ">=\n";  break;
	case EQ:      spelling = "==\n";  break;
	case NOTEQ:   spelling = "!=\n";  break;
	case ASSIGN:  spelling = "=\n";   break;
	case SEMI:    spelling = ";\n";   break;
	case COMMA:   spelling = ",\n";   break;
	case LP:      spelling = "(\n";   break;
	case RP:      spelling = ")\n";   break;
	case LB:      spelling = "{\n";   break;
	case RB:      spelling = "}\n";   break;
	case LSB:     spelling = "[\n";   break;
	case RSB:     spelling = "]\n";   break;
	case ENDFILE: spelling = "EOF\n"; break;

	// every remaining token value
	default:
		fprintf(listing, "Unknow token: %d\n", token);
		return;
	}

	fprintf(listing, "%s", spelling);
}



// Program entry point: scans a C- source file and prints the echoed lines
// and the recognised tokens to standard output.
//
// The file to scan is taken from the first command-line argument when one
// is given, and defaults to "1.txt" otherwise. Exits with status 1 if the
// file cannot be opened; returns 0 after the ENDFILE token has been read.
int main(int argc, char* argv[])
{
	const char* pgm = (argc > 1) ? argv[1] : "1.txt";
	source = fopen(pgm,"r");
	if (source == NULL)
	{
		fprintf(stderr,"File %s not found\n",pgm);
		exit(1);
	}
	listing = stdout;
	fprintf(listing,"\nC- COMPILATION: %s\n",pgm);

	// getToken prints every token itself; just drain the input.
	while (getToken() != ENDFILE)
		;

	fclose(source);
	return 0;
}
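// File size: 11 K
// Uploaded by: dingjuan_01
// Category: compilers / interpreters
// Tags: #analyzer
//
// Code-viewer keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -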