// scan.cpp
// A lexical analyzer (scanner) for the C- language.
// Language: C++

#include <string>

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include "scan.h"
#define BUFLEN 256


/* States of the scanner's DFA (deterministic finite automaton). */
typedef enum
{
	START,      /* initial state of every token scan */
	INID,       /* entered from START on a letter */
	INNUM,      /* entered from START on a digit */
	INCOMMENT,  /* entered after the opening comment delimiter */
	DONE        /* the scan loop stops in this state */
} StateType;

/*
 * Reserved-word table: maps the spelling of each keyword to its token type.
 * The spellings are string literals, so they are held through pointers to
 * const; a plain char* to a string literal is not valid in modern C++.
 */
static struct
{
	const char* str;  /* spelling of the keyword */
	TokenType tok;    /* token type reported for that spelling */
} reservedWords[MAXRESERVED]
={
	{"if",IF},{"else",ELSE},{"int",INT},
	{"return",RETURN},{"void",VOID},{"while",WHILE}
};

static char tokenString[MAXTOKENLEN + 1];  /* text of the token currently being scanned */
static char lineBuf[BUFLEN];               /* the input line currently being scanned */
static int linepos = 0;                    /* index in lineBuf of the next character to hand out */
static int bufsize = 0;                    /* number of characters stored in lineBuf */

/*
 * Reads one line from standard input into a and NUL-terminates it.
 *
 * Reading stops at a newline (consumed, not stored), at end of input or a
 * read error, or once BUFLEN - 1 characters have been stored. In the last
 * case the rest of the line stays unread and is returned by the next call.
 *
 * a must have room for at least BUFLEN characters.
 */
static void readLine(char a[])
{
	int i = 0;
	int ch;  /* int, not char: EOF must stay distinguishable from every character */

	while (i < BUFLEN - 1 && (ch = getchar()) != EOF && ch != '\n')
		a[i++] = (char)ch;

	a[i] = '\0';
}

/*
 * Fills the whole line buffer with NUL characters, so that whatever is
 * stored into it afterwards is always followed by a terminator.
 */
void clearLineBuf()
{
	memset(lineBuf, 0, sizeof lineBuf);
}

/*
 * Returns the next input character, reading a new line from standard input
 * when the current one has been used up.
 *
 * After the last character of each line a '\n' is returned, so a token at
 * the end of a line is terminated there instead of running into the first
 * token of the following line. Before any line has been read the buffer is
 * empty, so the very first call also returns '\n'.
 *
 * Returns '\0' when a refill stores nothing and standard input is at end of
 * file or in error.
 */
static char getNextChar(void)
{
	char next;

	/* linepos == bufsize is the end-of-line slot; beyond it the line is spent */
	if (linepos > bufsize)
	{
		clearLineBuf();
		readLine(lineBuf);
		bufsize = (int)strlen(lineBuf);
		linepos = 0;

		if (bufsize == 0 && (feof(stdin) || ferror(stdin)))
		{
			linepos = 1;  /* keeps linepos > bufsize so the next call retries the read */
			return '\0';
		}
	}

	next = (linepos < bufsize) ? lineBuf[linepos] : '\n';
	linepos++;
	return next;
}

/*
 * Steps the read position back by one so that the character most recently
 * handed out is returned again by the next read. Does nothing when the
 * position is already at the start of the line buffer.
 */
static void ungetNextChar(void)
{
	if (linepos > 0)
		linepos--;
}

/*
 * Looks s up in the reserved-word table.
 *
 * Returns the token type of the matching reserved word, or ID when s is not
 * a reserved word.
 */
static TokenType reservedLookup(char* s)
{
	for(int i = 0; i < MAXRESERVED; i++)
	{
		const char* word = reservedWords[i].str;

		/* slots without an initializer hold a null pointer; never pass that to strcmp */
		if(word != NULL && strcmp(s, word) == 0)
			return reservedWords[i].tok;
	}

	return ID;
}

Token getToken(void)
{
	for(int x = 0; x < MAXTOKENLEN + 1 ; x++)
		tokenString[x] = NULL;
	int tokenStringIndex = 0;
	Token currentToken;
	TokenType currentTokenType;
	StateType state = START;
	boolean save;

	while(state!=DONE)
	{
		char c = getNextChar();
		save = TRUE;
		switch(state)
		{
			case START:
				if(isdigit(c))
					state = INNUM;
				else if(isalpha(c))
					state = INID;
				else if(c == ' ' || c == '\t' || c == '\n')
					save = FALSE;
				else 
				{
					state = DONE;
					switch(c)
					{
						case '+':
							currentTokenType = PLUS;
							break;
						case '-':
							currentTokenType = MINUS;
							break;
						case '*':
							currentTokenType = TIMES;
							break;
						case '/':
							if(getNextChar() != '*')
							{
								currentTokenType = DIVIDE;
								ungetNextChar();
							}
							else
							{
								state = INCOMMENT;
								save = FALSE;
							}
							break;
						case '>':
							if((c = getNextChar()) != '=')
							{
								tokenString[tokenStringIndex++] = '>';
								save = FALSE;
								currentTokenType = GT;
								ungetNextChar();
							}
							else
							{	
								tokenString[tokenStringIndex++] = '>';
								currentTokenType = GE;
							}
							break;
						case '<':
							if((c = getNextChar()) != '=')
							{
								tokenString[tokenStringIndex++] = '<';
								save = FALSE;
								currentTokenType = LT;
								ungetNextChar();
							}
							else
							{	
								tokenString[tokenStringIndex++] = '<';
								currentTokenType = LE;
							}
							break;
						case '=': 
							if(( c = getNextChar()) != '=')
							{
								tokenString[tokenStringIndex++] = '=';
								save = FALSE;
								currentTokenType = ASSIGN;
								ungetNextChar();
							}
							else
							{	
								tokenString[tokenStringIndex++] = '=';
								currentTokenType = EQ;
							}
							break;
						case '!':
							if((c = getNextChar()) != '=')
							{
								tokenString[tokenStringIndex++] = '!';
								save = FALSE;
								currentTokenType = NOT;
								ungetNextChar();
							}
							else
							{	
								tokenString[tokenStringIndex++] = '!';
								currentTokenType = NE;
							}
							break;
						case ';':
							currentTokenType = SEMICOLON;
							break;
						case ',':
							currentTokenType = COMMA;
							break;
						case '(':
							currentTokenType = LPAREN;
							break;
						case ')':
							currentTokenType = RPAREN;
							break;
						case '{':
							currentTokenType = LBRACKET;
							break;
						case '}':
							currentTokenType = RBRACKET;
							break;
						case '[':
							currentTokenType = LSBRACKET;
							break;
						case ']':
							currentTokenType = RSBRACKET;
							break;
						default:
							currentTokenType = ERROR;
							break;
					}
				}
				break;
			case INID:
				if(!(isalpha(c)||isdigit(c)))
				{
					ungetNextChar();
					save = false;
					state = DONE;
					currentTokenType = ID;
				}
				break;
			case INNUM:
				if(!isdigit(c))
				{
					ungetNextChar();
					save = false;
					state = DONE;
					currentTokenType = NUM;
				}
				break;
			case INCOMMENT:
				save = FALSE;
				while(state!=START)
				{
					if(c != '*')
					{
						state = INCOMMENT;
						c=getNextChar();
					}
					else
					{
						if( (c=getNextChar()) == '/')
							state = START;
						else
						{
							state = INCOMMENT;
						}
					}
					
				}
				break;
			
			case DONE:
			default:
				printf("Scanner Error, state = %d\n",state);
				state = DONE;
				currentTokenType = ERROR;
				break;
	   }
	   if((save) && (tokenStringIndex <= MAXTOKENLEN))
		   tokenString[tokenStringIndex++] = c;
	   if(state == DONE)
	   {
		   tokenString[tokenStringIndex] = '\0';
		   if(currentTokenType == ID)
			   currentTokenType = reservedLookup(tokenString);
	   }
	}

	for(int i = 0; i <= tokenStringIndex; i++)
	{
		currentToken.value[i] = tokenString[i];
		tokenString[i] = NULL;
	}
	
	currentToken.type = currentTokenType;
	return currentToken;
}

/*
 * Driver: scans standard input and prints the numeric type and the text of
 * every token until the input is exhausted.
 */
int main()
{
	for(;;)
	{
		Token temp = getToken();

		/* Every real token has at least one character of text, so an empty
		   token once stdin is at end of file (or in error) marks the end of
		   the input.
		   NOTE(review): relies on the scanner returning an empty-text token
		   when no more input can be read - confirm against getToken. */
		if(temp.value[0] == '\0' && (feof(stdin) || ferror(stdin)))
			break;

		printf("Type: %d \t Value: %s\n", (int)temp.type, temp.value);
	}
	return 0;
}
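// File size: 11 K
// Uploaded by: zhangtaoai007
// Category: compilers / interpreters
// Tags: #language #analyzer
//
// Code-viewer keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Switch theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -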