⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scan.cpp

📁 编译原理课程设计 词,语法分析器 用C++手工编写
💻 CPP
字号:
#include "globals.h"
#include "util.h"
#include "scan.h"

//	states in scanner DFA
typedef enum
{
	START,		/* between tokens; nothing recognised yet */
	INNUM,		/* inside a run of digits */
	INID,		/* inside a run of letters */
	INASSIGN,	/* seen '=', may become '==' */
	GREATER,	/* seen '>', may become '>=' */
	LESS,		/* seen '<', may become '<=' */
	NOT,		/* seen '!', must become '!=' */
	DIVISION,	/* seen '/', may open a comment */
	INCOMMENT,	/* inside a comment */
	EXPLAIN,	/* inside a comment, just seen '*' (a '/' would close it) */
	DONE		/* token complete */
} StateType;	/* order matters: getToken prints the raw value in its "Scanner Bug" message */

//	lexeme of identifier or reserved word
/* getToken() stores the characters of every token it saves here (numbers and
   operators as well as names) and NUL-terminates the result before returning;
   the extra element is for that terminator. */
char tokenString[MAXTOKENLEN + 1];

#define BUFLEN 1024

static char lineBuf[BUFLEN];	/* the source line currently being scanned */
static int linepos = 0;			/* index of the next unread character in lineBuf */
static int bufsize = 0;			/* number of characters held in lineBuf */
static int atEof = 0;			/* non-zero once fgets() has failed on source */

/*
 * getNextChar - return the next character of the source file.
 *
 * Characters are served from lineBuf; when the buffer is exhausted a new line
 * is read with fgets(), lineno is advanced and, if EchoSource is set, the line
 * is echoed to the listing.
 *
 * Returns the character as an unsigned char value converted to int, or EOF
 * once the input is exhausted.  The int return type keeps EOF distinct from
 * every real character; a caller that stores the result in a char still sees
 * the same values as before.
 */
static int getNextChar(void)
{
	if (linepos < bufsize)
		return (unsigned char) lineBuf[linepos++];

	/* After the first failed read, stay at EOF without touching lineno again. */
	if (atEof)
		return EOF;

	lineno++;
	/* fgets() already reserves one byte for the terminator, so pass the full size. */
	if (fgets(lineBuf, BUFLEN, source))
	{
		if (EchoSource) fprintf(listing, "%4d: %s", lineno, lineBuf);
		bufsize = (int) strlen(lineBuf);
		linepos = 0;
		return (unsigned char) lineBuf[linepos++];
	}

	atEof = 1;
	return EOF;
}

/*
 * ungetNextChar - back up one character in lineBuf.
 *
 * Does nothing once EOF has been returned: EOF never advanced linepos, so
 * decrementing it would make the scanner re-read the last real character of
 * the file.  The linepos check keeps the index from going negative.
 */
static void ungetNextChar(void)
{
	if (!atEof && linepos > 0)
		linepos--;
}


//	lookup table of reserved words
static struct
{
	const char* str;	/* keyword spelling; const because it points at a string literal */
	TokenType tok;		/* token returned when an identifier matches str */
}reservedWords[MAXRESERVED] =
{
	{"else", ELSE},
	{"if", IF},
	{"int", INT},
	{"return", RETURN},
	{"void", VOID},
	{"while", WHILE},
};



//	lookup an identifier to see if it is a reserved word
/*
 * Linear search of reservedWords for the lexeme s (exact, case-sensitive match).
 * Returns the matching entry's token, or ID when s is not a reserved word.
 * s is only read, hence const.
 */
static TokenType reservedLookup( const char* s)
{
	int i;
	for (i = 0; i < MAXRESERVED; i++)
	{
		const char* word = reservedWords[i].str;
		/* If MAXRESERVED is larger than the number of initialisers, the extra
		   entries are zero-filled; skip them rather than hand strcmp a null pointer. */
		if (word && strcmp(s, word) == 0)
			return reservedWords[i].tok;
	}
	return ID;
}

/****************************************/
/*	the primary function of the scanner */
/****************************************/

TokenType getToken( void )
{
	/*	index for storing into tokenString		*/
	int tokenStringIndex = 0;
	/*	holds current token to be returned		*/	
	TokenType currentToken;
	/*	current state -always begins at START	*/
	StateType state = START;
	/*	flag to indicate save to tokenString	*/
	int save;

	while (state != DONE)
	{
		char c = getNextChar();
		save = TRUE;
		switch (state)
		{
		case START:
			if (isdigit(c))
				state = INNUM;
			else if (isalpha(c))
				state = INID;
			else if (c == '=')
				state = INASSIGN;
			else if(c == '>')
                state = GREATER;
            else if(c == '<')
                state = LESS;
            else if(c == '!')
                state = NOT;
			else if ((c == ' ') || (c == '\t') || (c == '\n'))
				save = FALSE;
			else if(c == '/')
			{
				save = FALSE;
				state = DIVISION;
			}
			else
			{
				state = DONE;
				switch(c)
				{
				case EOF:
					save = FALSE;
					currentToken = ENDFILE;
					break;
				case '+':
					currentToken = PLUS;
					break;
				case '-':
					currentToken = MINUS;
					break;
				case '*':
					currentToken = TIMES;
					break;
			    case ';':
                   currentToken = SEMI;
                   break;  
                case ',':
                   currentToken = COMMA;
                   break;  
                case '(':
                   currentToken = LPAREN;
                   break;  
                case ')':
                   currentToken = RPAREN;
                   break;  
                case '[':
                currentToken = LSPAREN;
                   break;  
                case ']':
                   currentToken = RSPAREN;
                   break;  
                case '{':
                   currentToken = LBPAREN;
                   break;  
                case '}':
                   currentToken = RBPAREN;
                   break;  
				default:
					currentToken = ERROR;
					break;
				}
			}
			break;//case (state) START
		case DIVISION:
            if(c == '*')
			{ 
				save = FALSE;
				state = INCOMMENT;
			}
	//		else if(c == ' ')
	//		{   
	//			save = FALSE;
	//			state = DONE;
	//			currentToken = DIV;
	//		} 
			else
			{ 
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = DIV;//ERROR;
			 }	
            break;

		case INASSIGN:
			state = DONE;
	//		if(c == ' ')
//				currentToken = ASSIGN;
//			else if(c == '=')
			if ( c == '=')
			{
				currentToken = EQ;
			}
			else
			{
				ungetNextChar();
                save = FALSE;
                currentToken = ASSIGN;//ERROR;
			}
			break;

		case GREATER:     
            state = DONE;
            if(c == '=')
               currentToken = GEQ;
   //         else if(c == ' ')
//			{
//				currentToken = GT;
//			}
			 else
            { 
				ungetNextChar();
				save = FALSE;
				currentToken = GT;//ERROR;
            }
			break;
		 case LESS:     
            state = DONE;
            if(c == '=')
               currentToken = LEQ;
  //          else if(c == ' ')
//			{
//				currentToken = LT;
//			}
			else
            { 
				ungetNextChar();
				save = FALSE;
				currentToken = LT;//ERROR;
            }   
            break;    

		 case NOT:     
            state = DONE;
            if(c == '=')
               currentToken = NEQ;
            else
            { 
				ungetNextChar();
				save = FALSE;
				currentToken = ERROR;
            }    
            break;

		case INNUM:
			if(!isdigit(c))
			{
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = NUM;
			}
			break;

		case INID:
			if (!isalpha(c))
			{
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = ID;
			}
			break;

		 case INCOMMENT:
			 save = FALSE;
			 if(c == '*')
			 {
				 state = EXPLAIN;
			 }  
			 break;

		 case EXPLAIN:
			 if(c == '/')
			 { 
			   save = FALSE;
			   state = START;
			 }
			 else if(c == '*')
			 { 
			   save = FALSE;
			   state = EXPLAIN;
			 }

			 else
			 {
				 save = FALSE;
				 state = INCOMMENT;
			 }
			 break;
		case DONE:
		default:
			fprintf(listing, "Scanner Bug: state = %d\n", state);
			state = DONE;
			currentToken = ERROR;
			break;
		}
		if ((save) && (tokenStringIndex <= MAXTOKENLEN))
			tokenString[tokenStringIndex++] = c;
		if (state == DONE)
		{
			tokenString[tokenStringIndex] = '\0';
			if (currentToken == ID)
				currentToken = reservedLookup(tokenString);
		}
	}
	if (TraceScan)
	{
		fprintf(listing, "\t%d: ", lineno);
		printToken(currentToken, tokenString);
	}
	return currentToken;
} /*	end getToken	*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -