⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scan.c

📁 SIP软件开发记录 由于通信网的封闭性
💻 C
字号:
/* scan.c
 * The scanner implementation for TINY compiler
 *
 * Compiler Construction: Principles and Practice
 * Kenneth C. Louden
 * 编译原理及实践
 * (美) Kenneth C. Louden 著
 * 冯博琴 冯岚 等译
 * 机械工业出版社 ISBN 7-111-07703-2
 * 源代码:zwf编辑并修订
 * Code Modify: 
 */
#include "globals.h"
#include "util.h"
#include "scan.h"

/* States of the scanner's DFA.  INLT and INGT are the one-character
 * lookahead states used to recognize "<=", "<>" and ">=".  The enumerators
 * keep their declaration order because getToken prints a state with %d.
 */
typedef enum {
	START,		/* no character of the next token consumed yet */
	INASSIGN,	/* ':' seen */
	INCOMMENT,	/* inside a { ... } comment */
	INNUM,		/* inside a number */
	INID,		/* inside an identifier or reserved word */
	DONE,		/* a complete token has been recognized */
	INLT,		/* '<' seen */
	INGT		/* '>' seen */
} StateType;

/* Lexeme text of the token most recently scanned by getToken(), which
 * stores the characters here and NUL-terminates them.  The extra element
 * beyond MAXTOKENLEN is presumably reserved for that terminator.
 */
char tokenString[MAXTOKENLEN + 1];

/* BUFLEN = length of the input buffer for source code lines */
#define BUFLEN 256

static char lineBuf[BUFLEN];	/* holds the current line */
static int linepos = 0;			/* index of the next unread character in lineBuf */
static int bufsize = 0;			/* number of characters currently held in lineBuf */
static int eofReached = 0;		/* nonzero while the last read attempt hit end of input */

/* getNextChar returns the next character of the source text (blanks and
 * newlines included), refilling lineBuf from `source` with fgets when the
 * buffered line is exhausted.  Each refill attempt increments lineno, and
 * a successfully read line is echoed to `listing` when EchoSource is set.
 *
 * Returns the character as an unsigned char value widened to int, or EOF
 * when fgets yields nothing more (end of input or read error).  Returning
 * int keeps EOF distinguishable from every byte value, including 0xFF.
 */
static int getNextChar(void)
{
	if (linepos < bufsize)
		return (unsigned char) lineBuf[linepos++];

	lineno++;
	/* fgets already reserves one byte for the terminator, so it is given
	 * the full buffer size. */
	if (!fgets(lineBuf, sizeof lineBuf, source)) {
		/* No character was consumed; remember that so a following
		 * ungetNextChar does not step back into the previous line. */
		eofReached = 1;
		return EOF;
	}
	eofReached = 0;
	if (EchoSource)
		fprintf(listing, "%4d: %s", lineno, lineBuf);
	bufsize = (int) strlen(lineBuf);
	linepos = 0;
	return (unsigned char) lineBuf[linepos++];
}

/* ungetNextChar backtracks one character in lineBuf.
 *
 * It does nothing when the preceding getNextChar returned EOF: in that
 * case linepos was not advanced, and decrementing it would make the last
 * character of the final line be delivered a second time.
 */
static void ungetNextChar(void)
{
	if (!eofReached)
		linepos--;
}

/* Reserved-word table searched by reservedLookup: each entry pairs a
 * keyword spelling with the token it denotes.  Every keyword appears
 * twice, once in English and once in Chinese.
 */
static struct {
	char *str;					/* keyword spelling as it appears in source text */
	TokenType tok;				/* token reported for that spelling */
} reservedWords[MAXRESERVED] = {
	/* English keywords */
	{ "if",     IF     },
	{ "then",   THEN   },
	{ "else",   ELSE   },
	{ "end",    END    },
	{ "repeat", REPEAT },
	{ "until",  UNTIL  },
	{ "read",   READ   },
	{ "write",  WRITE  },
	/* Chinese keywords (GB2312 support), same order as above */
	{ "如果", IF     },
	{ "那么", THEN   },
	{ "否则", ELSE   },
	{ "结束", END    },
	{ "循环", REPEAT },
	{ "直到", UNTIL  },
	{ "读取", READ   },
	{ "输出", WRITE  }
};

/* reservedLookup classifies an identifier lexeme.
 *
 * s: NUL-terminated lexeme to look up.
 *
 * Walks reservedWords from the first entry and returns the token of the
 * first entry whose spelling equals s; returns ID when no entry matches.
 */
static TokenType reservedLookup(char *s)
{
	int idx = 0;

	while (idx < MAXRESERVED) {
		if (strcmp(s, reservedWords[idx].str) == 0)
			return reservedWords[idx].tok;
		idx++;
	}
	return ID;
}

/* the primary function of the scanner */

/* function getToken returns the
 * next token in source file
 */
TokenType getToken(void)
{
	int tokenStrIdx = 0;		/* index for storing into tokenString */
	TokenType curToken;			/* holds current token to be returned */
	StateType state = START;	/* current state - always begins at START */
	int save;					/* flag to indicate save to tokenString */

	while (state != DONE) {
		char c = getNextChar();

		save = TRUE;
		switch (state) {
		case START:
			if (isnum(c))
				state = INNUM;
			else if (isname(c))
				state = INID;
			else if (c == ':')
				state = INASSIGN;
			else if (c == '<')	/* used INLT INGT to scan <= >= and <>, Larry */
				state = INLT;
			else if (c == '>')	/* see ahead, Larry */
				state = INGT;
			else if ((c == ' ') || (c == '\t') || (c == '\n'))
				save = FALSE;
			else if (c == '{') {
				save = FALSE;
				state = INCOMMENT;
			} else {
				state = DONE;
				switch (c) {
				case EOF:
					save = FALSE;
					curToken = ENDFILE;
					break;
				case '=':
					curToken = EQ;
					break;
				case '<':		/* nul, old code, Larry */
					curToken = LT;
					break;
				case '>':		/* nul, old code, Larry */
					curToken = GT;
					break;
				case '+':
					curToken = PLUS;
					break;
				case '-':
					curToken = MINUS;
					break;
				case '*':
					curToken = TIMES;
					break;
				case '/':
					curToken = OVER;
					break;
				case '(':
					curToken = LPAREN;
					break;
				case ')':
					curToken = RPAREN;
					break;
				case ';':
					curToken = SEMI;
					break;
				default:
					curToken = ERROR;
					break;
				}
			}
			break;
		case INCOMMENT:
			save = FALSE;
			if (c == '}')
				state = START;
			break;
		case INASSIGN:
			state = DONE;
			if (c == '=')
				curToken = ASSIGN;
			else {
				ungetNextChar();	/* backup in the input */
				save = FALSE;
				curToken = ERROR;
			}
			break;
		case INLT:				/* < <= <> Larry */
			state = DONE;
			if (c == '=')
				curToken = LE;	/* <= */
			else if (c == '>')
				curToken = NE;	/* <> */
			else {
				ungetNextChar();
				curToken = LT;	/* < */
			}
			break;
		case INGT:				/* > >= Larry */
			state = DONE;
			if (c == '=')
				curToken = GE;	/* >= */
			else {
				ungetNextChar();
				curToken = GT;	/* > */
			}					/* no >< */
			break;
		case INNUM:
			if (!isnum(c)) {
				ungetNextChar();	/* backup in the input */
				save = FALSE;
				state = DONE;
				curToken = NUM;
			}
			break;
		case INID:
			if (!isname(c)) {
				ungetNextChar();	/* backup in the input */
				save = FALSE;
				state = DONE;
				curToken = ID;
			}
			break;
		case DONE:
		default:
#ifdef CHINESE
			fprintf(listing, "扫描错误: 状态= %d\n", state);
#else
			fprintf(listing, "Scanner Bug: state= %d\n", state);
#endif
			state = DONE;
			curToken = ERROR;
			break;
		}
		if ((save) && (tokenStrIdx <= MAXTOKENLEN))
			tokenString[tokenStrIdx++] = c;
		if (state == DONE) {
			tokenString[tokenStrIdx++] = '\0';
			if (curToken == ID)
				curToken = reservedLookup(tokenString);
		}
	}
	if (TraceScan) {
		fprintf(listing, "\t%d: ", lineno);
		printToken(curToken, tokenString);
	}
	return curToken;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -