/* File: scan.c */
/* (code-viewer page control: font size) */
/* scan.c
* The scanner implementation for TINY compiler
*
* Compiler Construction: Principles and Practice
* Kenneth C. Louden
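* Chinese edition: "Compiler Principles and Practice"
* by Kenneth C. Louden (USA)
* Translated by Feng Boqin, Feng Lan, et al.
* China Machine Press, ISBN 7-111-07703-2
* Source code edited and revised by zwf
* Code modifications: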
*/
#include "globals.h"
#include "util.h"
#include "scan.h"
/* States of the scanner DFA driven by getToken().
 * INLT and INGT were added (Larry) so the two-character operators
 * <=, >= and <> can be recognised with one character of lookahead.
 * The numeric values appear in the scanner's diagnostic output, so the
 * order of the enumerators must not change.
 */
typedef enum {
    START,     /* between tokens; nothing consumed yet */
    INASSIGN,  /* seen ':'                             */
    INCOMMENT, /* inside a { ... } comment             */
    INNUM,     /* inside a number                      */
    INID,      /* inside an identifier / reserved word */
    DONE,      /* token complete                       */
    INLT,      /* seen '<'                             */
    INGT       /* seen '>'                             */
} StateType;
/* Lexeme of the token most recently scanned by getToken().
 * Sized MAXTOKENLEN + 1 to leave room for the terminating NUL.
 * Non-static: visible to other translation units.
 */
char tokenString[MAXTOKENLEN + 1];
/* BUFLEN = size in bytes of the input buffer that holds one source line
 * (or one chunk of a longer line) at a time */
#define BUFLEN 256
static char lineBuf[BUFLEN]; /* current source line, filled by fgets() in getNextChar() */
static int linepos = 0; /* index in lineBuf of the next character to hand out */
static int bufsize = 0; /* number of characters currently in lineBuf (its strlen) */
/* Set once fgets() reports that no further line can be read.  While set,
 * ungetNextChar() does nothing: EOF was never stored in lineBuf, so backing
 * up would hand out the last real character of the input a second time.
 */
static int eofReached = 0;

/* getNextChar returns the next character of the source text (blanks and
 * newlines included).
 *
 * Characters are served from lineBuf.  When the buffer is exhausted, lineno
 * is incremented and a new line is read from `source`; if EchoSource is set
 * the line is echoed to `listing` prefixed with its line number.
 *
 * Returns the character, or EOF when no further line can be read.  The
 * return type is int (as with getc) so that EOF can be represented;
 * characters are returned with the value they have as plain `char`.
 */
static int getNextChar(void)
{
    if (linepos < bufsize)
        return lineBuf[linepos++];

    lineno++;
    /* fgets() stores at most BUFLEN - 1 characters plus the NUL, so the
     * full buffer size can be passed safely. */
    if (!fgets(lineBuf, BUFLEN, source)) {
        eofReached = 1;
        return EOF;
    }
    if (EchoSource)
        fprintf(listing, "%4d: %s", lineno, lineBuf);
    bufsize = (int)strlen(lineBuf);
    linepos = 0;
    return lineBuf[linepos++];
}

/* ungetNextChar backtracks one character in lineBuf so that the character
 * most recently returned by getNextChar() is returned again by the next
 * call.  It is a no-op once end of input has been reached, because the EOF
 * result did not advance linepos.
 */
static void ungetNextChar(void)
{
    if (!eofReached)
        linepos--;
}
/* Lookup table of reserved words: each entry maps a spelling to its token.
 * Every keyword is listed twice, once in English and once in Chinese
 * (GB2312 support), both spellings yielding the same token.
 *
 * The array is dimensioned by MAXRESERVED, which is defined outside this
 * file and must be at least the number of entries below.  If it is larger,
 * the surplus slots are zero-initialised, i.e. their str is a null pointer.
 *
 * The strings are literals and are never modified, hence const.
 */
static const struct {
    const char *str; /* spelling of the reserved word */
    TokenType tok;   /* token returned for that spelling */
} reservedWords[MAXRESERVED] = {
    {"if", IF},
    {"then", THEN},
    {"else", ELSE},
    {"end", END},
    {"repeat", REPEAT},
    {"until", UNTIL},
    {"read", READ},
    {"write", WRITE},
    {"如果", IF},
    {"那么", THEN},
    {"否则", ELSE},
    {"结束", END},
    {"循环", REPEAT},
    {"直到", UNTIL},
    {"读取", READ},
    {"输出", WRITE}
};
/* reservedLookup checks whether an identifier is a reserved word.
 *
 * s: NUL-terminated lexeme to look up (not modified).
 *
 * Performs a linear search of reservedWords and returns the token of the
 * first entry whose spelling equals s, or ID if there is none.  Table slots
 * without a spelling (null str, which happens when MAXRESERVED is larger
 * than the number of initialised entries) are skipped instead of being
 * passed to strcmp().
 */
static TokenType reservedLookup(const char *s)
{
    int i;
    for (i = 0; i < MAXRESERVED; i++) {
        const char *word = reservedWords[i].str;
        if (word && strcmp(s, word) == 0)
            return reservedWords[i].tok;
    }
    return ID;
}
/* the primary function of the scanner */
/* function getToken returns the
* next token in source file
*/
TokenType getToken(void)
{
int tokenStrIdx = 0; /* index for storing into tokenString */
TokenType curToken; /* holds current token to be returned */
StateType state = START; /* current state - always begins at START */
int save; /* flag to indicate save to tokenString */
while (state != DONE) {
char c = getNextChar();
save = TRUE;
switch (state) {
case START:
if (isnum(c))
state = INNUM;
else if (isname(c))
state = INID;
else if (c == ':')
state = INASSIGN;
else if (c == '<') /* used INLT INGT to scan <= >= and <>, Larry */
state = INLT;
else if (c == '>') /* see ahead, Larry */
state = INGT;
else if ((c == ' ') || (c == '\t') || (c == '\n'))
save = FALSE;
else if (c == '{') {
save = FALSE;
state = INCOMMENT;
} else {
state = DONE;
switch (c) {
case EOF:
save = FALSE;
curToken = ENDFILE;
break;
case '=':
curToken = EQ;
break;
case '<': /* nul, old code, Larry */
curToken = LT;
break;
case '>': /* nul, old code, Larry */
curToken = GT;
break;
case '+':
curToken = PLUS;
break;
case '-':
curToken = MINUS;
break;
case '*':
curToken = TIMES;
break;
case '/':
curToken = OVER;
break;
case '(':
curToken = LPAREN;
break;
case ')':
curToken = RPAREN;
break;
case ';':
curToken = SEMI;
break;
default:
curToken = ERROR;
break;
}
}
break;
case INCOMMENT:
save = FALSE;
if (c == '}')
state = START;
break;
case INASSIGN:
state = DONE;
if (c == '=')
curToken = ASSIGN;
else {
ungetNextChar(); /* backup in the input */
save = FALSE;
curToken = ERROR;
}
break;
case INLT: /* < <= <> Larry */
state = DONE;
if (c == '=')
curToken = LE; /* <= */
else if (c == '>')
curToken = NE; /* <> */
else {
ungetNextChar();
curToken = LT; /* < */
}
break;
case INGT: /* > >= Larry */
state = DONE;
if (c == '=')
curToken = GE; /* >= */
else {
ungetNextChar();
curToken = GT; /* > */
} /* no >< */
break;
case INNUM:
if (!isnum(c)) {
ungetNextChar(); /* backup in the input */
save = FALSE;
state = DONE;
curToken = NUM;
}
break;
case INID:
if (!isname(c)) {
ungetNextChar(); /* backup in the input */
save = FALSE;
state = DONE;
curToken = ID;
}
break;
case DONE:
default:
#ifdef CHINESE
fprintf(listing, "扫描错误: 状态= %d\n", state);
#else
fprintf(listing, "Scanner Bug: state= %d\n", state);
#endif
state = DONE;
curToken = ERROR;
break;
}
if ((save) && (tokenStrIdx <= MAXTOKENLEN))
tokenString[tokenStrIdx++] = c;
if (state == DONE) {
tokenString[tokenStrIdx++] = '\0';
if (curToken == ID)
curToken = reservedLookup(tokenString);
}
}
if (TraceScan) {
fprintf(listing, "\t%d: ", lineno);
printToken(curToken, tokenString);
}
return curToken;
}
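/*
 * Keyboard shortcuts (code-viewer help text, not part of scan.c):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */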