📄 scan.cpp
字号:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
using namespace std;
// States of the scanner's finite automaton, as driven by getToken.
enum StateType
{
    START,       // no character of the next token consumed yet
    INCOMMENT,   // inside a comment body
    INCOMMENTA,  // inside a comment, previous character was '*'
    INNUM,       // accumulating the digits of a number
    INID,        // accumulating the letters of an identifier
    DONE,        // token finished; getToken leaves its loop
    STATEA,      // '/' seen: either "/*" or the division operator
    STATEB,      // '<' seen: either "<=" or "<"
    STATEC,      // '>' seen: either ">=" or ">"
    STATED,      // '!' seen: "!=" or an error
    STATEE,      // '=' seen: either "==" or "="
    STATEF       // '*' seen: either "*/" or the multiplication operator
};
// Kinds of token the scanner can report.
enum TokenType
{
    // bookkeeping tokens
    ERROR,
    ENDFILE,

    // reserved words
    ELSE,
    IF,
    INT,
    RETURN,
    VOID,
    WHILE,

    // multi-character tokens
    ID,
    NUM,

    // operators:  +  -  *  /  <  <=  >  >=  ==  !=  =
    PLUS,
    MINUS,
    TIMES,
    OVER,
    LESS,
    LESSEQ,
    MORE,
    MOREEQ,
    EQ,
    NOTEQ,
    ASSIGN,

    // punctuation:  ;  ,  (  )  {  }  [  ]  /*  */
    SEMI,
    COMMA,
    LP,
    RP,
    LB,
    RB,
    LSB,
    RSB,
    LCOMMENT,
    RCOMMENT
};
// ---- sizes and boolean constants ----
#define MAXTOKENLEN 40   // tokenString is declared with MAXTOKENLEN+1 elements
#define MAXRESERVED 6    // number of entries in the reserved-word table
#define BUFLEN 256       // size of the source line buffer
#define FALSE 0
#define TRUE 1

// ---- streams ----
FILE* source;    // file being scanned (opened in main)
FILE* listing;   // stream that receives the echoed source and the token list

// ---- buffers ----
char tokenString[MAXTOKENLEN+1];   // text of the token currently being built
char lineBuf[BUFLEN];              // current source line

// ---- position within the input ----
int linepos = 0;        // index in lineBuf of the next character to deliver
int bufsize = 0;        // length of the string currently held in lineBuf
int lineno = 0;         // incremented each time getNextChar tries to read a line
int EOF_Flag = FALSE;   // set to TRUE by getNextChar once fgets fails

// ---- scanner interface ----
TokenType getToken(void);
void printToken( TokenType, const char* );
// Delivers the next character of the source file.
//
// Characters are served from lineBuf; when the buffer is exhausted a new
// line is read with fgets, echoed to the listing prefixed by its line
// number, and lineno/bufsize/linepos are updated.
//
// Returns the character as an unsigned char value widened to int, or EOF
// when no more input can be read (EOF_Flag is then set). Returning int
// rather than char keeps EOF distinct from a 0xFF byte in the input and
// makes the comparison against EOF valid where plain char is unsigned.
static int getNextChar(void)
{
    if (linepos < bufsize)
        return static_cast<unsigned char>(lineBuf[linepos++]);

    // Buffer exhausted: try to fetch the next line.
    lineno++;
    // fgets stores at most (size - 1) characters plus the terminator, so the
    // full buffer size can be passed.
    if (fgets(lineBuf, BUFLEN, source) == NULL)
    {
        EOF_Flag = TRUE;
        return EOF;
    }

    fprintf(listing, "%d:%s", lineno, lineBuf);
    bufsize = static_cast<int>(strlen(lineBuf));
    linepos = 0;
    return static_cast<unsigned char>(lineBuf[linepos++]);
}
//ungetNextChar backtracks one character in linebuf
static void ungetNextChar(void)
{
linepos--;
}
// Lookup table of reserved words: pairs each keyword's spelling with the
// token reported for it.
// The spellings are string literals, which are arrays of const char in C++;
// binding them to a plain char* is ill-formed since C++11, hence const char*.
// The table itself is never modified, so it is const as well.
static const struct
{
    const char* str;   // keyword spelling
    TokenType tok;     // token for that spelling
} reservedWords[MAXRESERVED] = {
    {"if", IF},
    {"else", ELSE},
    {"int", INT},
    {"return", RETURN},
    {"void", VOID},
    {"while", WHILE}
};
// Classifies an identifier spelling: returns the matching reserved-word
// token when s equals one of the reservedWords entries, otherwise ID.
// The table is small, so it is searched linearly.
static TokenType reservedLookup(char* s)
{
    for (int idx = 0; idx != MAXRESERVED; ++idx)
    {
        if (strcmp(s, reservedWords[idx].str) == 0)
        {
            return reservedWords[idx].tok;
        }
    }
    return ID;
}
// True when c is a decimal digit. c may be EOF or any character value; the
// cast keeps the argument inside the range <ctype.h> functions accept.
static bool isDigitChar(int c)
{
    return c != EOF && isdigit(static_cast<unsigned char>(c)) != 0;
}

// True when c is a letter. Same argument handling as isDigitChar.
static bool isLetterChar(int c)
{
    return c != EOF && isalpha(static_cast<unsigned char>(c)) != 0;
}

// The primary function of the scanner.
//
// getToken consumes characters from the source file until one complete
// token has been recognised, stores its text in tokenString, prints it to
// the listing (prefixed by the current line number) and returns its kind.
// Comments are skipped. Identifiers are checked against the reserved-word
// table. At end of input ENDFILE is returned.
TokenType getToken(void)
{
    int tokenStringIndex = 0;         // next free slot in tokenString
    TokenType currentToken = ERROR;   // overwritten on every path that reaches DONE
    StateType state = START;

    while (state != DONE)
    {
        // Kept in an int so that EOF stays distinguishable from real characters.
        int c = getNextChar();
        int save = TRUE;              // whether c becomes part of tokenString

        switch (state)
        {
        case START:
            if (isDigitChar(c))
                state = INNUM;
            else if (isLetterChar(c))
                state = INID;
            else if ((c == ' ') || (c == '\t') || (c == '\n'))
                save = FALSE;         // whitespace between tokens
            else if (c == '/')
            {
                save = FALSE;
                state = STATEA;
            }
            else if (c == '<')
                state = STATEB;
            else if (c == '>')
                state = STATEC;
            else if (c == '!')
                state = STATED;
            else if (c == '=')
                state = STATEE;
            else if (c == '*')
                state = STATEF;
            else
            {
                // Single-character tokens, end of file, or an unknown character.
                state = DONE;
                switch (c)
                {
                case EOF:
                    save = FALSE;
                    currentToken = ENDFILE;
                    break;
                case ';':
                    currentToken = SEMI;
                    break;
                case ',':
                    currentToken = COMMA;
                    break;
                case '+':
                    currentToken = PLUS;
                    break;
                case '-':
                    currentToken = MINUS;
                    break;
                case '(':
                    currentToken = LP;
                    break;
                case ')':
                    currentToken = RP;
                    break;
                case '[':
                    currentToken = LSB;
                    break;
                case ']':
                    currentToken = RSB;
                    break;
                case '{':
                    currentToken = LB;
                    break;
                case '}':
                    currentToken = RB;
                    break;
                default:
                    currentToken = ERROR;
                    break;
                }
            }
            break;

        case STATEA:                  // after '/'
            if (c == '*')
            {
                save = FALSE;
                state = INCOMMENT;
            }
            else
            {
                state = DONE;
                ungetNextChar();
                save = FALSE;
                currentToken = OVER;
            }
            break;

        case STATEB:                  // after '<'
            state = DONE;
            if (c == '=')
            {
                currentToken = LESSEQ;
            }
            else
            {
                ungetNextChar();
                save = FALSE;
                currentToken = LESS;
            }
            break;

        case STATEC:                  // after '>'
            state = DONE;
            if (c == '=')
            {
                currentToken = MOREEQ;
            }
            else
            {
                ungetNextChar();
                save = FALSE;
                currentToken = MORE;
            }
            break;

        case STATED:                  // after '!'
            state = DONE;
            if (c == '=')
            {
                currentToken = NOTEQ;
            }
            else
            {
                // A lone '!' is not a token of the language.
                ungetNextChar();
                save = FALSE;
                currentToken = ERROR;
            }
            break;

        case STATEE:                  // after '='
            state = DONE;
            if (c == '=')
            {
                currentToken = EQ;
            }
            else
            {
                ungetNextChar();
                save = FALSE;
                currentToken = ASSIGN;
            }
            break;

        case STATEF:                  // after '*'
            if (c == '/')
            {
                // A "*/" with no open comment is skipped. The '*' was already
                // stored while in START, so discard it; otherwise it would be
                // glued to the front of the next token's text.
                save = FALSE;
                tokenStringIndex = 0;
                state = START;
            }
            else
            {
                state = DONE;
                ungetNextChar();
                save = FALSE;
                currentToken = TIMES;
            }
            break;

        case INCOMMENT:
            save = FALSE;
            if (c == EOF)
            {
                // Unterminated comment: report end of file.
                state = DONE;
                currentToken = ENDFILE;
            }
            else if (c == '*')
                state = INCOMMENTA;
            break;

        case INCOMMENTA:              // inside a comment, previous char was '*'
            save = FALSE;
            if (c == '/')
                state = START;
            else if (c == '*')
                state = INCOMMENTA;
            else
                state = INCOMMENT;
            break;

        case INNUM:
            if (!isDigitChar(c))
            {
                // backup in the input
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = NUM;
            }
            break;

        case INID:
            if (!isLetterChar(c))
            {
                // backup in the input
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = ID;
            }
            break;

        case DONE:
        default: // should never happen
            fprintf(listing, "Scanner Bug: state = %d\n", state);
            state = DONE;
            currentToken = ERROR;
            break;
        }

        // Store at most MAXTOKENLEN characters so that the terminator written
        // below always lands inside tokenString (MAXTOKENLEN+1 elements).
        // Longer tokens are truncated.
        if (save && (tokenStringIndex < MAXTOKENLEN))
            tokenString[tokenStringIndex++] = static_cast<char>(c);

        if (state == DONE)
        {
            tokenString[tokenStringIndex] = '\0';
            if (currentToken == ID)
                currentToken = reservedLookup(tokenString);
        }
    }

    fprintf(listing, "\t%d:", lineno);
    printToken(currentToken, tokenString);
    return currentToken;
}//end getToken
// Writes a one-line description of a token to the listing stream.
//   token       - kind of token to describe
//   tokenString - token text; printed only for reserved words, NUM, ID and ERROR
// Operators and punctuation are printed as their fixed spelling; a token
// kind with no case below is reported by its numeric value.
void printToken( TokenType token, const char* tokenString)
{
    switch (token)
    {
    // reserved words share one message
    case IF:
    case ELSE:
    case INT:
    case RETURN:
    case VOID:
    case WHILE:
        fprintf(listing, "reserved word: %s\n", tokenString);
        return;

    // operators
    case PLUS:
        fputs("+\n", listing);
        return;
    case MINUS:
        fputs("-\n", listing);
        return;
    case TIMES:
        fputs("*\n", listing);
        return;
    case OVER:
        fputs("/\n", listing);
        return;
    case LESS:
        fputs("<\n", listing);
        return;
    case LESSEQ:
        fputs("<=\n", listing);
        return;
    case MORE:
        fputs(">\n", listing);
        return;
    case MOREEQ:
        fputs(">=\n", listing);
        return;
    case EQ:
        fputs("==\n", listing);
        return;
    case NOTEQ:
        fputs("!=\n", listing);
        return;
    case ASSIGN:
        fputs("=\n", listing);
        return;

    // punctuation
    case SEMI:
        fputs(";\n", listing);
        return;
    case COMMA:
        fputs(",\n", listing);
        return;
    case LP:
        fputs("(\n", listing);
        return;
    case RP:
        fputs(")\n", listing);
        return;
    case LB:
        fputs("{\n", listing);
        return;
    case RB:
        fputs("}\n", listing);
        return;
    case LSB:
        fputs("[\n", listing);
        return;
    case RSB:
        fputs("]\n", listing);
        return;

    case ENDFILE:
        fputs("EOF\n", listing);
        return;

    // tokens that carry their own text
    case NUM:
        fprintf(listing, "NUM, val= %s\n", tokenString);
        return;
    case ID:
        fprintf(listing, "ID, name= %s\n", tokenString);
        return;
    case ERROR:
        fprintf(listing, "ERROR: %s\n", tokenString);
        return;

    default:
        fprintf(listing, "Unknow token: %d\n", token);
        return;
    }
}
// Program entry point: scans a source file and writes the echoed source and
// the token list to stdout.
//
// The file to scan is argv[1] when an argument is given; with no argument
// the original default "1.txt" is used. Returns 0 after the whole file has
// been scanned and exits with status 1 when the file cannot be opened.
// (main must return int in standard C++; "void main" is a compiler extension.)
int main(int argc, char* argv[])
{
    const char* pgm = (argc > 1) ? argv[1] : "1.txt";

    source = fopen(pgm, "r");
    if (source == NULL)
    {
        fprintf(stderr, "File %s not found\n", pgm);
        exit(1);
    }

    listing = stdout;
    fprintf(listing, "\nC- COMPILATION: %s\n", pgm);

    // getToken prints every token itself; just drive it to end of file.
    while (getToken() != ENDFILE)
        ;

    fclose(source);   // release the input file once scanning is finished
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -