📄 lexicalanalysis.cpp
字号:
// Note: finish the input file with a newline (press Enter after the last line) to mark the end of the file.
#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <iostream>
/* Scanner-wide constants and shared state. */

/* BUFLEN: number of bytes in lineBuf, the buffer that receives one source line at a time */
#define BUFLEN 256/* BUFLEN = length of the input buffer for source code lines */
/* Integer truth values used for the flag variables in this file */
#define FALSE 0
#define TRUE 1
/* Number of entries in the reservedWords lookup table */
#define MAXRESERVED 7
/* Longest lexeme kept in tokenString, not counting the terminating NUL */
#define MAXTOKENLEN 50
static char lineBuf[BUFLEN]; /* holds the current source line as read by fgets */
static int linepos = 0; /* index in lineBuf of the next character to hand out */
static int bufsize = 0; /* length (strlen) of the string currently held in lineBuf */
static int EOF_flag = FALSE; /* set once reading a line fails; makes ungetNextChar a no-op and is checked by main */
char tokenString[MAXTOKENLEN+1];/* NUL-terminated lexeme of the token most recently scanned by getToken */
int lineno; /* source line counter, incremented by getNextChar before each attempt to read a line */
/* States of the scanner's DFA. getToken starts every token in START and
 * keeps consuming characters until it reaches DONE. */
typedef enum
{
  START,        /* nothing consumed yet for the current token */
  INDIVIDE,     /* saw '/': either the divide operator or the start of a comment */
  INCOMMENT,    /* inside a comment body */
  COMMENT,      /* inside a comment after a '*' was seen; looking for the closing '/' */
  INASSIGN,     /* saw '=': either assignment or the first half of "==" */
  INLESSTHAN,   /* saw '<': either less-than or the first half of "<>" */
  INNUM,        /* reading the digits of a number */
  INDOT,        /* saw the '.' after the integer part; a digit must follow */
  INREAL,       /* reading the fractional digits of a real literal */
  INID,         /* reading the letters/digits of an identifier */
  INUNDERLINE,  /* saw '_' inside an identifier; a letter or digit must follow */
  DONE          /* token complete */
} StateType;
/* Every kind of token the scanner can report. The enumerator order (and
 * therefore the numeric values) is part of the interface: printToken's
 * fallback branch prints the raw number. */
typedef enum
{
  /* book-keeping tokens */
  ENDFILE,
  ERROR,

  /* reserved words */
  IF,
  ELSE,
  WHILE,
  READ,
  WRITE,
  INT,
  REAL,

  /* multicharacter tokens */
  ID,
  INTEGER_LITERAL,
  REAL_LITERAL,

  /* special symbols */
  ASSIGN,     /* =  */
  EQ,         /* == */
  LT,         /* <  */
  NE,         /* <> */
  PLUS,       /* +  */
  MINUS,      /* -  */
  MULTIPLY,   /* *  */
  DIVIDE,     /* /  */
  LPAREN,     /* (  */
  RPAREN,     /* )  */
  SEMICOLON,  /* ;  */
  LBRACE,     /* {  */
  RBRACE,     /* }  */
  LBRACKET,   /* [  */
  RBRACKET,   /* ]  */
  LCOM,       /* presumably a comment-open marker; never produced by the scanner code in this file */
  RCOM        /* presumably a comment-close marker; never produced by the scanner code in this file */
} TokenType;
/* Lookup table of reserved words: maps each keyword's spelling to its
 * token. Searched linearly by reservedLookup.
 *
 * The spelling is a pointer to const because every entry points at a
 * string literal, which must never be written through (and which C++11
 * no longer allows to be bound to a plain char*). */
static struct
{
  const char* str;  /* keyword spelling */
  TokenType tok;    /* token returned for that spelling */
} reservedWords[MAXRESERVED]
  = {
      {"if",IF},
      {"else",ELSE},
      {"while",WHILE},
      {"read",READ},
      {"write",WRITE},
      {"int",INT},
      {"real",REAL}
    };
FILE* source; /* input stream the scanner reads source lines from (opened by main) */
FILE* listing; /* output stream that receives the echoed source lines and the token listing (opened by main) */
/* TraceScan: trace switch for the scanner. A non-zero value is meant to
 * cause token information to be printed to the listing file as each
 * token is recognized by the scanner. It is initialised to 1 (tracing
 * on).
 */
int TraceScan=1;
/* getNextChar returns the next character of the source text, blanks and
 * newlines included, or EOF once no further line can be read.
 *
 * Characters are served from lineBuf; when the buffer is exhausted the
 * line counter is advanced, a new line is read from `source` and echoed
 * to `listing` prefixed with its line number.
 *
 * The character is returned as an unsigned char value widened to int.
 * That keeps every byte non-negative, so bytes >= 0x80 can be passed to
 * the <ctype.h> classifiers without undefined behaviour and a 0xFF byte
 * cannot be mistaken for EOF.
 *
 * On end of input EOF_flag is set to TRUE.
 */
static int getNextChar()
{
  if (linepos < bufsize)
    return (unsigned char) lineBuf[linepos++];

  /* Buffer exhausted: try to fetch the next line. */
  lineno++;
  if (fgets(lineBuf, BUFLEN, source) == NULL)
  {
    EOF_flag = TRUE;
    return EOF;
  }

  fprintf(listing,"%4d: %s",lineno,lineBuf);
  bufsize = (int) strlen(lineBuf);
  linepos = 0;
  return (unsigned char) lineBuf[linepos++];
}
/* ungetNextChar pushes the most recently read character back by moving
 * the read position in lineBuf one step to the left. After end of input
 * has been signalled (EOF_flag set) there is nothing to push back, so the
 * call does nothing. */
static void ungetNextChar()
{
  if (EOF_flag)
    return;
  linepos -= 1;
}
/* reservedLookup decides whether the lexeme `s` is a reserved word.
 *
 * s: NUL-terminated lexeme; it is only read, hence pointer-to-const.
 * Returns the keyword's token when `s` matches an entry of reservedWords
 * exactly (case-sensitive strcmp), otherwise ID.
 * Uses a linear search over the MAXRESERVED table entries.
 */
static TokenType reservedLookup (const char * s)
{
  int i;
  for (i = 0; i < MAXRESERVED; i++)
  {
    if (strcmp(s, reservedWords[i].str) == 0)
      return reservedWords[i].tok;
  }
  return ID;
}
/* printToken writes a one-line description of a token to the listing
 * file.
 *
 * token:       the token kind to describe.
 * tokenString: the token's lexeme; only used for reserved words,
 *              literals, identifiers and errors.
 *
 * Tokens that carry a lexeme are printed straight away. Tokens whose
 * text never varies just select their fixed output line, which is
 * written once after the switch. Any token without an entry is reported
 * as "Unknown token" together with its numeric value.
 */
void printToken( TokenType token, const char* tokenString )
{
  const char *fixedLine;  /* complete output line for fixed-text tokens */

  switch (token)
  {
    /* tokens printed together with their lexeme */
    case IF: case ELSE: case WHILE: case READ: case WRITE: case INT: case REAL:
      fprintf(listing,"reserved word: %s\n",tokenString);
      return;
    case INTEGER_LITERAL:
      fprintf(listing,"INT, val= %s\n",tokenString);
      return;
    case REAL_LITERAL:
      fprintf(listing,"REAL, val= %s\n",tokenString);
      return;
    case ID:
      fprintf(listing,"ID, name= %s\n",tokenString);
      return;
    case ERROR:
      fprintf(listing,"ERROR: %s\n",tokenString);
      return;

    /* tokens with a fixed spelling */
    case ASSIGN:    fixedLine = "=\n";   break;
    case LT:        fixedLine = "<\n";   break;
    case EQ:        fixedLine = "==\n";  break;
    case NE:        fixedLine = "<>\n";  break;
    case PLUS:      fixedLine = "+\n";   break;
    case MINUS:     fixedLine = "-\n";   break;
    case MULTIPLY:  fixedLine = "*\n";   break;
    case DIVIDE:    fixedLine = "/\n";   break;
    case LPAREN:    fixedLine = "(\n";   break;
    case RPAREN:    fixedLine = ")\n";   break;
    case SEMICOLON: fixedLine = ";\n";   break;
    case LBRACE:    fixedLine = "{\n";   break;
    case RBRACE:    fixedLine = "}\n";   break;
    case LBRACKET:  fixedLine = "[\n";   break;
    case RBRACKET:  fixedLine = "]\n";   break;
    case ENDFILE:   fixedLine = "EOF\n"; break;

    default: /* should never happen */
      fprintf(listing,"Unknown token: %d\n",token);
      return;
  }

  fprintf(listing,"%s",fixedLine);
}
/* getToken scans and returns the next token of the source file.
 *
 * The scanner is a DFA driven one character at a time by getNextChar.
 * Each token starts in START and ends when the state becomes DONE. While
 * scanning, the characters that belong to the token are collected in the
 * global tokenString (at most MAXTOKENLEN characters, always
 * NUL-terminated; longer lexemes are truncated). A character that was
 * only read as lookahead is pushed back with ungetNextChar and is not
 * stored.
 *
 * Comments (slash-star ... star-slash) are skipped entirely and produce
 * no token. If the input ends inside a comment, ENDFILE is returned.
 *
 * Returns the token kind. Identifiers are checked against the reserved
 * word table before being returned. ERROR is returned for a character
 * that starts no token, for a number ending in '.', and for an
 * identifier in which '_' is not followed by a letter or digit.
 *
 * When TraceScan is non-zero the token is also printed to the listing
 * file, prefixed with the current line number.
 */
TokenType getToken(){
  int tokenStringIndex = 0;        /* next free slot in tokenString */
  TokenType currentToken = ERROR;  /* token to be returned; set on every path to DONE */
  StateType state = START;         /* current DFA state - always begins at START */
  int save;                        /* TRUE when the current character belongs to the lexeme */

  while (state != DONE)
  {
    int c = getNextChar();
    save = TRUE;
    switch (state)
    {
      case START:
        if (isdigit(c)) state = INNUM;
        else if (isalpha(c)) state = INID;
        else if (c == '=') state = INASSIGN;
        else if ((c == ' ') || (c == '\t') || (c == '\n')) save = FALSE;
        else if (c == '/') state = INDIVIDE;
        else if (c == '<') state = INLESSTHAN;
        else
        {
          /* single-character token, end of file, or an illegal character */
          state = DONE;
          switch (c)
          {
            case EOF: save = FALSE; currentToken = ENDFILE; break;
            case '+': currentToken = PLUS; break;
            case '-': currentToken = MINUS; break;
            case '*': currentToken = MULTIPLY; break;
            case '(': currentToken = LPAREN; break;
            case ')': currentToken = RPAREN; break;
            case '{': currentToken = LBRACE; break;
            case '}': currentToken = RBRACE; break;
            case '[': currentToken = LBRACKET; break;
            case ']': currentToken = RBRACKET; break;
            case ';': currentToken = SEMICOLON; break;
            default: currentToken = ERROR; break;
          }
        }
        break;

      // Decide whether the '/' is the divide operator or opens a comment.
      case INDIVIDE:
        if (c == '*')
        {
          save = FALSE;
          tokenStringIndex = 0;  /* discard the '/' stored in START: a comment has no lexeme */
          state = INCOMMENT;
        }
        else
        { /* plain divide: the character read was only lookahead */
          ungetNextChar();
          save = FALSE;
          currentToken = DIVIDE;
          state = DONE;
        }
        break;

      case INCOMMENT:
        save = FALSE;
        if (c == EOF)
        { /* unterminated comment: stop instead of reading EOF forever */
          state = DONE;
          currentToken = ENDFILE;
        }
        else if (c == '*')
          state = COMMENT;
        break;

      // Inside a comment and the previous character was '*'.
      case COMMENT:
        save = FALSE;
        if (c == EOF)
        {
          state = DONE;
          currentToken = ENDFILE;
        }
        else if (c == '/')
          state = START;       /* comment closed; the '/' is consumed, not re-scanned */
        else if (c != '*')
          state = INCOMMENT;   /* the '*' did not close the comment; a run of '*' stays here */
        break;

      case INASSIGN:
        state = DONE;
        if (c == '=')
          currentToken = EQ;
        else
        { /* backup in the input */
          ungetNextChar();
          save = FALSE;
          currentToken = ASSIGN;
        }
        break;

      case INLESSTHAN:
        state = DONE;
        if (c == '>')
          currentToken = NE;
        else
        { /* backup in the input */
          ungetNextChar();
          save = FALSE;
          currentToken = LT;
        }
        break;

      case INNUM:
        if (!isdigit(c))
        {
          if (c == '.')
            state = INDOT;
          else
          { /* backup in the input */
            ungetNextChar();
            save = FALSE;
            state = DONE;
            currentToken = INTEGER_LITERAL;
          }
        }
        break;

      case INDOT:
        if (isdigit(c))
          state = INREAL;  /* first fractional digit; it is stored like any other */
        else
        { /* "123." without a fraction: finish with ERROR rather than
             re-reading the same character forever */
          ungetNextChar();
          save = FALSE;
          state = DONE;
          currentToken = ERROR;
        }
        break;

      case INREAL:
        if (!isdigit(c))
        {
          ungetNextChar();
          save = FALSE;
          state = DONE;
          currentToken = REAL_LITERAL;
        }
        break;

      case INID:
        if (!isdigit(c) && !isalpha(c))
        {
          if (c == '_')
            state = INUNDERLINE;
          else
          {
            ungetNextChar();
            save = FALSE;
            state = DONE;
            currentToken = ID;
          }
        }
        break;

      case INUNDERLINE:
        if (c != '_')
        {
          if (isdigit(c) || isalpha(c))
            state = INID;
          else
          { /* an identifier must not end with '_' */
            ungetNextChar();
            save = FALSE;
            currentToken = ERROR;
            state = DONE;
          }
        }
        break;

      case DONE:
      default: /* should never happen */
        fprintf(listing,"Scanner Bug: state= %d\n",state);
        state = DONE;
        currentToken = ERROR;
        break;
    }

    /* Strictly below MAXTOKENLEN so that the terminator written below
       always lands inside tokenString[MAXTOKENLEN+1]. */
    if ((save) && (tokenStringIndex < MAXTOKENLEN))
      tokenString[tokenStringIndex++] = (char) c;

    if (state == DONE)
    {
      tokenString[tokenStringIndex] = '\0';
      if (currentToken == ID)
        currentToken = reservedLookup(tokenString);
    }
  }

  if (TraceScan)
  {
    fprintf(listing,"\t%d: ",lineno);
    printToken(currentToken,tokenString);
  }
  return currentToken;
} /* end getToken */
int main( int argc, char * argv[] )
{ source = fopen("input.txt","r");
listing=fopen("output.txt","w");
while(!EOF_flag)
getToken();
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -