📄 decafclexer.l
字号:
/****************************************************************************
 * File name:   decafcLexer.l
 *
 * Description: lexical analysis program for the Decaf language
 *
 * Input:       none
 *
 * Output:      none
 *
 * Author:      Luojian Chen
 *
 * Date:        April 16, 1997
 ****************************************************************************/

%{
#include "decafcLexer.h"

/* external function prototypes */
extern Node * CreateNode(NodeType, ...);
extern void ReportError(ErrorIndex);
extern SymbolTablePtr NewSymbolTable(int);
extern SymbolTablePtr DupSymbolTable(SymbolTablePtr);
extern void Push(SymbolTableStackPtr, SymbolTablePtr, int);
extern void DisplaySymbolTable(SymbolTablePtr);

/* external variables */
extern YYSTYPE yylval;
extern char currentLine[];
extern SymbolTablePtr currentBlockSymbolTable;
extern SymbolTableStackPtr symbolTableStackPtr;
extern Boolean inMethod;
extern ElementPtr intTypePtr;
extern int parameterVarID;
extern int localVarID;
extern Boolean firstBlock;

/* global function prototypes */
void GetNextLine(void);

/* global variables */
String token;                   /* token points the current recognized
                                   symbol's token name */
long currentLineNumber = 1;     /* current line number (starts at 1) */
int currentColumnNumber = 1;    /* current column number (starts at 1) */

/* local function prototypes */
static int GetKeywordTokenID(String);
static int GetOperatorTokenID(String);
static void GetEntireWord(void);
static Boolean IsOperator(int);

/* local variable ID handed to Push() by the left curly bracket rule; it is
   reset to parameterVarID when the first block of a method is entered */
static int beginLocalVarID = 0;

/* keywordTable pairs the spelling of every reserved keyword with its
   token id */
ReservedStringTableEntry keywordTable[] = {
    {"class",  TOKEN_KW_CLASS},
    {"else",   TOKEN_KW_ELSE},
    {"if",     TOKEN_KW_IF},
    {"int",    TOKEN_KW_INT},
    {"new",    TOKEN_KW_NEW},
    {"null",   TOKEN_KW_NULL},
    {"print",  TOKEN_KW_PRINT},
    {"read",   TOKEN_KW_READ},
    {"return", TOKEN_KW_RETURN},
    {"this",   TOKEN_KW_THIS},
    {"void",   TOKEN_KW_VOID},
    {"while",  TOKEN_KW_WHILE}
};

/* operatorTable stores all the operators
themselves, their token name, and their token id */
ReservedStringTableEntry operatorTable[] = {
    {"[",  TOKEN_OP_LEFT_SQUARE_BRACKET},
    {"]",  TOKEN_OP_RIGHT_SQUARE_BRACKET},
    {"{",  TOKEN_OP_LEFT_CURLY_BRACKET},
    {"}",  TOKEN_OP_RIGHT_CURLY_BRACKET},
    {"!=", TOKEN_OP_NOT_EQUAL},
    {"==", TOKEN_OP_EQUAL},
    {"<",  TOKEN_OP_LESS},
    {">",  TOKEN_OP_GREATER},
    {"<=", TOKEN_OP_LESS_OR_EQUAL},
    {">=", TOKEN_OP_GREATER_OR_EQUAL},
    {"&&", TOKEN_OP_AND},
    {"||", TOKEN_OP_OR},
    {"!",  TOKEN_OP_NOT},
    {"+",  TOKEN_OP_PLUS},
    {"-",  TOKEN_OP_MINUS},
    {"*",  TOKEN_OP_MULTIPLY},
    {"/",  TOKEN_OP_DIVIDE},
    {"%",  TOKEN_OP_MODULAR},
    {";",  TOKEN_OP_SEMICOLON},
    {",",  TOKEN_OP_COMMA},
    {"(",  TOKEN_OP_LEFT_PARENTHESIS},
    {")",  TOKEN_OP_RIGHT_PARENTHESIS},
    {"=",  TOKEN_OP_ASSIGN},
    {"//", TOKEN_OP_COMMENTS},
    {".",  TOKEN_OP_DOT},
};
%}

IDENTIFIER  ([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*
NUMBER      [0-9]+
EOL         \n

%%

" "+    {
            /* space characters: ignored, but the column counter advances */
            currentColumnNumber += yyleng;
        }

\t+     {
            /* tab characters: ignored, but the column counter moves to the
               tab stop reached (columns are counted from 1) */
            if (currentColumnNumber % TAB_DISTANCE == 0) {
                /* the current column is the last one before a tab stop:
                   the first tab advances a single column, every further
                   tab a full TAB_DISTANCE */
                currentColumnNumber += 1 + (yyleng - 1) * TAB_DISTANCE;
            }
            else {
                /* round down to a multiple of TAB_DISTANCE, then move
                   yyleng tab stops to the right */
                currentColumnNumber = currentColumnNumber / TAB_DISTANCE * TAB_DISTANCE
                                      + yyleng * TAB_DISTANCE + 1;
            }
        }

{EOL}   {
            /* new line character: count the line, restart the column
               counter and load the text of the next line */
            currentLineNumber ++;
            currentColumnNumber = 1;
            GetNextLine();
        }

"class"     |
"else"      |
"if"        |
"int"       |
"new"       |
"null"      |
"print"     |
"read"      |
"return"    |
"this"      |
"void"      |
"while"     {
            /* reserved keywords */
            currentColumnNumber += yyleng;

            /* create a parse tree leaf node */
            yylval = CreateNode(NODE_TYPE_KEYWORD, yytext, 0);

            /* return the token id to the parser */
            return(GetKeywordTokenID(yytext));
        }

"["     |
"]"     |
"}"     |
"!="    |
"=="    |
"<"     |
">"     |
"<="    |
">="    |
"&&"    |
"||"    |
"!"     |
"+"     |
"-"     |
"*"     |
"/"     |
"%"     |
";"     |
","     |
"("     |
")"     |
"="     |
"."     {
            /* operators; comments and the left curly bracket have rules
               of their own below */
            currentColumnNumber += yyleng;

            /* create a parse tree leaf node */
            yylval = CreateNode(NODE_TYPE_OPERATOR, yytext, 0);

            /* return the token id to the parser */
            return(GetOperatorTokenID(yytext));
        }

"{"     {
            /* left curly bracket: an operator that also opens a new block
               scope while a method is being scanned */

            /* keep the column counter and yylval in step with every other
               operator rule; this rule used to skip both */
            currentColumnNumber += yyleng;
            yylval = CreateNode(NODE_TYPE_OPERATOR, yytext, 0);

            if (inMethod == TRUE) {
                if (firstBlock == TRUE) {
                    /* first block of the method: local variable IDs start
                       from parameterVarID */
                    beginLocalVarID = parameterVarID;
                    localVarID = parameterVarID;
                    firstBlock = FALSE;
                }

                /* save the enclosing block's symbol table and continue
                   with a duplicate of it */
                Push(symbolTableStackPtr, currentBlockSymbolTable, beginLocalVarID);
                currentBlockSymbolTable = DupSymbolTable(currentBlockSymbolTable);
#ifdef DEBUG
                printf("Enter a new block.Push symbol table:\n");
                printf("Duplicate block symbol table:\n");
                DisplaySymbolTable(currentBlockSymbolTable);
#endif
            }

            return(TOKEN_OP_LEFT_CURLY_BRACKET);
        }

"//".*  {
            /* comment: discard the rest of the line. The newline is not
               part of the match, so the new line rule above does the line
               bookkeeping and a comment on a last line without a newline
               is still recognized */
            currentColumnNumber += yyleng;
        }

{IDENTIFIER}    {
            /* identifier */
            currentColumnNumber += yyleng;

            /* create a parse tree leaf node */
            yylval = CreateNode(NODE_TYPE_IDENTIFIER, yytext, 0);

            /* return the token id to the parser */
            return(TOKEN_IDENTIFIER);
        }

{NUMBER}    {
            /* number */
            currentColumnNumber += yyleng;

            /* create a parse tree node holding the value.
               NOTE(review): atoi() cannot report a literal that does not
               fit in an int; consider strtol() with a range check */
            yylval = CreateNode(NODE_TYPE_NUMBER, atoi(yytext), 0);
            yylval.nodePtr->typePtr = intTypePtr;

            /* return the token id to the parser */
            return(TOKEN_NUMBER);
        }

.       {
            /* unrecognized characters: extend the match to the entire
               unrecognized word, then report it; no token is returned */
            GetEntireWord();
            yyleng = strlen(yytext);

            if (yyleng == 1) {
                /* unrecognized character */
                ReportError(ERROR_BAD_CHARACTER);
            }
            else {
                /* unrecognized word */
                ReportError(ERROR_BAD_WORD);
            }

            currentColumnNumber += yyleng;
        }

%%

/****************************************************************************
 Function name: GetNextLine

 Description:   copy the next source line into currentLine, then push the
                characters back so that the scanner still sees them

 Procedure:     1. read characters into the line buffer until a newline,
                   the end of the input, or the read limit is reached
                2. if nothing could be read
                       store a blank line
                   else if the line ends with a newline
                       terminate it
                   else if the read limit was reached
                       append "..." and a newline
                   else (the input ended inside the last line)
                       append a newline
                3. unread every character that was read

 Return value:  none

 Input parameter:   none

 Output parameter:  none
 ****************************************************************************/
void GetNextLine(void)
{
    /* at most this many characters are taken from the input, leaving room
       for "...", EOL and EOS behind them.
       NOTE(review): assumes currentLine holds MAX_LINE_BUFFER_LENGTH
       characters; its definition is not in this file */
    const int maxRead = MAX_LINE_BUFFER_LENGTH - 5;
    int count = 0;      /* number of characters taken from the input */
    int ch;

    while (count < maxRead) {
        ch = input();
        if ((ch == EOF) || (ch == 0)) {
            /* end of the input */
            break;
        }

        currentLine[count] = ch;
        count ++;

        if (ch == EOL) {
            break;
        }
    }

    if (count == 0) {
        /* nothing left to read: produce a blank line */
        currentLine[0] = EOL;
        currentLine[1] = EOS;
    }
    else if (currentLine[count - 1] == EOL) {
        /* the whole line has been read into the buffer */
        currentLine[count] = EOS;
    }
    else if (count >= maxRead) {
        /* the line is too long, truncate it */
        currentLine[count] = '.';
        currentLine[count + 1] = '.';
        currentLine[count + 2] = '.';
        currentLine[count + 3] = EOL;
        currentLine[count + 4] = EOS;
    }
    else {
        /* the input ended inside the last line: it is complete, only its
           newline is missing */
        currentLine[count] = EOL;
        currentLine[count + 1] = EOS;
    }

    /* unread, last character first, exactly what was read; nothing is
       pushed back when nothing was read */
    while (count > 0) {
        count --;
        unput(currentLine[count]);
    }
}

/****************************************************************************
 Function name: GetKeywordTokenID

 Description:   get the token id of a keyword

 Procedure:     search the keyword table
                if find the keyword
                    return its token id
                else
                    return TOKEN_INVALID_TOKEN

 Return value:  the token id of the keyword, if it is found in the
keyword table
                TOKEN_INVALID_TOKEN, if the keyword is not found

 Input parameter:   string    the keyword

 Output parameter:  none
 ****************************************************************************/
static int GetKeywordTokenID(String string)
{
    ReservedStringTableEntry *entry = keywordTable;
    ReservedStringTableEntry *const tableEnd =
        keywordTable + sizeof(keywordTable) / sizeof(keywordTable[0]);

    /* walk the table from its first entry to its last */
    for (; entry != tableEnd; entry ++) {
        if (strcmp(string, entry->string) != 0) {
            continue;
        }

        /* the keyword is found */
        return(entry->id);
    }

    /* The keyword is not in the table. The scanner only calls this
       function from the keyword rule, so this is not expected to be
       reached. */
    return(TOKEN_INVALID_TOKEN);
}

/****************************************************************************
 Function name: GetOperatorTokenID

 Description:   look up the token id that belongs to an operator spelling

 Procedure:     compare the spelling with each entry of the operator table
                on the first exact match
                    return that entry's token id
                when no entry matches
                    return TOKEN_INVALID_TOKEN

 Return value:  the operator's token id, if it is found in the operator
                table
                TOKEN_INVALID_TOKEN, if the operator is not found

 Input parameter:   string    the operator

 Output parameter:  none
 ****************************************************************************/
static int GetOperatorTokenID(String string)
{
    ReservedStringTableEntry *entry = operatorTable;
    ReservedStringTableEntry *const tableEnd =
        operatorTable + sizeof(operatorTable) / sizeof(operatorTable[0]);

    /* walk the table from its first entry to its last */
    for (; entry != tableEnd; entry ++) {
        if (strcmp(string, entry->string) != 0) {
            continue;
        }

        /* the operator is found */
        return(entry->id);
    }

    /* The operator is not in the table. The scanner only calls this
       function from the operator rule, so this is not expected to be
       reached. */
    return(TOKEN_INVALID_TOKEN);
}

/****************************************************************************
 Function name: GetEntireWord

 Description:   after finding an unrecognized character, read ahead until
                a blank character (space, tab, or newline) or an operator
                is met.

Procedure: while not finished read a character if it is a space or an operator unread the character break out of the loop else append the character to the unrecognized word Return value: none Input parameter: none Output parameter: none ****************************************************************************/static void GetEntireWord(){ int ch; char *p = NULL; p = yytext + strlen(yytext); /* read characters until a legal character is met */ do { ch = input(); if (isspace(ch) || (IsOperator(ch) == TRUE)) { unput(ch); break; } else { *p = ch; p ++; } } while ((ch != EOF) && (ch != 0)); *p = '\0';}/**************************************************************************** Function name: IsOperator Description: test if a character is an operator or really belongs to an operator Procedure: 1. for every operator in the operator table if the character is the same as the first character of the operator if the operator has more than one characters if the first character is '&' or '|' read a character unread it if it is the same as the first character return TRUE else return FALSE else return TRUE else return TRUE 2. return FALSE (in this case, the character is not found in the operator table) Return value: TRUE, if the character is an operator or really belongs to an operator Input parameter: ch the character Output parameter: none ****************************************************************************/static Boolean IsOperator(int ch){ int index; int ch1; for (index = 0; index < sizeof(operatorTable) / sizeof(ReservedStringTableEntry); index ++) { if (ch == operatorTable[index].string[0]) { /* the character belongs to one of the operators in the operator table. 
We can not tell if it really belongs an operator now */ if (strlen(operatorTable[index].string) != 1) { /* the operator has more than one characters */ if ((operatorTable[index].string[0] == '&') || (operatorTable[index].string[0] == '|')) { /* we need only consider "&&" and "||" because in other cases the second character of the operator is different from the first one */ /* read one more character */ ch1 = input(); /* unread the character because it should be read again by the lexical analyzer. ch will be unread after this function is called anyway. So we do not unread ch here */ unput(ch1); if (ch == ch1) { /* the operator is "&&" or "||" */ return(TRUE); } else { /* it is the case '&' or '|' followed by any character other than '&' or '|' respectively */ return(FALSE); } } else { /* In this case, we have matched an operator other than "&&" and "||" */ return(TRUE); } } else { /* the operator has only one character. Here one operator has been matched */ return(TRUE); } } } /* the character does not match any of the operators in the operator table. It is not an operator */ return(FALSE);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -