📄 decafclexer.l

📁 decafc的源代码
💻 L
字号:
/****************************************************************************
 * File name:	decafcLexer.l
 * Description:	lexical analysis program for the Decaf language
 * Input:	none
 * Output:	none
 * Author:	Luojian Chen
 * Date:	April 16, 1997
 ****************************************************************************/
%{
#include "decafcLexer.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* external function prototypes */
extern Node *		CreateNode(NodeType, ...);
extern void		ReportError(ErrorIndex);
extern SymbolTablePtr	NewSymbolTable(int);
extern SymbolTablePtr	DupSymbolTable(SymbolTablePtr);
extern void		Push(SymbolTableStackPtr, SymbolTablePtr, int);
extern void		DisplaySymbolTable(SymbolTablePtr);

/* external variables */
extern YYSTYPE			yylval;
extern char			currentLine[];
extern SymbolTablePtr		currentBlockSymbolTable;
extern SymbolTableStackPtr	symbolTableStackPtr;
extern Boolean			inMethod;
extern ElementPtr		intTypePtr;
extern int			parameterVarID;
extern int			localVarID;
extern Boolean			firstBlock;

/* number of bytes GetNextLine() may append after the characters it has
   read: "..." + EOL + EOS */
#define LINE_TAIL_RESERVE	5

/* global function prototypes */
void	GetNextLine(void);

/* global variables */
String	token;				/* token points the current recognized
					   symbol's token name */
long	currentLineNumber = 1;		/* current line number */
int	currentColumnNumber = 1;	/* current column number */

/* local variables */
static int	beginLocalVarID = 0;	/* value passed to Push() whenever a
					   block is entered inside a method */

/* local function prototypes */
static int	GetKeywordTokenID(String);
static int	GetOperatorTokenID(String);
static void	GetEntireWord(void);
static Boolean	IsOperator(int);

/* keywordTable stores all the keywords themselves and their token id */
ReservedStringTableEntry	keywordTable[] = {
	{"class",	TOKEN_KW_CLASS},
	{"else",	TOKEN_KW_ELSE},
	{"if",		TOKEN_KW_IF},
	{"int",		TOKEN_KW_INT},
	{"new",		TOKEN_KW_NEW},
	{"null",	TOKEN_KW_NULL},
	{"print",	TOKEN_KW_PRINT},
	{"read",	TOKEN_KW_READ},
	{"return",	TOKEN_KW_RETURN},
	{"this",	TOKEN_KW_THIS},
	{"void",	TOKEN_KW_VOID},
	{"while",	TOKEN_KW_WHILE}
};

/* operatorTable stores all the operators themselves and their token id.
   The order matters: IsOperator() decides on the FIRST entry whose first
   character matches. */
ReservedStringTableEntry	operatorTable[] = {
	{"[",	TOKEN_OP_LEFT_SQUARE_BRACKET},
	{"]",	TOKEN_OP_RIGHT_SQUARE_BRACKET},
	{"{",	TOKEN_OP_LEFT_CURLY_BRACKET},
	{"}",	TOKEN_OP_RIGHT_CURLY_BRACKET},
	{"!=",	TOKEN_OP_NOT_EQUAL},
	{"==",	TOKEN_OP_EQUAL},
	{"<",	TOKEN_OP_LESS},
	{">",	TOKEN_OP_GREATER},
	{"<=",	TOKEN_OP_LESS_OR_EQUAL},
	{">=",	TOKEN_OP_GREATER_OR_EQUAL},
	{"&&",	TOKEN_OP_AND},
	{"||",	TOKEN_OP_OR},
	{"!",	TOKEN_OP_NOT},
	{"+",	TOKEN_OP_PLUS},
	{"-",	TOKEN_OP_MINUS},
	{"*",	TOKEN_OP_MULTIPLY},
	{"/",	TOKEN_OP_DIVIDE},
	{"%",	TOKEN_OP_MODULAR},
	{";",	TOKEN_OP_SEMICOLON},
	{",",	TOKEN_OP_COMMA},
	{"(",	TOKEN_OP_LEFT_PARENTHESIS},
	{")",	TOKEN_OP_RIGHT_PARENTHESIS},
	{"=",	TOKEN_OP_ASSIGN},
	{"//",	TOKEN_OP_COMMENTS},
	{".",	TOKEN_OP_DOT},
};
%}

IDENTIFIER	[a-zA-Z][a-zA-Z0-9_]*
NUMBER		[0-9]+
EOL		\n

%%

" "+		{
			/* space characters, ignore */
			currentColumnNumber += yyleng;
		}

\t+		{
			/* tab characters, ignore; move the column to the
			   position reached after the last tab */
			if (currentColumnNumber % TAB_DISTANCE == 0) {
				/* the first tab starts on the last column
				   before a tab stop: it advances one column,
				   every further tab a full TAB_DISTANCE */
				currentColumnNumber +=
					1 + (yyleng - 1) * TAB_DISTANCE;
			} else {
				/* the first tab does NOT start right before
				   a tab stop: round the column down to a
				   multiple of TAB_DISTANCE, then advance one
				   TAB_DISTANCE per tab */
				currentColumnNumber =
					currentColumnNumber / TAB_DISTANCE *
					TAB_DISTANCE +
					yyleng * TAB_DISTANCE + 1;
			}
		}

{EOL}		{
			/* new line character: increment current line number
			   and reset current column number to 1 */
			currentLineNumber ++;
			currentColumnNumber = 1;

			/* get the next line */
			GetNextLine();
		}

"class"		|
"else"		|
"if"		|
"int"		|
"new"		|
"null"		|
"print"		|
"read"		|
"return"	|
"this"		|
"void"		|
"while"		{
			/* reserved keywords */
			currentColumnNumber += yyleng;

			/* create a parse tree leaf node */
			yylval = CreateNode(NODE_TYPE_KEYWORD,
					    yytext,
					    0);

			/* return the token id to the parser */
			return(GetKeywordTokenID(yytext));
		}

"["		|
"]"		|
"}"		|
"!="		|
"=="		|
"<"		|
">"		|
"<="		|
">="		|
"&&"		|
"||"		|
"!"		|
"+"		|
"-"		|
"*"		|
"/"		|
"%"		|
";"		|
","		|
"("		|
")"		|
"="		|
"."		{
			/* operators (the comment introducer and the left
			   curly bracket have rules of their own) */
			currentColumnNumber += yyleng;

			/* create a parse tree leaf node */
			yylval = CreateNode(NODE_TYPE_OPERATOR,
					    yytext,
					    0);

			/* return the token id to the parser */
			return(GetOperatorTokenID(yytext));
		}

"{"		{
			/* left curly bracket: inside a method it opens a new
			   block scope */

			/* advance the column like every other token rule,
			   otherwise all later columns on the line are off */
			currentColumnNumber += yyleng;

			if (inMethod == TRUE) {
				if (firstBlock == TRUE) {
					/* outermost block of the method:
					   local variable ids continue after
					   the parameter ids */
					beginLocalVarID = parameterVarID;
					localVarID = parameterVarID;
					firstBlock = FALSE;
				}

				/* save the enclosing scope and continue with
				   a copy of it */
				Push(symbolTableStackPtr,
				     currentBlockSymbolTable,
				     beginLocalVarID);
				currentBlockSymbolTable =
					DupSymbolTable(currentBlockSymbolTable);

				#ifdef DEBUG
				printf("Enter a new block.Push symbol table:\n");
				printf("Duplicate block symbol table:\n");
				DisplaySymbolTable(currentBlockSymbolTable);
				#endif
			}

			/* NOTE(review): unlike the other operator rules, no
			   parse tree node is stored in yylval here; confirm
			   the parser never reads this token's value */
			return(TOKEN_OP_LEFT_CURLY_BRACKET);
		}

"//".*		{
			/* comment: ignore the rest of the line. The
			   terminating newline is left to the end-of-line
			   rule, which updates the line number and fetches the
			   next line; this way a comment on a last line that
			   has no newline is still recognized */
			currentColumnNumber += yyleng;
		}

{IDENTIFIER}	{
			/* identifier */
			currentColumnNumber += yyleng;

			/* create a parse tree leaf node */
			yylval = CreateNode(NODE_TYPE_IDENTIFIER,
					    yytext,
					    0);

			/* return the token id to the parser */
			return(TOKEN_IDENTIFIER);
		}

{NUMBER}	{
			/* number */
			currentColumnNumber += yyleng;

			/* create a parse tree node */
			/* NOTE(review): atoi() does no range checking; a
			   literal that does not fit in an int is undefined
			   behaviour */
			yylval = CreateNode(NODE_TYPE_NUMBER,
					    atoi(yytext),
					    0);
			yylval.nodePtr->typePtr = intTypePtr;

			/* return the token id to the parser */
			return(TOKEN_NUMBER);
		}

.		{
			/* unrecognized characters */

			/* extend yytext to the entire unrecognized word */
			GetEntireWord();
			yyleng = strlen(yytext);

			if (yyleng == 1) {
				/* unrecognized character */
				ReportError(ERROR_BAD_CHARACTER);
			} else {
				/* unrecognized word */
				ReportError(ERROR_BAD_WORD);
			}

			/* the error is reported before the column moves
			   past the offending text */
			currentColumnNumber += yyleng;
		}

%%

/****************************************************************************
 Function name:		GetNextLine
 Description:		copy the upcoming source line into currentLine
			without consuming it
 Procedure:		1. read characters into the line buffer until a
			   newline is stored, the input ends, or the buffer
			   has no room left for the tail
			2. if nothing was read
				store a blank line
			   else if the line did not fit
				append "..." and a newline
			   else if the input ended without a newline
				append the missing newline
			3. push every character that was read back to the
			   scanner, last one first
 Return value:		none
 Input parameter:	none
 Output parameter:	none (currentLine is overwritten)
 ****************************************************************************/
void GetNextLine(void)
{
	int	i = 0;
	int	ch = EOL;
	Boolean	truncated = FALSE;

	/* NOTE(review): assumes currentLine holds at least
	   MAX_LINE_BUFFER_LENGTH chars - its definition is not in this file.
	   LINE_TAIL_RESERVE bytes stay free for the longest tail written
	   below; the former bound (MAX_LINE_BUFFER_LENGTH - 3) let the
	   truncation tail run two bytes past that size. */
	for (;;) {
		if (i >= MAX_LINE_BUFFER_LENGTH - LINE_TAIL_RESERVE) {
			truncated = TRUE;
			break;
		}
		ch = input();
		if ((ch == EOF) || (ch == 0)) {
			/* end of the input */
			break;
		}
		currentLine[i] = ch;
		i ++;
		if (ch == EOL) {
			break;
		}
	}

	if (i == 0) {
		/* nothing read, only happens at the end of the file:
		   produce a blank line */
		currentLine[0] = EOL;
		currentLine[1] = EOS;
	} else if (truncated == TRUE) {
		/* the line is too long, truncate it */
		currentLine[i] = '.';
		currentLine[i + 1] = '.';
		currentLine[i + 2] = '.';
		currentLine[i + 3] = EOL;
		currentLine[i + 4] = EOS;
	} else if (ch != EOL) {
		/* last line of the input without a trailing newline: it is
		   complete, only the newline is missing */
		currentLine[i] = EOL;
		currentLine[i + 1] = EOS;
	} else {
		/* the whole line has been read into the buffer */
		currentLine[i] = EOS;
	}

	/* unread exactly the characters that were read. A pre-test loop is
	   required: with i == 0 there is nothing to push back and
	   currentLine[-1] must not be touched. */
	while (i > 0) {
		i --;
		unput(currentLine[i]);
	}
}

/****************************************************************************
 Function name:		LookupTokenID
 Description:		linear search of a reserved string table
 Return value:		the id of the first entry whose string equals
			string, TOKEN_INVALID_TOKEN if there is none
 Input parameter:	table	the table to search
			count	number of entries in table
			string	the text to look up
 Output parameter:	none
 ****************************************************************************/
static int LookupTokenID(const ReservedStringTableEntry	*table,
			 size_t				count,
			 String				string)
{
	size_t	index;

	for (index = 0; index < count; index ++) {
		if (strcmp(string, table[index].string) == 0) {
			return(table[index].id);
		}
	}

	return(TOKEN_INVALID_TOKEN);
}

/****************************************************************************
 Function name:		GetKeywordTokenID
 Description:		get the token id of a keyword
 Return value:		the token id of the keyword, if it is found in the
			keyword table
			TOKEN_INVALID_TOKEN, if the keyword is not found
			(cannot happen for the callers in this file, which
			only pass text matched by the keyword rule)
 Input parameter:	string	the keyword
 Output parameter:	none
 ****************************************************************************/
static int GetKeywordTokenID(String	string)
{
	return(LookupTokenID(keywordTable,
			     sizeof(keywordTable) / sizeof(keywordTable[0]),
			     string));
}

/****************************************************************************
 Function name:		GetOperatorTokenID
 Description:		get the token id of an operator
 Return value:		the operator's token id, if it is found in the
			operator table
			TOKEN_INVALID_TOKEN, if the operator is not found
			(cannot happen for the callers in this file, which
			only pass text matched by the operator rule)
 Input parameter:	string	the operator
 Output parameter:	none
 ****************************************************************************/
static int GetOperatorTokenID(String	string)
{
	return(LookupTokenID(operatorTable,
			     sizeof(operatorTable) / sizeof(operatorTable[0]),
			     string));
}

/****************************************************************************
 Function name:		GetEntireWord
 Description:		after an unrecognized character has been matched,
			read ahead and append to yytext until a white space
			character, an operator, or the end of the input is
			met
 Procedure:		loop
				read a character
				if the input has ended
					stop
				if it is white space or an operator
					unread the character and stop
				append the character to the word
 Return value:		none
 Input parameter:	none
 Output parameter:	none (yytext is extended in place)
 ****************************************************************************/
static void GetEntireWord(void)
{
	char	*p = yytext + strlen(yytext);
	int	ch;

	for (;;) {
		ch = input();
		if ((ch == EOF) || (ch == 0)) {
			/* end of the input: the end marker is not part of
			   the word and must not be appended to it */
			break;
		}
		if (isspace(ch) || (IsOperator(ch) == TRUE)) {
			/* first character that can start a legal token:
			   leave it for the scanner */
			unput(ch);
			break;
		}
		*p = ch;
		p ++;
	}

	*p = '\0';
}

/****************************************************************************
 Function name:		IsOperator
 Description:		test if a character is an operator or begins an
			operator
 Procedure:		find the first operator in the operator table that
			starts with the character
			  if there is none
			    return FALSE
			  if that operator is "&&" or "||"
			    peek at the next input character
			    return TRUE only if it repeats the character
			  else
			    return TRUE
 Return value:		TRUE, if the character is an operator or begins an
			operator
			FALSE otherwise
 Input parameter:	ch	the character
 Output parameter:	none
 ****************************************************************************/
static Boolean IsOperator(int	ch)
{
	size_t	count = sizeof(operatorTable) / sizeof(operatorTable[0]);
	size_t	index;
	int	next;

	for (index = 0; index < count; index ++) {
		if (ch != operatorTable[index].string[0]) {
			continue;
		}

		if ((strlen(operatorTable[index].string) == 1) ||
		    ((ch != '&') && (ch != '|'))) {
			/* a one character operator, or the first character
			   of a longer operator other than "&&" and "||" */
			return(TRUE);
		}

		/* a single '&' or '|' is not an operator, so look one
		   character ahead. ch itself is not unread here: the caller
		   does that when TRUE is returned. */
		next = input();
		if ((next != EOF) && (next != 0)) {
			/* the scanner has to read it again; the end marker
			   is never pushed back because it would come back
			   as an ordinary character */
			unput(next);
		}

		return((next == ch) ? TRUE : FALSE);
	}

	/* the character does not begin any of the operators in the operator
	   table */
	return(FALSE);
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -