⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scan.c

📁 根据tiny实现的C-词法语法分析器 编译原理课程
💻 C
字号:
/****************************************************/
/* File: scan.c                                     */
/* The scanner implementation for the TINY compiler */
/* Compiler Construction: Principles and Practice   */
/* Kenneth C. Louden                                */
/****************************************************/

#include "globals.h"
#include "util.h"
#include "scan.h"

/* States of the scanner's DFA. getToken starts every token in
   START and keeps consuming characters until it reaches DONE. */
typedef enum {
   START,      /* nothing of the current token consumed yet */
   INASSIGN,   /* a '!' was read; the token is complete only if '=' follows */
   INCOMMENT,  /* brace-comment state; not entered by getToken in this file */
   INNUM,      /* collecting the digits of a number */
   INID,       /* collecting the letters of an identifier */
   DONE        /* token finished; the scanning loop stops */
} StateType;

/* Text of the token most recently completed by getToken
   (identifier, reserved word, number or operator); has
   external linkage */
char tokenString[MAXTOKENLEN+1];

/* BUFLEN = capacity, in bytes, of the buffer that holds
   one line of source text */
#define BUFLEN 256

/* Line-buffer state shared by getNextChar and ungetNextChar */
static char lineBuf[BUFLEN];   /* the source line currently being scanned */
static int  linepos = 0;       /* index in lineBuf of the next character to hand out */
static int  bufsize = 0;       /* number of characters currently stored in lineBuf */

/* Set once reading the source fails; while it is set,
   ungetNextChar leaves linepos untouched */
static int  EOF_flag = FALSE;

/* Character-level access to lineBuf: getlinepos reports the
   current read position, and getNextChar fetches the next
   character from lineBuf (blanks included), reading in a new
   line if lineBuf is exhausted */

/* getlinepos returns the scanner's current index into the
   line buffer, i.e. the slot getNextChar reads from next
   while the current line still has unread characters */
int getlinepos(void)
{
    return linepos;
}
/* getNextChar returns the next character of the source text,
   whitespace included, or EOF once no further line can be read.

   When lineBuf is exhausted a new line is read from `source`;
   lineno is incremented before every such read attempt (also
   the one that hits end of file) and the line is echoed to
   `listing` when EchoSource is set.

   The character is returned as an unsigned char converted to
   int, so a byte such as 0xFF can never be mistaken for EOF and
   the value is always a legal argument for the <ctype.h>
   classification functions. */
static int getNextChar(void)
{
    if (linepos >= bufsize) {
        lineno++;
        /* fgets itself reserves room for the terminating NUL,
           so the full buffer size may be passed */
        if (fgets(lineBuf, (int) sizeof lineBuf, source) == NULL) {
            EOF_flag = TRUE;
            return EOF;
        }
        if (EchoSource)
            fprintf(listing, "%4d: %s", lineno, lineBuf);
        bufsize = (int) strlen(lineBuf);
        linepos = 0;
    }
    return (unsigned char) lineBuf[linepos++];
}

/* ungetNextChar steps the read position in lineBuf back by
   one character so that the character is delivered again by
   the next getNextChar call. Once end of file has been seen
   (EOF_flag set) there is nothing to give back and the call
   does nothing. */
void ungetNextChar(void)
{
    if (EOF_flag)
        return;
    linepos--;
}

/* Lookup table pairing each reserved word's spelling with its
   token. The spellings are string literals and must never be
   written through, hence the const-qualified pointer. Entries
   beyond the initializers listed here (if MAXRESERVED is larger)
   are zero-initialized, i.e. have a null `str`. */
static struct
    { const char *str;
      TokenType tok;
    } reservedWords[MAXRESERVED]
   = {
      {"if",     IF},
      {"else",   ELSE},
      {"int",    INT},
      {"return", RETURN},
      {"void",   VOID},
      {"while",  WHILE}
     };

/* reservedLookup decides whether the lexeme `s` is a reserved
   word, using a linear search of reservedWords.

   Returns the reserved word's token and sets the global `flag`
   to 2 on a match; returns ID (leaving `flag` untouched)
   otherwise.

   Table slots without a spelling are skipped: if MAXRESERVED is
   larger than the number of initialized entries, the remaining
   slots hold a null `str`, which must not be handed to strcmp. */
static TokenType reservedLookup (const char * s)
{ int i;
  for (i=0;i<MAXRESERVED;i++)
  { if (reservedWords[i].str != NULL
        && strcmp(s,reservedWords[i].str) == 0)
    { flag=2;
      return reservedWords[i].tok;
    }
  }
  return ID;
}

/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* function getToken returns the 
 * next token in source file
 */
TokenType getToken(void)
{  /* index for storing into tokenString */
   int tokenStringIndex = 0;
   /* holds current token to be returned */
   TokenType currentToken;
   /* current state - always begins at START */
   StateType state = START;
   /* flag to indicate save to tokenString */
   int save;
   while (state != DONE)
   { int c = getNextChar();
   int b;
     save = TRUE;
     switch (state)
     { case START:
         if (isdigit(c))
           state = INNUM;
         else if (isalpha(c))
           state = INID;
         else if (c == '!')
           state = INASSIGN;
		 else if (c == '<')
			 state = LT;
        else if (c == '>')
			state = RT;
		else if (c == '=')
			state = EQ;
		else if (c == '/')
		{
			 b = getNextChar();
			 if(b== '*')
			 {
				 state = OVER;
				 save = FALSE;	
				 ungetNextChar();
			 }
			 else
			 {
				 ungetNextChar();
           save = TRUE;
           currentToken =OVER;
		   state = DONE; 
			 }

		}

		else if ((c == ' ') || (c == '\t') || (c == '\n'))
           save = FALSE;
         //else if (c == '{')
         //{ save = FALSE;
         //  state = INCOMMENT;
        // }
         else
         { state = DONE;
           switch (c)
           { case EOF:
               save = FALSE;
               currentToken = ENDFILE;
               break;
            // case '=':
           //    currentToken = EQ;
           //    break;
            // case '<':
           //    currentToken = LT;
           //    break;
             case '+':
               currentToken = PLUS;
               break;
             case '-':
               currentToken = MINUS;
               break;
             case '*':
               currentToken = TIMES;
               break;
             //case '/':
             //  currentToken = OVER;
             //  break;
             case '(':
               currentToken = LPAREN;
               break;
             case ')':
               currentToken = RPAREN;
               break;
			 case '{':
               currentToken = LPAREND;
               break;
             case '}':
               currentToken = RPAREND;
               break;
			 case '[':
               currentToken = LPARENZ;
               break;
             case ']':
               currentToken = RPARENZ;
               break;
			 case ',':
               currentToken = COMMA;
               break;
             case ';':
               currentToken = SEMI;
               break;
             default:
               currentToken = ERROR;
               break;
           }
         }
         break;


     //  case INCOMMENT:
     //    save = FALSE;
      //   if (c == EOF)
      //   { state = DONE;
     //      currentToken = ENDFILE;
      //   }
      //   else if (c == '}') state = START;
      //   break;



        case OVER:
		 
         if (c == '*')
		 {
			 state=CON;
			 save = FALSE;
		 }
	
		else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken =OVER;
		   state = DONE; 
         }
         break;  //  /*

	   case CON:
		   
         if (c == '*')
		 {
           state =CON1;
		   save = FALSE;}
		 else
			 save = FALSE;
		 break; 

	   case CON1:
		   
         if (c == '/')
		 {state=START;save = FALSE;}
		 else if(c == '*')
         { /* backup in the input */
           save = FALSE;
           state = CON1;
         }
		 else
			{ /* backup in the input */
           save = FALSE;
           state = CON;
         }
         break;





	   case EQ:
		   state = DONE;
         if (c == '=')
           currentToken = EQQ;
		 else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = EQ;
         }
         break;

	   case LT:
		   state = DONE;
         if (c == '=')
           currentToken = LTT;
		 else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = LT;
         }
         break;


	    case RT:
		   state = DONE;
         if (c == '=')
           currentToken =RTT;
		 else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = RT;
         }
         break;

	    

       case INASSIGN:
         state = DONE;
         if (c == '=')
           currentToken = ASSIGN;
         else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = ERROR;
         }
         break;
       case INNUM:
         if (!isdigit(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = NUM;
		   flag=3;
         }
         break;
       case INID:
         if (!isalpha(c))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = ID;
		   flag=1;
         }
         break;
       case DONE:
       default: /* should never happen */
         fprintf(listing,"Scanner Bug: state= %d\n",state);
         state = DONE;
         currentToken = ERROR;
         break;
     }
     if ((save) && (tokenStringIndex <= MAXTOKENLEN))
       tokenString[tokenStringIndex++] = (char) c;
     if (state == DONE)
     { tokenString[tokenStringIndex] = '\0';
       if (currentToken == ID)
         currentToken = reservedLookup(tokenString);
     }
   }
   if (TraceScan) {
     fprintf(listing,"\t%d: ",lineno);
     printToken(currentToken,tokenString);
   }
   if(currentToken==ENDFILE)
   {
	   linepos = 0;
	   bufsize = 0;
	   EOF_flag = FALSE;
   }

   return currentToken;
} /* end getToken */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -