📄 lexicalanalysis.cpp

📁 自己写的一个c的子集的词法分析器
💻 CPP
字号:
// After writing the input file, end it with a newline (press Enter) to mark the end of the file

#include <ctype.h>   /* isdigit, isalpha */
#include <stdio.h>   /* FILE, fopen, fclose, fgets, fprintf, perror, EOF */
#include <string.h>  /* strlen, strcmp */

#include <iostream>

/* Size limits and boolean constants used throughout the scanner. */
#define BUFLEN      256  /* capacity of the source-line input buffer */
#define MAXTOKENLEN 50   /* longest lexeme kept in tokenString */
#define MAXRESERVED 7    /* number of entries in the reserved-word table */
#define TRUE  1
#define FALSE 0

/* Lexeme of the token most recently scanned; one extra slot for the NUL. */
char tokenString[MAXTOKENLEN+1];

/* Number of the source line currently being scanned (for the listing). */
int lineno;

/* Line-buffer state, private to this file. */
static char lineBuf[BUFLEN];   /* text of the current source line */
static int bufsize = 0;        /* number of characters held in lineBuf */
static int linepos = 0;        /* index of the next unread character in lineBuf */
static int EOF_flag = FALSE;   /* set at end of input; makes ungetNextChar a no-op */

/* States of the scanner's DFA, as driven by getToken. */
typedef enum
{
    START,        /* between tokens; nothing consumed yet */
    INDIVIDE,     /* a '/' has been read: division or start of a comment */
    INCOMMENT,    /* inside a comment body */
    COMMENT,      /* inside a comment, looking for its closing '/' */
    INASSIGN,     /* an '=' has been read: assignment or equality */
    INLESSTHAN,   /* a '<' has been read: less-than or not-equal */
    INNUM,        /* reading the digits of a number */
    INDOT,        /* a '.' has been read after the integer part */
    INREAL,       /* reading the fractional digits of a real literal */
    INID,         /* reading the letters and digits of an identifier */
    INUNDERLINE,  /* one or more '_' have been read inside an identifier */
    DONE          /* a complete token has been recognized */
} StateType;

/* Token codes returned by getToken. */
typedef enum
{
    /* book-keeping tokens */
    ENDFILE,
    ERROR,

    /* reserved words */
    IF,
    ELSE,
    WHILE,
    READ,
    WRITE,
    INT,
    REAL,

    /* tokens whose lexeme varies */
    ID,
    INTEGER_LITERAL,
    REAL_LITERAL,

    /* operators: "=", "==", "<", "<>", "+", "-", "*", "/" */
    ASSIGN,
    EQ,
    LT,
    NE,
    PLUS,
    MINUS,
    MULTIPLY,
    DIVIDE,

    /* punctuation: "(", ")", ";", "{", "}", "[", "]" */
    LPAREN,
    RPAREN,
    SEMICOLON,
    LBRACE,
    RBRACE,
    LBRACKET,
    RBRACKET,

    /* NOTE(review): not produced by the scanner code in this file;
       presumably comment delimiters -- confirm before relying on them */
    LCOM,
    RCOM
} TokenType;

/* Lookup table mapping the spelling of each reserved word to its token code.
 * The spellings are string literals, which must never be written through, so
 * they are held via pointer-to-const (a plain char* initialised from a literal
 * is ill-formed in C++11 and later). The table itself is read-only. */
static const struct
{
	const char *str;  /* spelling of the reserved word */
	TokenType tok;    /* token code returned for that spelling */
} reservedWords[MAXRESERVED]
= {
	{"if",IF},
	{"else",ELSE},
	{"while",WHILE},
	{"read",READ},
	{"write",WRITE},
	{"int",INT},
	{"real",REAL}
};

 /* Input stream holding the source text to be scanned. */
 FILE *source;

 /* Output stream that receives the numbered source listing and token lines. */
 FILE *listing;

 /* Tracing switch, on by default: intended to control whether each token
  * recognized by the scanner is printed to the listing file. */
 int TraceScan = 1;

/* getNextChar returns the next character of the source text (blanks and
 * newlines included), reading a new line into lineBuf once the current one
 * is exhausted. Every line read is echoed to the listing with its number.
 *
 * Returns the character as an unsigned char value converted to int, or EOF
 * once the input is exhausted (EOF_flag is set in that case).
 */
static int getNextChar()
{
  if (linepos < bufsize)
    /* Convert through unsigned char so bytes >= 0x80 never come back
       negative: a negative value could be mistaken for EOF and is not a
       valid argument for the <ctype.h> classifiers. */
    return (unsigned char) lineBuf[linepos++];

  if (fgets(lineBuf, (int) sizeof lineBuf, source) == NULL)
  { EOF_flag = TRUE;
    return EOF;
  }

  /* Count the line only after it was actually read, so that a token which
     ends exactly at end of file is still reported on its own line. */
  lineno++;
  fprintf(listing,"%4d: %s",lineno,lineBuf);
  bufsize = (int) strlen(lineBuf);
  linepos = 0;
  return (unsigned char) lineBuf[linepos++];
}


/* ungetNextChar steps the read position in lineBuf back by one character,
   so the character just fetched is returned again by the next read.
   It does nothing once end of input has been reached. */
static void ungetNextChar()
{
	if (EOF_flag)
		return;

	linepos -= 1;
}


/* Classify an identifier lexeme: returns the matching reserved-word token
 * if s is spelled exactly like an entry of reservedWords, otherwise ID.
 * The table is scanned linearly from its first entry. */
static TokenType reservedLookup (char * s)
{
  int idx = 0;

  while (idx < MAXRESERVED)
  {
    if (strcmp(s, reservedWords[idx].str) == 0)
      return reservedWords[idx].tok;
    idx++;
  }

  return ID;
}


/* printToken writes one line describing a token to the listing file.
 *
 *   token       - the token code to describe
 *   tokenString - its lexeme; printed only for reserved words, literals,
 *                 identifiers and errors
 *
 * Operators, punctuation and end-of-file are printed as fixed text.
 */
void printToken( TokenType token, const char* tokenString )
{
  const char *symbol = "";

  switch (token)
  {
    /* tokens whose line includes the lexeme */
    case IF:
    case ELSE:
    case WHILE:
    case READ:
    case WRITE:
    case INT:
    case REAL:
      fprintf(listing,"reserved word: %s\n",tokenString);
      return;
    case INTEGER_LITERAL:
      fprintf(listing,"INT, val= %s\n",tokenString);
      return;
    case REAL_LITERAL:
      fprintf(listing,"REAL, val= %s\n",tokenString);
      return;
    case ID:
      fprintf(listing,"ID, name= %s\n",tokenString);
      return;
    case ERROR:
      fprintf(listing,"ERROR: %s\n",tokenString);
      return;

    /* tokens with a fixed spelling */
    case ASSIGN:    symbol = "=";   break;
    case LT:        symbol = "<";   break;
    case EQ:        symbol = "==";  break;
    case NE:        symbol = "<>";  break;
    case PLUS:      symbol = "+";   break;
    case MINUS:     symbol = "-";   break;
    case MULTIPLY:  symbol = "*";   break;
    case DIVIDE:    symbol = "/";   break;
    case LPAREN:    symbol = "(";   break;
    case RPAREN:    symbol = ")";   break;
    case SEMICOLON: symbol = ";";   break;
    case LBRACE:    symbol = "{";   break;
    case RBRACE:    symbol = "}";   break;
    case LBRACKET:  symbol = "[";   break;
    case RBRACKET:  symbol = "]";   break;
    case ENDFILE:   symbol = "EOF"; break;

    default: /* should never happen */
      fprintf(listing,"Unknown token: %d\n",token);
      return;
  }

  fprintf(listing,"%s\n",symbol);
}

/* Character classifiers that are safe for any int: the <ctype.h> functions
 * are only defined for EOF and unsigned char values, so EOF is rejected up
 * front and everything else is narrowed to unsigned char first. */
static int isDigitChar(int c)
{
	return c != EOF && isdigit((unsigned char) c);
}

static int isLetterChar(int c)
{
	return c != EOF && isalpha((unsigned char) c);
}

/* function getToken returns the next token in the source file.
 *
 * The lexeme of the token is left in the global tokenString, truncated to
 * MAXTOKENLEN characters and always NUL-terminated. When TraceScan is
 * non-zero the token is also printed to the listing file, prefixed by the
 * current line number.
 *
 * Returns ENDFILE at end of input (also when the input ends inside an
 * unterminated comment) and ERROR for a character that starts no token,
 * for a number ending in '.', and for an identifier ending in '_'.
 */
TokenType getToken(){
	int tokenStringIndex = 0;        /* index for storing into tokenString */
	TokenType currentToken = ERROR;  /* token to be returned */
	StateType state = START;         /* current state - always begins at START */

	while (state != DONE)
	{
		int c = getNextChar();
		int save = TRUE;             /* whether c becomes part of the lexeme */

		switch (state)
		{
		case START:
			if (isDigitChar(c))        state = INNUM;
			else if (isLetterChar(c))  state = INID;
			else if (c == '=')         state = INASSIGN;
			else if ((c == ' ') || (c == '\t') || (c == '\n'))   save = FALSE;
			else if (c == '/')         state = INDIVIDE;
			else if (c == '<')         state = INLESSTHAN;
			else
			{
				state = DONE;
				switch (c)
				{
				case EOF: save = FALSE; currentToken = ENDFILE; break;
				case '+': currentToken = PLUS;  break;
				case '-': currentToken = MINUS; break;
				case '*': currentToken = MULTIPLY; break;
				case '(': currentToken = LPAREN; break;
				case ')': currentToken = RPAREN; break;
				case '{': currentToken = LBRACE; break;
				case '}': currentToken = RBRACE; break;
				case '[': currentToken = LBRACKET; break;
				case ']': currentToken = RBRACKET; break;
				case ';': currentToken = SEMICOLON; break;
				default: currentToken = ERROR; break;
				}
			}
			break;

		/* decide whether the '/' is a division sign or opens a comment */
		case INDIVIDE:
			save = FALSE;
			if (c == '*')
			{
				/* discard the '/' stored in START so it cannot leak into
				   the lexeme of the token that follows the comment */
				tokenStringIndex = 0;
				state = INCOMMENT;
			}
			else
			{
				ungetNextChar();     /* lookahead belongs to the next token */
				currentToken = DIVIDE;
				state = DONE;
			}
			break;

		/* inside a comment body: only '*' can begin the terminator */
		case INCOMMENT:
			save = FALSE;
			if (c == EOF)
			{
				/* unterminated comment: stop instead of spinning on EOF */
				currentToken = ENDFILE;
				state = DONE;
			}
			else if (c == '*')
				state = COMMENT;
			break;

		/* a '*' was just read inside a comment */
		case COMMENT:
			save = FALSE;
			if (c == EOF)
			{
				currentToken = ENDFILE;
				state = DONE;
			}
			else if (c == '/')
				state = START;       /* comment closed; the '/' is consumed */
			else if (c != '*')
				state = INCOMMENT;   /* lone '*': still inside the comment */
			/* a further '*' keeps waiting for the closing '/' */
			break;

		case INASSIGN:
			state = DONE;
			if (c == '=')
				currentToken = EQ;
			else
			{
				ungetNextChar();     /* backup in the input */
				save = FALSE;
				currentToken = ASSIGN;
			}
			break;

		case INLESSTHAN:
			state = DONE;
			if (c == '>')
				currentToken = NE;
			else
			{
				ungetNextChar();     /* backup in the input */
				save = FALSE;
				currentToken = LT;
			}
			break;

		case INNUM:
			if (!isDigitChar(c))
			{
				if (c == '.')
					state = INDOT;
				else
				{
					ungetNextChar(); /* backup in the input */
					save = FALSE;
					state = DONE;
					currentToken = INTEGER_LITERAL;
				}
			}
			break;

		/* a '.' must be followed by at least one digit */
		case INDOT:
			if (isDigitChar(c))
				state = INREAL;
			else
			{
				ungetNextChar();
				save = FALSE;
				currentToken = ERROR;
				state = DONE;        /* without this the scanner never leaves INDOT */
			}
			break;

		case INREAL:
			if (!isDigitChar(c))
			{
				ungetNextChar();
				save = FALSE;
				state = DONE;
				currentToken = REAL_LITERAL;
			}
			break;

		case INID:
			if (!isDigitChar(c) && !isLetterChar(c))
			{
				if (c == '_')
					state = INUNDERLINE;
				else
				{
					ungetNextChar();
					save = FALSE;
					state = DONE;
					currentToken = ID;
				}
			}
			break;

		/* after '_' a letter or digit must follow; an identifier may not
		   end with an underscore */
		case INUNDERLINE:
			if (c != '_')
			{
				if (isDigitChar(c) || isLetterChar(c))
					state = INID;
				else
				{
					ungetNextChar();
					save = FALSE;
					currentToken = ERROR;
					state = DONE;
				}
			}
			break;

		case DONE:
		default: /* should never happen */
			fprintf(listing,"Scanner Bug: state= %d\n",state);
			state = DONE;
			currentToken = ERROR;
			break;
		}

		/* Keep at most MAXTOKENLEN characters so that index MAXTOKENLEN is
		   always free for the terminating NUL. */
		if ((save) && (tokenStringIndex < MAXTOKENLEN))
			tokenString[tokenStringIndex++] = (char) c;

		if (state == DONE)
		{
			tokenString[tokenStringIndex] = '\0';
			if (currentToken == ID)
				currentToken = reservedLookup(tokenString);
		}
	}

	if (TraceScan)
	{
		fprintf(listing,"\t%d: ",lineno);
		printToken(currentToken,tokenString);
	}

	return currentToken;
} /* end getToken */


int main( int argc, char * argv[] )
{ source = fopen("input.txt","r");
listing=fopen("output.txt","w");
 while(!EOF_flag)
	 getToken();
  return 0;
}
💿 文件大小 280 K
👤 上传用户 leeixndong
📂 所属分类编译器/解释器
🏷️ 相关标签

#分析器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -