// scanner.cpp
//
// A small home-made compiler; this file is its lexical analyzer (scanner).
// (Code-viewer page labels: "CPP", "Font size:")
#ifndef SCANNER_CPP_INCLUDED
#define SCANNER_CPP_INCLUDED

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <iostream>
using namespace std; 


#include "scanner.h"
#include "mylist.h"

// Shared scanner state. All of it is mutable global data, so the scanner
// functions in this file are not reentrant.

// Scanner-wide input handle. The functions in this file take their own `f`
// parameter, which shadows this global inside them.
FILE*   f;

// Set by getNextChar once fgets returns NULL; while it is set, getNextChar
// returns -1 without reading anything.
bool    EOF_Flag;

// 256-byte line buffer.
// NOTE(review): presumably this is what callers pass as the `buffer` argument
// (getNextChar asks fgets for up to 256 bytes) - confirm against the callers.
char    buffer[256];
// Character most recently fetched by getToken.
char    ch; 

// Nesting depth of '{' ... '}' comments while getToken is skipping one.
int     flag;
// Length (strlen) of the text getNextChar last read into the line buffer.
int     nCount;
// Index of the next character getNextChar hands out; ungetNextChar decrements it.
int     bufferIndex;
// Current source line number, starting at 1; getToken increments it when it
// consumes a '\n'.
int     line = 1;

// Lexeme being accumulated by getToken (identifier/keyword letters or digits).
string  currentstring; 

// Token being assembled; getToken returns a copy of it.
CToken  currenttoken;

// Current state of getToken's state machine; set to s0 at the start of a token.
StateType state =s0;

// Not referenced by the functions visible in this file.
CMyList<CToken> TokenList ;
// Identifier spellings seen so far: getToken appends a lexeme when
// GetFirstNode does not find it.
CMyList<string> identiferList;
// Digit strings seen so far, maintained by getToken in the same way.
CMyList<string> unsignedIntList;

// Printable names of the 42 token kinds, listed in groups: end-of-file/error
// markers, keyword names, type names, identifier/constant kinds, and
// operator/punctuation names.
// NOTE(review): presumably indexed by the token-kind enumeration declared in
// scanner.h - confirm that the order here matches that declaration.
string LexName[42] = 
{
	"ENDFILE","ERROR",
	
	"PROGRAM","PROCEDURE","TYPE","VAR",
	"IF","THEN","ELSE","FI","WHILE","DO",
	"ENDWH","BEGIN","END","READ","WRITE",
	"ARRAY","OF","RECORD","RETURN",
	
    "INTEGER","CHAR",

	"ID", "INTC", "CHARC",
	
    "ASSIGN","EQ","LT","PLUS","MINUS","TIMES",
	"OVER","LPAREN","RPAREN","DOT","COLON",
	"SEMI","COMMA","LMIDPAREN","RMIDPAREN",
	"UNDERANGE"
};

// Hands out the next input character, refilling `buffer` through fgets
// (at most 255 characters plus the terminator per refill) whenever the
// previously read text has been used up.
//
// f      - stream to read from.
// buffer - caller-supplied storage; fgets is told it holds 256 bytes.
//
// Returns the character as an int, or -1 once fgets has reported that no more
// input is available. After that, EOF_Flag stays set and every further call
// returns -1 immediately.
int getNextChar(FILE *f, char buffer[ ])
{
	if (EOF_Flag)
		return -1;

	const bool haveBufferedText = (nCount != 0) && (bufferIndex != nCount);
	if (!haveBufferedText)
	{
		// Start again at the front of the buffer before attempting the refill.
		bufferIndex = 0;
		EOF_Flag = (fgets(buffer, 256, f) == NULL);
		if (EOF_Flag)
			return -1;
		nCount = strlen(buffer);
	}

	const int next = buffer[bufferIndex];
	bufferIndex++;
	return next;
}


// Pushes the most recently read character back so that the next getNextChar
// call returns it again. Only the read index is moved; `buffer` itself is not
// touched (the parameter is kept for interface compatibility).
//
// The index is never moved below 0. getNextChar resets bufferIndex to 0 before
// a refill that may fail at end of input, and an unguarded decrement at that
// point would leave a negative index behind for any later read.
void ungetNextChar(char buffer[])
{
	(void)buffer;
	if (bufferIndex > 0)
		bufferIndex--;
}


// Searches the ReservedWord table for the spelling `rs`.
//
// Returns the index of the first of the MaxReserved entries whose `str`
// compares equal to `rs`, or -1 when no entry matches.
int reservedLookup(string & rs)
{
	for (int idx = 0; idx < MaxReserved; ++idx)
	{
		const string word = ReservedWord[idx].str;
		if (word == rs)
			return idx;
	}
	return -1;
}

CToken getToken(FILE *f,char buffer[])
{
	ch = getNextChar(f,buffer);
	if(ch == -1) 
	{
		currenttoken.lineShow = line;
		currenttoken.lex      = ENDFILE;
		currenttoken.sem      = "";
	}
	else 
    {		
		state = s0;
		currentstring = "";
		while (state != send)
		{
		      if (ch == -1)
			  {
				  currenttoken.lineShow = line;
				  currenttoken.lex      = ENDFILE;
				  currenttoken.sem      = "";
				  state = send;
				  break;
			  }
			  switch (state)
			  {
			  case s0:
				  if ((isalpha(ch)) && flag == 0)
				  {
					  state = s1;
					  break;
				  }
				  else  if ((isdigit(ch)) && flag == 0)
				  { 
					  state = s3;
					  break;
				  }
				  else  if (flag == 0)
				  {           
					  switch (ch)
					  { 			
					  case '+':
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = PLUS;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case '=':					          
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = EQ;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case '<':
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = LT;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case '-': 
					      currenttoken.lineShow  = line;
						  currenttoken.lex       = MINUS;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case '*': 
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = TIMES;
						  currenttoken.sem       = "";
						  state=send;
						  break;
	   				  case '/':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = OVER;
						  currenttoken.sem       = "";
						  state=send;
						  break;
				   	  case '(':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = LPAREN;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case ')':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = RPAREN;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case ';':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = SEMI;
				          currenttoken.sem       = "";
						  state=send;
						  break;
	       			  case '[':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = LMIDPAREN;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case ']':  
						  currenttoken.lineShow  = line;
						  currenttoken.lex       = RMIDPAREN;
						  currenttoken.sem       = "";
						  state=send;
						  break;
				      case ',':  
						  currenttoken.lineShow  = line;
				          currenttoken.lex       = COMMA;
						  currenttoken.sem       = "";
						  state=send;
						  break;
					  case '\t': 
					  case ' ' : 
						  ch= getNextChar(f,buffer);
				    	  state=s0;
				 		  break;
					  case '.' : 
						  state = s4;
						  break;
					  case ':' :  
						  state = s5;
						  break;
					  case '{' : 
						  state = s6;
						  break;
					  case '\n': 
						 line++;
				   		 ch = getNextChar(f,buffer);
				 		 state = s0;
				 		 break;
				   }
				 }
                 break;
	  
	  case s1:
		  while ((isalpha(ch))||(isdigit(ch))||(ch == '_'))
			   {
				  currentstring += ch;
				  ch = getNextChar(f,buffer);
			   }
		  ungetNextChar(buffer);
    	  state = s2;
	      break;
	   case s2:  
		   int res;
		   if((res = reservedLookup(currentstring)) != -1)
				{
                    currenttoken.lineShow  = line;
					currenttoken.lex       = ReservedWord[res].tok;
					currenttoken.sem       = "";
					state = send;
				}
	   	   else
			  {
				  if(identiferList.GetFirstNode(currentstring) != NULL)
				  {
					  currenttoken.sem = identiferList.GetFirstNode(currentstring)->data;
				  }
                  else 
				  {
					  identiferList.InsertAtTail(currentstring);
					  currenttoken.sem = currentstring;
				  }

					  currenttoken.lineShow  = line;
					  currenttoken.lex       = ID;
       				  state = send;
			  }
			break;
	  case s3:
		        while (isdigit(ch))
				{
					 currentstring += ch;
				     ch = getNextChar(f,buffer);
				}
             		 ungetNextChar(buffer);
                 if(unsignedIntList.GetFirstNode(currentstring) != NULL)
				 {
					  currenttoken.sem = unsignedIntList.GetFirstNode(currentstring)->data;
				 }
				 else 
				 {
					 unsignedIntList.InsertAtTail(currentstring);
					 currenttoken.sem = currentstring;
				 }
				 
				 currenttoken.lineShow  = line;
				 currenttoken.lex       = INTEGER;
		         state = send;
				 break;
      case s4:
		  {
			  ch = getNextChar(f,buffer);
                if(ch == '.')
				{
					currenttoken.lineShow  = line;
					currenttoken.lex       = UNDERANGE;
					currenttoken.sem       = "";

				}
				else 
				{
					ungetNextChar(buffer);
					currenttoken.lineShow  = line;
					currenttoken.lex       = DOT;
					currenttoken.sem       = "";
				    //ungetNextChar(buffer);
				}
			state = send;
			break;
		  }
	  case s5:
		  {
			    ch=getNextChar(f,buffer);
                if(ch == '=')
				{
					currenttoken.lineShow  = line;
					currenttoken.lex       = ASSIGN;
					currenttoken.sem       = "";

				}
				else 
				{
					ungetNextChar(buffer);
					currenttoken.lineShow  = line;
					currenttoken.lex       = COLON;
					currenttoken.sem       = "";
				    ungetNextChar(buffer);
				}
			state = send;
			break;
		  }

	  case s6: 
		  {
			  flag++;
			  while (flag != 0 && ch != -1)
			  {
				  ch = getNextChar(f,buffer);
				  if(ch == '}')
					  flag--;
				  else if(ch == '{')
					  flag++;
			  }
			  ch = getNextChar(f,buffer);
			  state = s0;
			  break;
		  }
		  }
    
	}
	}  
   return currenttoken;
}


// Runs getToken repeatedly until it yields an ENDFILE token and returns all
// tokens produced, in order, with that ENDFILE token as the last element.
//
// f, buffer - forwarded unchanged to getToken.
CMyList<CToken>  getTokenList(FILE *f,char buffer[])
{
	CToken tok = getToken(f, buffer);
	CMyList<CToken> collected;

	for (; tok.lex != ENDFILE; tok = getToken(f, buffer))
		collected.InsertAtTail(tok);

	// The loop stops on ENDFILE without storing it; append it as the terminator.
	collected.InsertAtTail(tok);

	return collected;
}



// Builds the complete token list with getTokenList and returns a copy of the
// lex, lineShow and sem fields of the node that GetNode(k) selects.
//
// f, buffer - forwarded unchanged to getTokenList.
// k         - position handed to CMyList::GetNode.
//
// Every call runs getTokenList, i.e. it scans until ENDFILE is produced.
// NOTE(review): the GetNode result is dereferenced without a check;
// presumably GetNode returns a null pointer for an out-of-range k - confirm
// and guard at the call sites if so.
CToken GetOneToken(FILE* f,char buffer[],int k)
{
	CMyList<CToken> allTokens = getTokenList(f,buffer);
	Node<CToken>* node = allTokens.GetNode(k);

	CToken picked;
	picked.lex      = node->data.lex;
	picked.lineShow = node->data.lineShow;
	picked.sem      = node->data.sem;
	return picked;
}

#endif
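// Code-viewer keyboard shortcuts (page residue, not part of the program):
//
//   Copy code        Ctrl + C
//   Search code      Ctrl + F
//   Full screen      F11
//   Toggle theme     Ctrl + Shift + D
//   Show shortcuts   ?
//   Increase font    Ctrl + =
//   Decrease font    Ctrl + -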