⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexicalanalyser.cpp

📁 词汇分析
💻 CPP
字号:
/* LexicalAnalyser.cpp: implementation of the following classes:
		CToken
		CLexicalAnalyser
		CExpression
		CSymbol
		CIntLiteralExpression
		CRealLiteralExpression

Author: Terence Kam

Note
----
1.	You may use this code for free, but I would greatly appreciate it if
	you give proper credit and acknowledgement when you use my code.
2.	I'm not responsible if my code causes any damage to anybody or
	their property. Although my code tested well and behaved properly
	on my computer system, I can't guarantee that it will do so on
	other computer systems. Therefore, use the code AT YOUR OWN RISK.
3.	My code is tested using Microsoft Visual C++ 6.0
4.	I extracted this code from an existing project. If there are any
	unnecessary compiler directives (e.g. the !defined(AFX_PARSER_H
	... guard) feel free to remove them.
5.	Any bug fix or improvement is greatly welcomed! :-)
6.	Good luck and fruitful programming!

//////////////////////////////////////////////////////////////////////
*/
#include "stdafx.h"
#include "LexicalAnalyser.h"
#include<ctype.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Builds an integer literal expression: stores val in `value` and tags the
// expression as integerExpression.
CIntLiteralExpression::CIntLiteralExpression(int val)
	: value(val)
{
	ExpressionType=integerExpression;
}

// Builds a real (floating point) literal expression: stores val in `value`
// and tags the expression as realExpression.
CRealLiteralExpression::CRealLiteralExpression(double val)
	: value(val)
{
	ExpressionType=realExpression;
}


// Creates a token of type nilT with zeroed numeric payloads.
CToken::CToken(): TokenType(nilT), TokenInt(0)
{
	// TokenReal is assigned in the body rather than the initialiser list so
	// the result does not depend on the member declaration order in the
	// header. Without this, every copy of a token that is not a real number
	// would carry an indeterminate double.
	TokenReal=0.0;
}

// Destructor: performs no explicit cleanup.
CToken::~CToken()
{
}

//////////////////////////////////////////////////////////////////////
// CLexicalAnalyser Class
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Creates an analyser whose current position is NULL, i.e. it does not
// refer to any token yet.
CLexicalAnalyser::CLexicalAnalyser()
	: CurrentPosition(NULL)
{
}

// Destructor: performs no explicit cleanup.
CLexicalAnalyser::~CLexicalAnalyser()
{
}

//////////////////////////////////////////////////////////////////////
// CSymbol Class
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Creates a symbol-table entry.
//   string - the literal text of the symbol
//   type   - the token type assigned to a token when this text is matched
CSymbol::CSymbol(CString string, int type)
	: SymbolString(string)
	, TokenType(type)
{
}

// Destructor: performs no explicit cleanup.
CSymbol::~CSymbol()
{
}

// Symbols are ordered by the length of their text only.
// Returns true when this symbol's string is strictly shorter than Symbol's.
bool CSymbol::operator < (CSymbol &Symbol)
{
	const int ownLength=SymbolString.GetLength();
	const int otherLength=Symbol.SymbolString.GetLength();
	return ownLength<otherLength;
}

// Symbols are ordered by the length of their text only.
// Returns true when this symbol's string is no longer than Symbol's.
bool CSymbol::operator <= (CSymbol &Symbol)
{
	const int ownLength=SymbolString.GetLength();
	const int otherLength=Symbol.SymbolString.GetLength();
	return ownLength<=otherLength;
}

// Symbols are ordered by the length of their text only.
// Returns true when this symbol's string is strictly longer than Symbol's.
bool CSymbol::operator > (CSymbol &Symbol)
{
	const int ownLength=SymbolString.GetLength();
	const int otherLength=Symbol.SymbolString.GetLength();
	return ownLength>otherLength;
}

// Symbols are ordered by the length of their text only.
// Returns true when this symbol's string is at least as long as Symbol's.
bool CSymbol::operator >= (CSymbol &Symbol)
{
	const int ownLength=SymbolString.GetLength();
	const int otherLength=Symbol.SymbolString.GetLength();
	return ownLength>=otherLength;
}

// Adds Symbol to the symbol table, keeping the table ordered from the
// longest symbol string to the shortest.
//
// String2TokenSequence scans the table from index 0 and takes the first
// entry that matches, so the longest-first order is what makes a longer
// symbol win over a shorter one that is a prefix of it.
//
// The new symbol is inserted in front of the first entry that is not longer
// than it; if every existing entry is longer (or the table is empty) it is
// appended at the end. Every element is examined, including the last one
// and the sole element of a one-entry table.
void CLexicalAnalyser::setSymbol(CSymbol Symbol)
{
	const int count=SymbolTable.GetSize();
	for(int i=0;i<count;i++)
	{
		if(SymbolTable[i]<=Symbol)
		{
			SymbolTable.InsertAt(i,Symbol);
			return;
		}
	}
	SymbolTable.Add(Symbol);
}

// Default constructor. Gives TokenType a defined value (nilT) so that a
// default-constructed symbol never carries an indeterminate token type.
CSymbol::CSymbol(): TokenType(nilT)
{
}

// <ctype.h> classification functions require an argument representable as
// unsigned char (or EOF). A plain char holding a byte above 0x7F may be
// negative, so the value is converted to unsigned char before the call.
static bool LexIsDigit(int ch)
{
	return isdigit(static_cast<unsigned char>(ch))!=0;
}

static bool LexIsAlpha(int ch)
{
	return isalpha(static_cast<unsigned char>(ch))!=0;
}

// Splits `sequence` into tokens and stores them in TokenSequence, replacing
// any tokens that were there before. On return CurrentPosition refers to the
// first token.
//
// Leading and trailing whitespace is trimmed first. Then, per character:
//   - ' ' and '\t' are skipped;
//   - '"' starts a string (stringT): TokenName receives the characters up to
//     the closing quote, or up to the end of input if there is none;
//   - "\r\n" produces endlineT;
//   - a run of digits and '.' produces intT (TokenInt set) or, if it contains
//     a '.', realT (TokenReal set); a run that is not a complete, valid
//     decimal number (e.g. "." or "1.2.3") produces errorT;
//   - otherwise the symbol table is searched in order and the first matching
//     symbol supplies the token type;
//   - otherwise a run of letters, digits and '_' that starts with a letter
//     produces idT with the text in TokenName;
//   - anything else produces errorT.
// An endfileT token is always appended last.
//
// The same CToken object is reused for every token and only the fields
// relevant to the token kind are rewritten, so the other fields keep
// whatever value an earlier token left in them.
void CLexicalAnalyser::String2TokenSequence(CString sequence)
{
	CToken token;

	TokenSequence.RemoveAll();
	CurrentPosition=TokenSequence.GetHeadPosition();

	sequence.TrimLeft();	//trim off leading whitespaces, tabs and newlines
	sequence.TrimRight();	//trim off trailing whitespaces, tabs and newlines

	const int length=sequence.GetLength();

	//Visit every character and tokenise the string
	for(int i=0; i<length; i++)
	{
		token.TokenType=nilT;

		//spaces and tabs separate tokens but produce none
		if(sequence[i]==' ' || sequence[i]=='\t')
			continue;
		//string literal
		else if(sequence[i]=='\"')
		{
			i++;
			token.TokenType=stringT;
			token.TokenName.Empty();

			//collect until the closing quote; the for loop's i++ then
			//steps over the closing quote itself
			while(i<length && sequence[i]!='\"')
			{
				token.TokenName+=sequence[i];
				i++;
			}
			AddToken(token);
		}
		//newline; the bounds check keeps the look-ahead inside the string
		else if(sequence[i]=='\r' && i+1<length && sequence[i+1]=='\n')
		{
			token.TokenType=endlineT;
			i++;
			AddToken(token);
		}
		//number: a run of digits and decimal points
		else if(LexIsDigit(sequence[i]) || sequence[i]=='.')
		{
			token.TokenType=intT;
			token.TokenName.Empty();
			while(i<length && (LexIsDigit(sequence[i]) || sequence[i]=='.'))
			{
				if(sequence[i]=='.') token.TokenType=realT;
				token.TokenName+=sequence[i];
				i++;
			}
			i--;//the for loop will increment i again

			if(token.TokenType==intT)
				token.TokenInt=atoi((LPCSTR)token.TokenName);
			else
			{
				//strtod reports where parsing stopped, so validity does not
				//have to be guessed from the value: "0.0" is a valid number
				//even though it converts to 0, whereas "." and "1.2.3" are
				//not fully consumed and are therefore errors.
				LPCSTR text=(LPCSTR)token.TokenName;
				char *end=NULL;
				token.TokenReal=strtod(text,&end);
				if(end==text || *end!='\0')
					token.TokenType=errorT;
			}
			AddToken(token);
		}
		//neither number, newline nor quote: it may be one of the symbols
		else
		{
			//the first matching entry of the symbol table wins
			for(int j=0;j<SymbolTable.GetSize();j++)
			{
				const int symbolLength=SymbolTable[j].SymbolString.GetLength();

				//An empty symbol would match everywhere without consuming
				//any input and the scan would never advance, so skip it.
				//A symbol made of letters/digits that is immediately
				//followed by another letter/digit is part of a longer
				//identifier, not the symbol itself.
				if(symbolLength>0
					&& sequence.Mid(i,symbolLength)==SymbolTable[j].SymbolString
					&& !IsAllAplhaOrDigit(sequence,i,symbolLength+1) )
				{
					token.TokenType=SymbolTable[j].TokenType;
					i+=symbolLength-1;
					AddToken(token);
					break;
				}
			}
		}

		//not a symbol either: check whether it is an identifier
		if(token.TokenType==nilT)
		{
			token.TokenName.Empty();

			//an identifier must start with a letter or digit (digits never
			//reach this point because the number branch takes them first)
			if(LexIsAlpha(sequence[i]) || LexIsDigit(sequence[i]))
			{
				token.TokenType=idT;
				while(i<length && (LexIsAlpha(sequence[i]) || LexIsDigit(sequence[i]) || sequence[i]=='_') )
				{
					token.TokenName+=sequence[i];
					i++;
				}
				//i now refers to the character after the identifier. Step
				//back so the for loop re-examines it, unless it is a space,
				//which can simply be skipped.
				if(i<length && sequence[i]!=' ')
					i--;
				AddToken(token);
			}
		}

		//still unclassified: an error has occurred
		if(token.TokenType==nilT)
		{
			token.TokenType=errorT;
			AddToken(token);
		}
	}

	token.TokenType=endfileT;
	AddToken(token);
	CurrentPosition=TokenSequence.GetHeadPosition();
}

// Returns a copy of the token at the current position without moving the
// position. When the current position is NULL, a token of type nilT is
// returned instead.
CToken CLexicalAnalyser::GetCurrentToken()
{
	if(CurrentPosition!=NULL)
		return TokenSequence.GetAt(CurrentPosition);

	CToken noToken;
	noToken.TokenType=nilT;
	return noToken;
}

// Returns a copy of the token at the current position and advances the
// current position to the following token. When the current position is
// NULL, a token of type nilT is returned and the position is left unchanged.
CToken CLexicalAnalyser::NextToken()
{
	if(CurrentPosition!=NULL)
		return TokenSequence.GetNext(CurrentPosition);

	CToken noToken;
	noToken.TokenType=nilT;
	return noToken;
}

// Appends token to the end of the token sequence and makes the newly added
// element the current position.
void CLexicalAnalyser::AddToken(CToken token)
{
	const POSITION tail=TokenSequence.AddTail(token);
	CurrentPosition=tail;
}

// Reports whether the token sequence currently holds no tokens, as returned
// by TokenSequence.IsEmpty().
BOOL CLexicalAnalyser::IsSequenceEmpty()
{
	const BOOL empty=TokenSequence.IsEmpty();
	return empty;
}

// Tests whether the `length` characters of `s` starting at index `begin`
// are all letters or digits.
//
// Returns false when the requested range does not lie completely inside the
// string (begin is negative or begin+length runs past the end), or when any
// character in the range is neither a letter nor a digit. A range that ends
// exactly at the end of the string is inside the string and is examined
// normally.
bool CLexicalAnalyser::IsAllAplhaOrDigit(CString s, int begin, int length)
{
	//reject ranges that fall outside the string
	if(begin<0 || begin+length>s.GetLength())
		return false;

	for(int i=begin;i<begin+length;i++)
	{
		//<ctype.h> functions need a value representable as unsigned char;
		//a plain char above 0x7F may be negative
		const unsigned char ch=static_cast<unsigned char>(s[i]);
		if(!isalpha(ch) && !isdigit(ch))
			return false;
	}

	return true;
}


//////////////////////////////////////////////////////////////////////
// CExpression Class
//////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Default constructor: performs no explicit initialisation.
CExpression::CExpression()
{
}

// Destructor: performs no explicit cleanup.
CExpression::~CExpression()
{
}

void CLexicalAnalyser::ResetPosition()
{
	CurrentPosition=TokenSequence.GetHeadPosition();	
}

void CLexicalAnalyser::ClearAllTokens()
{
	while(!TokenSequence.IsEmpty())
		TokenSequence.RemoveHead();
}

// Returns the stored current position in the token sequence. Together with
// SetCurrentPosition this lets a caller save a position and restore it later.
POSITION CLexicalAnalyser::GetCurrentPosition()
{
	return this->CurrentPosition;
}

// Overwrites the stored current position with pos. The value is stored as
// given; no validation is performed here.
void CLexicalAnalyser::SetCurrentPosition(POSITION pos)
{
	this->CurrentPosition=pos;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -