📄 lexicalanalyzer.cpp

📁 a basic interpreter free basic
💻 CPP
字号:
#include "StdAfx.h"
#include "LexicalAnalyzer.h"

// Construct an analyzer that starts out with an empty, heap-allocated
// token list.  The list is released again in the destructor.
CLexicalAnalyzer::CLexicalAnalyzer(void)
	: m_TokenList(new tokenlist_t())
{
}

// Destroy the analyzer: delete every token the list still points to,
// then delete the list itself.
CLexicalAnalyzer::~CLexicalAnalyzer(void)
{
	// The list stores raw CToken pointers, so each one has to be
	// deleted by hand before the container goes away.
	for(tokenlist_t::iterator entry = m_TokenList->begin();
		entry != m_TokenList->end();
		++entry)
	{
		delete *entry;
	}

	m_TokenList->clear();
	delete m_TokenList;
}

// Lexically analyze a string.
//
// Breaks Value into tokens and stores them in this analyzer's token list,
// replacing (and deleting) the tokens left there by any previous call.
//
// Parameters:
//   Value - the source text to tokenize.
//
// Returns:
//   A pointer to the analyzer's own token list, terminated by an EOL token.
//   The list and its tokens remain owned by this object: they are deleted
//   by the next call to Analyze and by the destructor.  Returns NULL when
//   Value is empty (the previous tokens are still discarded in that case).
//
// Throws:
//   INCORRECT_FORMAT when a string literal has no closing quote.
//
// Notes:
//   - Whitespace is skipped and produces no token.
//   - Every token other than a string literal or a two-character
//     relational operator gets a one-character, upper-cased lexeme.
//   - Characters that match none of the categories below still produce a
//     token; its type is whatever the CToken default constructor assigns.
tokenlist_t * CLexicalAnalyzer::Analyze(string Value)
{
	// Clear the existing token list
	for(tokenlist_t::iterator oldToken = this->m_TokenList->begin();
		oldToken != this->m_TokenList->end();
		++oldToken)
	{
		delete *oldToken;
	}
	this->m_TokenList->clear();

	// Special case: empty string
	if(Value.empty())
	{
		return NULL;
	}

	// Go through the string and tokenize
	string::iterator Curchar = Value.begin();
	while(Curchar != Value.end())
	{
		// The <cctype> functions are only defined for values that fit in
		// an unsigned char, so convert before classifying.  Keeping the
		// result as an int lets it be passed straight back to them.
		const int ch = toupper(static_cast<unsigned char>(*Curchar));

		// Skip whitespace before allocating anything, so no token is
		// created (and leaked) for it.
		if(isspace(ch))
		{
			++Curchar;
			continue;
		}

		// Set up a new token
		CToken *thisToken = new CToken();

		// Tracks whether a branch below already stored the lexeme.  An
		// explicit flag is needed because an empty string literal has a
		// legitimately empty lexeme.
		bool lexemeSet = false;

		// Try to determine what we're looking at
		if(isalpha(ch))
		{
			thisToken->SetTokenType(ALPHA);
		}
		else if(isdigit(ch))
		{
			thisToken->SetTokenType(DIGIT);
		}
		else if(ch == '.')
		{
			thisToken->SetTokenType(T_DECIMAL);
		}
		else if(ch == '\"')
		{
			// For string values, grab everything up to the closing quote.
			++Curchar; // We don't need the initial quote

			// Start the scan at the first character after the opening
			// quote so that an empty literal ("") is found as well, and
			// test for the end of the input before dereferencing.
			string::iterator LastChar = Curchar;
			while(LastChar != Value.end() && *LastChar != '\"')
			{
				++LastChar;
			}

			// If we hit the end of the entire string without a closing
			// quote, there's an error
			if(LastChar == Value.end())
			{
				// The token is not in the list yet; free it here
				delete thisToken;
				throw INCORRECT_FORMAT;
			}

			// Otherwise, retrieve the string (case is preserved)
			string tempToken;
			tempToken.assign(Curchar, LastChar);
			thisToken->SetLexeme(tempToken);
			thisToken->SetTokenType(STRING);
			lexemeSet = true;

			// And move the current character pointer onto the closing
			// quote; the increment at the bottom of the loop steps past it
			Curchar = LastChar;
		}
		else if(ch == '+' || ch == '-')
		{
			thisToken->SetTokenType(ADDOP);
		}
		else if(ch == '*' || ch == '/')
		{
			thisToken->SetTokenType(MULOP);
		}
		else if(ch == '^')
		{
			thisToken->SetTokenType(EXPOP);
		}
		else if(ch == '<' || ch == '>' || ch == '=')
		{
			thisToken->SetTokenType(RELOP);

			// Relational operators can be one or two characters.  Look at
			// the next character only if there is one.
			string::iterator NextChar = Curchar;
			++NextChar;

			// This handles cases such as >=, <=, <>.  Any of '<', '>',
			// '=' followed by '=' or '>' is merged, so "==" and ">>"
			// also become a single token.
			if(NextChar != Value.end() && (*NextChar == '=' || *NextChar == '>'))
			{
				string::iterator OperatorEnd = NextChar;
				++OperatorEnd;

				string tempToken;
				tempToken.assign(Curchar, OperatorEnd);
				thisToken->SetLexeme(tempToken);
				lexemeSet = true;

				// Move onto the operator's second character; the
				// increment at the bottom of the loop steps past it
				Curchar = NextChar;
			}
		}
		else if(ch == '(' || ch == ')')
		{
			thisToken->SetTokenType(PARENTHESIS);
		}
		else if(ch == ',')
		{
			thisToken->SetTokenType(COMMA);
		}

		// If the token lexeme has not already been set, use the current
		// character, converted to uppercase
		if(!lexemeSet)
		{
			string tempToken(1, static_cast<char>(ch));
			thisToken->SetLexeme(tempToken);
		}

		// Add the token to the token list
		this->m_TokenList->push_back(thisToken);

		// Increment the string iterator
		++Curchar;
	}

	// Finally, add an EOL token to the end, to prevent
	// non-dereferencable iterator exceptions in the parser
	this->m_TokenList->push_back(new CToken(EOL, ""));

	return this->m_TokenList;
}
💿 文件大小 65 K
👤 上传用户 yingyingyingyin
📂 所属分类企业管理
🏷️ 相关标签

#basic #interpreter #free
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -