📄 lexicalanalyzer.cpp
字号:
#include "StdAfx.h"
#include "LexicalAnalyzer.h"
// Builds an analyzer that starts out with a freshly allocated, empty token list.
CLexicalAnalyzer::CLexicalAnalyzer(void)
    : m_TokenList(new tokenlist_t())
{
}
// Tears the analyzer down: every token still held by the list is freed
// first, then the list itself is emptied and released.
CLexicalAnalyzer::~CLexicalAnalyzer(void)
{
    tokenlist_t *ownedTokens = this->m_TokenList;

    for(tokenlist_t::iterator pos = ownedTokens->begin(); pos != ownedTokens->end(); ++pos)
    {
        delete *pos;
    }

    ownedTokens->clear();
    delete ownedTokens;
}
// Lexically analyze a string.
//
// Frees the tokens left over from any previous call, then walks Value one
// character at a time and appends one token per lexical unit to the
// analyzer's token list:
//   - whitespace is skipped and produces no token;
//   - each letter (ALPHA), digit (DIGIT), '.', '+', '-', '*', '/', '^',
//     '(', ')' and ',' becomes a single-character token whose lexeme is the
//     upper-cased character;
//   - a double-quoted run becomes one STRING token whose lexeme is the text
//     between the quotes (case preserved, quotes excluded);
//   - '<', '>' and '=' become RELOP tokens; when immediately followed by
//     '=' or '>' the two characters form a single token (>=, <=, <>, ...);
//   - any other character becomes a token with the default token type and
//     the character itself as lexeme.
// An EOL token is always appended last.
//
// Parameters:
//   Value - the text to tokenize.
//
// Returns:
//   A pointer to the analyzer's internal token list, or NULL when Value is
//   empty (the list has still been cleared in that case). The tokens in the
//   list are deleted by the next call to Analyze.
//
// Throws:
//   INCORRECT_FORMAT when a string literal has no closing quote. Tokens
//   produced before the error remain in the internal list.
tokenlist_t * CLexicalAnalyzer::Analyze(string Value)
{
    // Clear the existing token list; the list holds owning pointers.
    for(tokenlist_t::iterator tokenIterator = this->m_TokenList->begin();
        tokenIterator != this->m_TokenList->end();
        ++tokenIterator)
    {
        delete *tokenIterator;
    }
    this->m_TokenList->clear();

    // Special case: empty string
    if(Value.empty())
    {
        return NULL;
    }

    // Go through the string and tokenize
    string::iterator Curchar = Value.begin();
    while(Curchar != Value.end())
    {
        // Grab the current character, upper-cased. The <cctype> functions
        // require a value representable as unsigned char, so never hand
        // them a plain (possibly negative) char.
        const char ch = static_cast<char>(toupper(static_cast<unsigned char>(*Curchar)));
        const unsigned char classifyCh = static_cast<unsigned char>(ch);

        // Whitespace produces no token. Check this before allocating so
        // that skipped characters do not leak a token each.
        if(isspace(classifyCh))
        {
            ++Curchar;
            continue;
        }

        // Set up a new token
        CToken *thisToken = new CToken();

        // Tracks whether a branch below already stored the lexeme. A flag
        // is needed because an empty string literal has an empty lexeme
        // that must not be replaced by the single-character fallback.
        bool lexemeSet = false;

        // Try to determine what we're looking at
        if(isalpha(classifyCh))
        {
            thisToken->SetTokenType(ALPHA);
        }
        else if(isdigit(classifyCh))
        {
            thisToken->SetTokenType(DIGIT);
        }
        else if(ch == '.')
        {
            thisToken->SetTokenType(T_DECIMAL);
        }
        else if(ch == '\"')
        {
            // For string values, we'll want to grab the whole string
            ++Curchar; // We don't need the initial quote

            // Scan forward to the closing quote, never stepping past or
            // dereferencing the end of the input. Starting at Curchar
            // (not one past it) lets an empty literal "" terminate at once.
            string::iterator LastChar = Curchar;
            while(LastChar != Value.end() && *LastChar != '\"')
            {
                ++LastChar;
            }

            // If we hit the end of the entire string without a closing
            // quote, there's an error. The token is not in the list yet,
            // so release it here before leaving.
            if(LastChar == Value.end())
            {
                delete thisToken;
                throw INCORRECT_FORMAT;
            }

            // Otherwise, retrieve the string
            string tempToken;
            tempToken.assign(Curchar, LastChar);
            thisToken->SetLexeme(tempToken);
            thisToken->SetTokenType(STRING);
            lexemeSet = true;

            // And move the current character pointer onto the closing
            // quote; the increment at the bottom of the loop steps over it.
            Curchar = LastChar;
        }
        else if(ch == '+' || ch == '-')
        {
            thisToken->SetTokenType(ADDOP);
        }
        else if(ch == '*' || ch == '/')
        {
            thisToken->SetTokenType(MULOP);
        }
        else if(ch == '^')
        {
            thisToken->SetTokenType(EXPOP);
        }
        else if(ch == '<' || ch == '>' || ch == '=')
        {
            thisToken->SetTokenType(RELOP);

            // Relational operators can be one or two characters. Only look
            // at the following character when there is one.
            string::iterator LastChar = Curchar;
            ++LastChar;

            // This handles cases such as >=, <=, <>
            if(LastChar != Value.end() && (*LastChar == '=' || *LastChar == '>'))
            {
                // Bump up the lastchar pointer
                // to completely include the entire operator
                ++LastChar;
                string tempToken;
                tempToken.assign(Curchar, LastChar);
                thisToken->SetLexeme(tempToken);
                lexemeSet = true;

                // Increment the current character pointer to skip
                // the rest of the operator
                ++Curchar;
            }
        }
        else if(ch == '(' || ch == ')')
        {
            thisToken->SetTokenType(PARENTHESIS);
        }
        else if(ch == ',')
        {
            thisToken->SetTokenType(COMMA);
        }

        // If the token lexeme has not already been set, use the single
        // upper-cased character as the lexeme.
        if(!lexemeSet)
        {
            string tempToken(1, ch);
            thisToken->SetLexeme(tempToken);
        }

        // Add the token to the token list
        this->m_TokenList->push_back(thisToken);

        // Increment the string iterator
        ++Curchar;
    }

    // Finally, add an EOL token to the end, to prevent
    // non-dereferencable iterator exceptions in the parser
    this->m_TokenList->push_back(new CToken(EOL, ""));
    return this->m_TokenList;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -