📄 lexanalysis.cpp
字号:
#include "lexanalysis.h"
#include "stdio.h"
#include "math.h"
#include "string.h"
// Character classification table used by the lexer: charType[c] is the class
// of byte value c. GetToken() switches on this class and get_id() uses it to
// recognize identifier characters. Classes (indices assume ASCII encoding):
//    0 : NUL byte (treated as end of input)
//    1 : decimal digit '0'-'9'
//    2 : letter 'A'-'Z' / 'a'-'z' or underscore
//    3 : single quote (opens a string constant)
//    4 : '+'    5 : '-'    6 : '*'    7 : '/'    8 : '%'
//    9 : '='   10 : '&'   11 : '|'   12 : '>'   13 : '<'   14 : '!'
//   15 : '.'   16 : '('   17 : ')'
//   24 : whitespace (tab, line feed, carriage return, space)
//   25 : every other byte (GetToken() rejects it as an illegal character)
// Each row below holds 15 entries; the last row holds the single entry 255.
unsigned char charType[256] = {
0, 25, 25, 25, 25, 25, 25, 25, 25, 24, 24, 25, 25, 24, 25, // 0-14
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 15-29
25, 25, 24, 14, 25, 25, 25, 8, 10, 3, 16, 17, 6, 4, 25, // 30-44: space ! " # $ % & ' ( ) * + ,
5, 15, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 25, 25, // 45-59: - . / 0-9 : ;
13, 9, 12, 25, 25, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 60-74: < = > ? @ A-J
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 75-89: K-Y
2, 25, 25, 25, 25, 2, 25, 2, 2, 2, 2, 2, 2, 2, 2, // 90-104: Z [ \ ] ^ _ ` a-h
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 105-119: i-w
2, 2, 2, 25, 11, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 120-134: x y z { | } ~ ...
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 135-149
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 150-164
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 165-179
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 180-194
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 195-209
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 210-224
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 225-239
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 240-254
25 }; // 255
// Reserved words recognized by get_id(), stored in upper case because
// get_id() upper-cases the scanned text before comparing.
// The order is significant: get_id() switches on the matching index to pick
// the token type (0 = AND, 1 = OR, 2 = NOT, 3 = LIKE).
// The elements point at string literals, so they are declared const char*
// (binding a literal to a plain char* is ill-formed since C++11).
const char *remainWord[4] =
{
    "AND",
    "OR",
    "NOT",
    "LIKE"
};
// Scans the remainder of a numeric constant of the form
//     digits [ '.' digits ] [ ('E'|'e') ['+'|'-'] digits ]
// and accumulates its value into token.num_val.
//
// On entry token.num_val must already hold the value of the first digit
// (GetToken() consumes that digit and stores it before calling this).
// Every character read is also appended to m_parsed so that an error message
// can quote the offending text.
//
// Returns true when the number ends normally; the terminating character is
// pushed back to the source unless it is the NUL end-of-input marker.
// Returns false on a malformed number, with the reason left in m_error_msg.
bool cLexicalAnalysis::get_number(Token& token)
{
    // States of the scanning automaton (numbering kept from the original).
    enum
    {
        ST_INTEGER    = 1,  // inside the integer part
        ST_POINT      = 2,  // just after '.', a digit is mandatory
        ST_FRACTION   = 3,  // inside the fractional part
        ST_EXP_MARK   = 4,  // just after 'E'/'e'
        ST_EXP_SIGN   = 5,  // just after the exponent sign, a digit is mandatory
        ST_EXP_DIGITS = 6   // inside the exponent digits
    };

    // Upper bound on the accumulated exponent magnitude; keeps the long from
    // overflowing on absurd input while still saturating pow() to inf / 0.
    const long kMaxExponent = 100000;

    int curState = ST_INTEGER;
    double fracScale = 10.0;     // divisor for the next fractional digit
    long expMagnitude = 0;       // absolute value of the exponent
    bool expNegative = false;    // sign of the exponent, tracked separately

    for(;;)
    {
        const unsigned char curChar = m_source->GetChar();
        m_parsed[m_parseLen++] = curChar;

        const bool isDigit = (curChar >= '0' && curChar <= '9');
        const bool isExpMark = (curChar == 'E' || curChar == 'e');
        bool finished = false;   // number ended cleanly at curChar
        bool illegal = false;    // curChar cannot appear at this position

        switch(curState)
        {
        case ST_INTEGER:
            if(isDigit)
            {
                token.num_val *= 10;
                token.num_val += curChar - '0';
            }
            else if(curChar == '.')
                curState = ST_POINT;
            else if(isExpMark)
                curState = ST_EXP_MARK;
            else
                finished = true;
            break;

        case ST_POINT:
            if(isDigit)
            {
                token.num_val += (curChar - '0') / fracScale;
                fracScale *= 10.0;
                curState = ST_FRACTION;
            }
            else
                illegal = true;
            break;

        case ST_FRACTION:
            if(isDigit)
            {
                token.num_val += (curChar - '0') / fracScale;
                fracScale *= 10.0;
            }
            else if(curChar == '.')
                illegal = true;
            else if(isExpMark)
                curState = ST_EXP_MARK;
            else
                finished = true;
            break;

        case ST_EXP_MARK:
            if(isDigit)
            {
                expMagnitude = curChar - '0';
                curState = ST_EXP_DIGITS;
            }
            else if(curChar == '+')
                curState = ST_EXP_SIGN;
            else if(curChar == '-')   // was `curChar = -1`: an always-true assignment
            {
                expNegative = true;
                curState = ST_EXP_SIGN;
            }
            else
                illegal = true;
            break;

        case ST_EXP_SIGN:
            if(isDigit)
            {
                expMagnitude = curChar - '0';
                curState = ST_EXP_DIGITS;
            }
            else
                illegal = true;
            break;

        case ST_EXP_DIGITS:
            if(isDigit)
            {
                if(expMagnitude < kMaxExponent)
                    expMagnitude = expMagnitude * 10 + (curChar - '0');
            }
            else if(curChar == '.')
                illegal = true;
            else
            {
                // Apply the sign only here so multi-digit negative exponents
                // (and ones with a leading zero) come out right.
                const long exponent = expNegative ? -expMagnitude : expMagnitude;
                token.num_val *= pow(10.0, static_cast<double>(exponent));
                finished = true;
            }
            break;

        default:
            sprintf(m_error_msg, "Unknown error in an number parsing!");
            return false;
        }

        if(illegal)
        {
            m_parsed[m_parseLen] = 0;
            sprintf(m_error_msg, "%s is an illegal number!", m_parsed);
            return false;
        }
        if(finished)
        {
            // The terminator belongs to the next token; NUL means end of
            // input and is not pushed back.
            if(curChar != 0)
                m_source->PushBackOne();
            return true;
        }
    }
}
// Scans the remainder of an identifier (letters, digits and underscores; the
// first character was already stored in m_parsed by GetToken()).
//
// The text as written is copied to token.str_val. m_parsed is then
// upper-cased in place and compared against remainWord; on a match the token
// type becomes AND / OR / NOT / LIKE and token.str_val is replaced by the
// canonical upper-case spelling. Otherwise token.type is left as the caller
// set it.
//
// The character that ends the identifier is pushed back to the source unless
// it is the NUL end-of-input marker.
// Returns true on success; false (with m_error_msg set) only if a reserved
// word index has no matching case below.
bool cLexicalAnalysis::get_id(Token& token)
{
    for(;;)
    {
        const unsigned char curChar = m_source->GetChar();
        const unsigned char kind = charType[curChar];
        if(kind != 1 && kind != 2)
        {
            if(curChar != 0)
                m_source->PushBackOne();
            break;
        }
        m_parsed[m_parseLen++] = curChar;
    }
    m_parsed[m_parseLen] = 0;

    strcpy(token.str_val, m_parsed);

    // Upper-case once, before the search. Identifier characters are ASCII
    // letters, digits and '_' only, so a plain ASCII mapping is sufficient
    // and avoids the non-standard _strupr().
    for(char* p = m_parsed; *p != 0; ++p)
    {
        if(*p >= 'a' && *p <= 'z')
            *p = static_cast<char>(*p - 'a' + 'A');
    }

    // The index is declared outside the loop because it is needed afterwards.
    const int wordCount = static_cast<int>(sizeof(remainWord) / sizeof(remainWord[0]));
    int wordIndex = 0;
    while(wordIndex < wordCount && strcmp(remainWord[wordIndex], m_parsed) != 0)
        ++wordIndex;

    if(wordIndex < wordCount)
    {
        switch(wordIndex)
        {
        case 0:
            token.type = AND;
            strcpy(token.str_val, "AND");
            break;
        case 1:
            token.type = OR;
            strcpy(token.str_val, "OR");
            break;
        case 2:
            token.type = NOT;
            strcpy(token.str_val, "NOT");
            break;
        case 3:
            token.type = LIKE;
            strcpy(token.str_val, "LIKE");
            break;
        default:
            strcpy(m_error_msg, "Unknown error in identifier parsing!");
            return false;
        }
    }
    return true;
}
// Scans a string constant whose opening single quote has already been
// consumed by GetToken(). Characters are appended to m_parsed until the
// closing single quote; the quote itself is consumed but not stored.
//
// Returns true and copies the text to token.str_val when the closing quote
// is found. Returns false, with m_error_msg quoting the unterminated text,
// when the NUL end-of-input marker is reached first.
bool cLexicalAnalysis::get_string(Token& token)
{
    unsigned char curChar = m_source->GetChar();
    while(curChar != '\'' && curChar != 0)
    {
        m_parsed[m_parseLen++] = curChar;
        curChar = m_source->GetChar();
    }
    m_parsed[m_parseLen] = 0;

    if(curChar == 0)
    {
        // The original passed no argument for %s (undefined behaviour) and
        // its wording ("not a illegal") said the opposite of what it meant.
        sprintf(m_error_msg, "%s is not a legal string constant!", m_parsed);
        return false;
    }

    strcpy(token.str_val, m_parsed);
    return true;
}
// Builds a lexer with no source attached: empty scan buffer, empty error
// message, state 0.
cLexicalAnalysis::cLexicalAnalysis()
{
    // No input yet; SetSource() supplies it.
    m_source = 0;
    m_cur_state = 0;

    // Scan buffer starts out as an empty string.
    m_parseLen = 0;
    m_parsed[0] = '\0';

    // No error has been reported yet.
    m_error_msg[0] = '\0';
}
// Attaches the character source that GetToken() reads from.
// A null pointer is rejected: the current source is left unchanged and false
// is returned. Otherwise the pointer is stored and true is returned.
bool cLexicalAnalysis::SetSource(cSource* source)
{
    const bool usable = (source != 0);
    if(usable)
        m_source = source;
    return usable;
}
// Reads the next token from the attached source into `token`.
//
// Leading whitespace is skipped. The first significant character is
// classified through charType and either yields a token directly or is
// handed to get_number() / get_id() / get_string(). Two-character operators
// (==, &&, ||, >=, <=, <>, !=) are recognized with one character of
// lookahead; a lookahead character that is not part of the operator is
// pushed back unless it is the NUL end-of-input marker.
//
// Returns true with token.type set (EOI at end of input).
// Returns false on a lexical error or when no source is attached; the reason
// is available through GetLastError().
bool cLexicalAnalysis::GetToken(Token& token)
{
    if(!m_source)
    {
        // SetSource() was never called successfully; avoid a null dereference.
        strcpy(m_error_msg, "No source has been set!");
        return false;
    }

    unsigned char curChar;
    m_cur_state = 0;
    while(1)
    {
        // Restart the scan buffer for every candidate token.
        m_parseLen = 0;
        curChar = m_source->GetChar();
        m_parsed[m_parseLen++] = curChar;

        switch(charType[curChar])
        {
        case 0:     // NUL: end of input
            token.type = EOI;
            return true;

        case 1:     // digit: numeric constant
            token.type = CONSTANT;
            token.num_val = curChar - '0';
            return get_number(token);

        case 2:     // letter or '_': identifier or reserved word
            token.type = IDENTIFIER;
            return get_id(token);

        case 3:     // single quote: string constant
            token.type = STRING;
            m_parseLen--;   // the opening quote is not part of the value
            return get_string(token);

        case 4:
            token.type = PLUS;
            return true;

        case 5:
            token.type = MINUS;
            return true;

        case 6:
            token.type = MULTI;
            return true;

        case 7:
            token.type = DIV;
            return true;

        case 8:
            token.type = REMAINDER;
            return true;

        case 9:     // '=' or '=='
            token.type = EQ;
            curChar = m_source->GetChar();
            if(curChar != 0 && curChar != '=')
                m_source->PushBackOne();
            return true;

        case 10:    // '&&' (a single '&' is an error)
            token.type = AND;
            curChar = m_source->GetChar();
            if(curChar == '&')
                return true;
            sprintf(m_error_msg, "'&' is a illegal operator!");
            if(curChar != 0)
                m_source->PushBackOne();
            return false;

        case 11:    // '||' (a single '|' is an error)
            token.type = OR;
            curChar = m_source->GetChar();
            if(curChar == '|')   // was `curChar = '|'`: an always-true assignment
                return true;
            sprintf(m_error_msg, "'|' is a illegal operator!");
            if(curChar != 0)
                m_source->PushBackOne();
            return false;

        case 12:    // '>' or '>='
            curChar = m_source->GetChar();
            if(curChar == '=')
                token.type = GREATEQ;
            else
            {
                if(curChar != 0)
                    m_source->PushBackOne();
                token.type = GREAT;
            }
            return true;

        case 13:    // '<', '<=' or '<>'
            curChar = m_source->GetChar();
            if(curChar == '>')
                token.type = NOTEQ;
            else if(curChar == '=')
                token.type = LESSEQ;
            else
            {
                if(curChar != 0)
                    m_source->PushBackOne();
                token.type = LESS;
            }
            return true;

        case 14:    // '!' or '!='
            curChar = m_source->GetChar();
            if(curChar == '=')
                token.type = NOTEQ;
            else
            {
                if(curChar != 0)
                    m_source->PushBackOne();
                token.type = NOT;
            }
            return true;

        case 15:
            token.type = PERIOD;
            return true;

        case 16:
            token.type = LPARENTHESES;
            return true;

        case 17:
            token.type = RPARENTHESES;
            return true;

        case 24:    // whitespace: skip and read the next character
            break;

        default:
            sprintf(m_error_msg, "'%c' is illegal character!", curChar);
            return false;
        }
    }
}
// Gives access to the lexer's error-message buffer, which the scanning
// functions fill in when they return false.
char* cLexicalAnalysis::GetLastError()
{
    char* const lastMessage = m_error_msg;
    return lastMessage;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -