📄 tokenreader.cpp
字号:
#include "stdafx.h"
#include "TokenReader.h"
#include "Page.h"
#include "CPGlobals.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Storage for the static symbol table. Init() fills it: the index is either a
// single character code or the two-character code (c1 << 8) + c2 built by
// SYMBOLS_INDEX, and the stored value is a TT_* token type.
UINT CTokenReader::mSymbols[MAX_SHORT];
// Test-dump table for CTokenReader: lists mpCode as the dumped member.
// NOTE(review): the BEGIN_TEST_DUMP / TEST_DUMP / END_TEST_DUMP macros are
// defined outside this file; presumably they expand to a debug dump routine.
BEGIN_TEST_DUMP(CTokenReader)
TEST_DUMP(mpCode)
END_TEST_DUMP()
// Test-dump table for CTokenList: walks the list from its head position and
// dumps every element returned by GetNext().
// NOTE(review): TEST_CODE / TEST_MEMBER / TEST_DUMP are project macros defined
// elsewhere; presumably TEST_CODE injects its argument verbatim into the
// generated dump routine.
BEGIN_TEST_DUMP(CTokenList)
TEST_CODE(POSITION pos = TEST_MEMBER(GetHeadPosition());)
TEST_CODE(while(pos != NULL))
TEST_CODE({)
TEST_DUMP(GetNext(pos));
TEST_CODE(})
END_TEST_DUMP()
// Constructor: starts with no source text attached (mpCode is NULL), which
// makes BeforeNextToken() report FALSE until a code pointer is set.
CTokenReader::CTokenReader()
{
mpCode = NULL;
}
// Destructor: intentionally empty; nothing is released here (in particular
// the text mpCode points to is not freed by this class).
CTokenReader::~CTokenReader()
{
}
/*
 * Registers a two-character symbol in mSymbols.
 *
 * The table slot is (c1 << 8) + c2 and receives `index` (a TT_* token type).
 * The macro needs an integer variable named `i` in the enclosing scope and
 * overwrites it. It is wrapped in do { } while(0) so that it behaves as one
 * statement (safe inside an unbraced if/else); callers terminate it with ';'.
 */
#define SYMBOLS_INDEX(c1, c2, index) \
    do { \
        i = (c1); \
        i <<= 8; \
        i += (c2); \
        mSymbols[i] = (index); \
    } while(0)
void CTokenReader::Init()
{
//只需初始化一次
static inited = FALSE;
if(inited) return;
inited = TRUE;
::memset(&mSymbols[0], TT_UNDEFINED, sizeof(UINT)*MAX_SHORT);
//换行
mSymbols['\n'] = TT_EOL; // '\n'
//转义行
mSymbols['\\'] = TT_REVERSE; // '\\'
//字母
int i;
for(i='a'; i<'z'; i++)
mSymbols[i] = TT_WORD;
for(i='A'; i<'Z'; i++)
mSymbols[i] = TT_WORD;
mSymbols['_'] = TT_WORD;
//数字
for(i='0'; i<'9'; i++)
mSymbols[i] = TT_DIGIT;
//预编译
mSymbols['#'] = TT_PREPROCESSOR;
//连行符
SYMBOLS_INDEX('\\', '\n', TT_CONNECT_LINE);
//连接符
SYMBOLS_INDEX('#', '#', TT_CONNECT);
//单行注释 //
SYMBOLS_INDEX('/', '/', TT_SINCOMMENT);
//多行注释 /*
SYMBOLS_INDEX('/', '*', TT_MULCOMMENT);
//字符串 "
mSymbols['\"'] = TT_STRING;
//宽字符串 L"
SYMBOLS_INDEX('L', '\"', TT_WSTRING);
//字符 '
mSymbols['\''] = TT_CHAR;
//宽字符串 L"
SYMBOLS_INDEX('L', '\'', TT_WCHAR);
//括号
//小括号 (
mSymbols['('] = TT_LPARAN;
mSymbols[')'] = TT_RPARAN;
//中括号 [
mSymbols['['] = TT_LBRACE;
mSymbols[']'] = TT_RBRACE;
//大括号 {
mSymbols['{'] = TT_LSQUARE;
mSymbols['}'] = TT_RSQUARE;
//特殊操作符
//逗号 ,
mSymbols[','] = TT_COMMA;
//分号
mSymbols[';'] = TT_SEMICOLON;
//冒号
mSymbols[':'] = TT_COLON;
mSymbols['?'] = TT_SELECT;
//域符号 ::
SYMBOLS_INDEX(':', ':', TT_SCOPE);
//操作符
mSymbols['+'] = TT_PLUS; // +
mSymbols['-'] = TT_MINUS; // -
mSymbols['*'] = TT_TIMES; // *
mSymbols['/'] = TT_DIV; // /
mSymbols['%'] = TT_MOD; // %
mSymbols['^'] = TT_XOR; // ^
mSymbols['&'] = TT_BIT_AND; // &
mSymbols['|'] = TT_BIT_OR; // |
mSymbols['~'] = TT_BIT_NOT; // ~
mSymbols['!'] = TT_LOGIC_NOT; // !
mSymbols['='] = TT_ASSIGN; // =
mSymbols['<'] = TT_LT; // <
mSymbols['>'] = TT_GT; // >
SYMBOLS_INDEX('<', '=', TT_NGT); // <=
SYMBOLS_INDEX('>', '=', TT_NLT); // >=
SYMBOLS_INDEX('+', '+', TT_PLUS_PLUS); // ++
SYMBOLS_INDEX('-', '-', TT_MINUS_MINUS); // --
SYMBOLS_INDEX('<', '<', TT_LMB); // <<
SYMBOLS_INDEX('>', '>', TT_RMB); // >>
SYMBOLS_INDEX('=', '=', TT_EQ); // ==
SYMBOLS_INDEX('!', '=', TT_NEQ); // !=
SYMBOLS_INDEX('&', '&', TT_AND); // &&
SYMBOLS_INDEX('|', '|', TT_OR); // ||
SYMBOLS_INDEX('+', '=', TT_PLUS_ASSIGN); // +=
SYMBOLS_INDEX('-', '=', TT_MINUS_ASSIGN); // -=
SYMBOLS_INDEX('/', '=', TT_DIV_ASSIGN); // /=
SYMBOLS_INDEX('%', '=', TT_MOD_ASSIGN); // %=
SYMBOLS_INDEX('^', '=', TT_XOR_ASSIGN); // ^=
SYMBOLS_INDEX('&', '=', TT_BIT_AND_ASSIGN); // &=
SYMBOLS_INDEX('|', '=', TT_BIT_OR_ASSIGN); // |=
SYMBOLS_INDEX('*', '=', TT_TIMES_ASSIGN); // *=
//SYMBOLS_INDEX('', '=', TT_LMB_ASSIGN); // <<=
//SYMBOLS_INDEX('=', '=', TT_RMB_ASSIGN); // >>=
SYMBOLS_INDEX('-', '>', TT_ARROW); // ->
//TT_ARROW_STAR, // ->*
mSymbols['.'] = TT_DOT; // .
SYMBOLS_INDEX('.', '*', TT_DOT_STAR); // .*
//SYMBOLS_INDEX('?', ':', TT_SELECT); // ? :
//优先级
::memset(CToken::sPrecedence, 0, sizeof(CToken::sPrecedence));
CToken::sPrecedence[TT_LPARAN] = -1; //小括号左 (
CToken::sPrecedence[TT_RPARAN] = 0; //小括号右 )
CToken::sPrecedence[TT_LBRACE] = -1; //中括号左 [
CToken::sPrecedence[TT_RBRACE] = 0; //中括号右 ]
CToken::sPrecedence[TT_LSQUARE] = -1; //大括号左 {
CToken::sPrecedence[TT_RSQUARE] = 0; //大括号右 }
CToken::sPrecedence[TT_COMMA] = 1; //逗号 ] = ;
CToken::sPrecedence[TT_SEMICOLON] = 0; //分号 ] = ;
CToken::sPrecedence[TT_COLON] = 3; //冒号 :
CToken::sPrecedence[TT_SELECT] = 3; //选择 ?
CToken::sPrecedence[TT_SCOPE] = 23; //域符号 ::
CToken::sPrecedence[TT_PLUS] = 8; // +
CToken::sPrecedence[TT_MINUS] = 8; // -
CToken::sPrecedence[TT_TIMES] = 9; // *
CToken::sPrecedence[TT_DIV] = 9; // /
CToken::sPrecedence[TT_MOD] = 9; // %
CToken::sPrecedence[TT_XOR] = 5; // ^
CToken::sPrecedence[TT_BIT_AND] = 5; // &
CToken::sPrecedence[TT_BIT_OR] = 5; // |
CToken::sPrecedence[TT_BIT_NOT] = 16; // ~
CToken::sPrecedence[TT_LOGIC_NOT] = 16; // !
CToken::sPrecedence[TT_ASSIGN] = 2; // =
CToken::sPrecedence[TT_LT] = 6; // <
CToken::sPrecedence[TT_GT] = 6; // >
CToken::sPrecedence[TT_NGT] = 6; // <=
CToken::sPrecedence[TT_NLT] = 6; // >=
CToken::sPrecedence[TT_PLUS_PLUS] = 17; // ++
CToken::sPrecedence[TT_MINUS_MINUS] = 17; // --
CToken::sPrecedence[TT_LMB] = 7; // <<
CToken::sPrecedence[TT_RMB] = 7; // >>
CToken::sPrecedence[TT_EQ] = 6; // ==
CToken::sPrecedence[TT_NEQ] = 6; // !=
CToken::sPrecedence[TT_AND] = 4; // &&
CToken::sPrecedence[TT_OR] = 4; // ||
CToken::sPrecedence[TT_PLUS_ASSIGN] = 2; // +=
CToken::sPrecedence[TT_MINUS_ASSIGN] = 2; // -=
CToken::sPrecedence[TT_DIV_ASSIGN] = 2; // /=
CToken::sPrecedence[TT_MOD_ASSIGN] = 2; // %=
CToken::sPrecedence[TT_XOR_ASSIGN] = 2; // ^=
CToken::sPrecedence[TT_BIT_AND_ASSIGN] = 2; // &=
CToken::sPrecedence[TT_BIT_OR_ASSIGN] = 2; // |=
CToken::sPrecedence[TT_TIMES_ASSIGN] = 2; // *=
CToken::sPrecedence[TT_ARROW] = 22; // ->
CToken::sPrecedence[TT_DOT] = 22; // .
CToken::sPrecedence[TT_DOT_STAR] = 11; // .*
CToken::sPrecedence[TT_LMB_ASSIGN] = 2; // <<=
CToken::sPrecedence[TT_RMB_ASSIGN] = 2; // >>=
CToken::sPrecedence[TT_ARROW_STAR] = 11; // ->*
}
/**
 * Advances mpCode past every character whose symbol type is TT_UNDEFINED
 * (blanks and anything else the symbol table does not classify).
 *
 * @param oColumns  incremented once for each character skipped.
 * @return TRUE when mpCode now points at a classified character; FALSE when
 *         mpCode is NULL or the terminating '\0' was reached first.
 */
BOOL CTokenReader::BeforeNextToken(UINT& oColumns)
{
    if(mpCode == NULL)
        return FALSE;

    for(; *mpCode != '\0'; ++mpCode, ++oColumns)
    {
        const char current = *mpCode;
        // The first classified character is where the next token starts.
        if(GetSymbolType(current) != TT_UNDEFINED)
            return TRUE;
    }

    // Ran into the end of the text without finding a token start.
    return FALSE;
}
// Creates a token of type iType at the current line/column and lets the
// CToken member function `pf` read its content from the code text.
//   iType      - token type stored in the new token
//   pf         - CToken reader member function; called with the code pointer
//                and its int result is used as the token length
//   iOffset    - number of leading characters skipped before `pf` is called
//                (callers in this file pass 1 for " and ', 2 for L" and L')
//   iForeSpace - forwarded unchanged to the CToken constructor
// Returns the newly allocated token; the caller owns it and must delete it.
// NOTE(review): mpCode ends up advanced by `len` in total (iOffset, then
// len - iOffset), so `pf` presumably returns a length that already includes
// the skipped prefix - confirm against the CToken::Read* implementations.
CToken* CTokenReader::ReadCodeToken(UINT iType,
PFReadToken pf,
int iOffset,
BOOL iForeSpace)
{
ASSERT(mpCode != NULL);
ASSERT(strlen(mpCode) > 0);
// The caller is responsible for deleting the token
CToken* pToken = new CToken(TOKEN_LINE, TOKEN_COLUMN, iType, iForeSpace);
mpCode += iOffset;
int len = (pToken->*pf)(mpCode);
mpCode += (len-iOffset); // move the code pointer to just after this token
COLUMN_INCREASE(len);
TEST_TRACE(pToken);
return pToken;
}
/**
 * Creates a fixed-length token (operator, punctuation or end-of-line) from
 * the next iLen characters of the code text and steps mpCode past them.
 *
 * @param iType       token type stored in the new token
 * @param iLen        number of characters copied into the token and consumed
 * @param iForeSpace  forwarded unchanged to the CToken constructor
 * @return the newly allocated token; the caller owns it and must delete it.
 */
NOTEST
CToken* CTokenReader::ReadOperatorToken(UINT iType, int iLen, BOOL iForeSpace)
{
    CToken* pOperator = new CToken(TOKEN_LINE, TOKEN_COLUMN, iType, iForeSpace);
    pOperator->CopyContent(mpCode, iLen);

    // Step over the symbol that was just copied.
    mpCode += iLen;
    COLUMN_INCREASE(iLen);

    if(iType != TT_EOL)
        return pOperator;

    // An end-of-line token starts a new line: bump the line, reset the column.
    LINE_INCREASE(1);
    COLUMN_RESET();
    return pOperator;
}
/**
 * Reads the next token from the code text at mpCode.
 *
 * Classification order: word / digit first, then the three-character
 * operators (<<=, >>=, ->*), then two-character symbols (comments, wide
 * literals, line continuation, operators), and finally single-character
 * symbols. Line continuations (backslash + "\n", "\r" or "\r\n") produce no
 * token: they are consumed and scanning restarts on the following line.
 *
 * @return a newly allocated token owned by the caller, or NULL when no
 *         further token can be read.
 */
CToken* CTokenReader::NextToken()
{
    // Loop instead of recursing so that any number of consecutive line
    // continuations is handled without growing the call stack.
    for(;;)
    {
        // Number of blanks (or characters acting as blanks) before the token
        UINT foreSpace = 0;
        if(!BeforeNextToken(foreSpace)) return NULL;
        COLUMN_INCREASE(foreSpace);

        LPCTSTR pCode = mpCode; // look-ahead pointer; mpCode itself is not moved here
        char ch = *pCode++;     // cannot be '\0' once BeforeNextToken() returned TRUE
        char next = *pCode++;   // may be the terminating '\0'

        // Handle the most frequent cases first: words and numbers
        UINT type = GetSymbolType(ch);
        if(type == TT_WORD)
        {
            // L" and L' introduce a wide string / wide character, not a word
            if(ch != 'L' || (next != '\"' && next != '\''))
                return ReadCodeToken(type, &CToken::ReadWord, 0, foreSpace);
        }
        else if(type == TT_DIGIT)
        {
            return ReadCodeToken(type, &CToken::ReadDigit, 0, foreSpace);
        }

        // pCode may only be dereferenced when `next` was not the terminator
        if(next != 0)
        {
            // Three-character operators: <<= >>= ->*
            if(ch == '<' && next == '<')
            {
                char nextnext = *pCode;
                if(nextnext == '=') // <<=
                    return ReadOperatorToken(TT_LMB_ASSIGN, 3, foreSpace);
            }
            else if(ch == '>' && next == '>')
            {
                char nextnext = *pCode;
                if(nextnext == '=') // >>=
                    return ReadOperatorToken(TT_RMB_ASSIGN, 3, foreSpace);
            }
            else if(ch == '-' && next == '>')
            {
                char nextnext = *pCode;
                if(nextnext == '*') // ->*
                    return ReadOperatorToken(TT_ARROW_STAR, 3, foreSpace);
            }
            else if(ch == '\\' && next == '\r') // line continuation with a "\r" or "\r\n" line ending
            {
                char nextnext = *pCode;
                int len = 2;
                if(nextnext == '\n') len++;
                LINE_INCREASE(1);
                COLUMN_RESET();
                mpCode += len;
                continue; // no token produced; scan the next line
            }

            // Two-character symbols
            UINT type2 = GetSymbolType(ch, next);
            if(type2 != TT_UNDEFINED)
            {
                if(type2 == TT_SINCOMMENT) // single-line comment //
                    return ReadCodeToken(type2, &CToken::ReadSingleLineComment, 0, foreSpace);
                else if(type2 == TT_MULCOMMENT) // multi-line comment /*
                    return ReadCodeToken(type2, &CToken::ReadMulLineComment, 0, foreSpace);
                else if(type2 == TT_WSTRING) // wide string L"
                    return ReadCodeToken(type2, &CToken::ReadWString, 2, foreSpace);
                else if(type2 == TT_WCHAR) // wide character L'
                    return ReadCodeToken(type2, &CToken::ReadWChar, 2, foreSpace);
                else if(type2 == TT_CONNECT_LINE) // backslash + '\n': ignored
                {
                    LINE_INCREASE(1);
                    COLUMN_RESET();
                    mpCode += 2;
                    continue; // no token produced; scan the next line
                }
                else // anything else is a two-character operator
                    return ReadOperatorToken(type2, 2, foreSpace);
            }
        }

        // Single-character symbols; `type` still holds the type of `ch`
        if(type != TT_UNDEFINED)
        {
            if(type == TT_STRING) // string literal "
                return ReadCodeToken(type, &CToken::ReadString, 1, foreSpace);
            else if(type == TT_CHAR) // character literal '
                return ReadCodeToken(type, &CToken::ReadChar, 1, foreSpace);
            else // operators and the end-of-line symbol
                return ReadOperatorToken(type, 1, foreSpace);
        }
        return NULL;
    }
}
/**
 * Tokenizes ipCode and appends every token to the tail of oList.
 *
 * Does nothing when ipCode is NULL. Otherwise makes sure the lookup tables
 * are initialised, attaches the text with SetCode() and pulls tokens from
 * NextToken() until it returns NULL. The tokens are heap-allocated;
 * ClearTokenList() deletes them.
 */
void CTokenReader::ReadTokenList(CTokenList& oList, LPCTSTR ipCode)
{
    if(ipCode == NULL)
        return;

    Init();
    SetCode(ipCode);

    for(CToken* pToken = NextToken(); pToken != NULL; pToken = NextToken())
    {
        TEST_TRACE(pToken);
        oList.AddTail(pToken);
    }
}
/**
 * Deletes every token stored in iList and then empties the list.
 */
void CTokenReader::ClearTokenList(CTokenList& iList)
{
    // First release the token objects, then drop the list nodes.
    for(POSITION cursor = iList.GetHeadPosition(); cursor != NULL; )
    {
        delete iList.GetNext(cursor);
    }
    iList.RemoveAll();
}
/**
 * Removes from iList, and deletes, every token whose type is less than or
 * equal to TT_LITERAL_BEGIN. All other tokens stay in place, in order.
 */
void CTokenReader::DeleteInvalidToken(CTokenList& iList)
{
    POSITION cursor = iList.GetHeadPosition();
    while(cursor != NULL)
    {
        // Remember this node before GetNext() moves the cursor forward, so
        // the node can still be removed afterwards.
        const POSITION here = cursor;
        CToken* pCandidate = iList.GetNext(cursor);

        if(pCandidate->GetType() > TT_LITERAL_BEGIN)
            continue; // keep this token

        iList.RemoveAt(here);
        delete pCandidate;
    }
}
/**
 * Concatenates tokens via TokenListToString() and turns the result into the
 * text of a C string literal: special characters are escaped and the whole
 * string is wrapped in double quotes.
 *
 * @param oStr            receives the quoted, escaped text
 * @param ioList          token list to read from
 * @param ioPos           current position in ioList; advanced while reading
 * @param iEndType        forwarded to TokenListToString()
 * @param iSkipBeginType  forwarded to TokenListToString()
 * @param iSkipEndType    forwarded to TokenListToString()
 * @return always TRUE. The result of TokenListToString() is not propagated;
 *         NOTE(review): callers may rely on the text gathered before an
 *         unmatched ')' being returned as success - confirm before changing.
 */
BOOL CTokenReader::TokenListToCString(CString& oStr,
                                      CTokenList& ioList,
                                      POSITION& ioPos,
                                      UINT iEndType/*=TT_UNDEFINED*/,
                                      UINT iSkipBeginType/*=TT_UNDEFINED*/,
                                      UINT iSkipEndType/*=TT_UNDEFINED*/)
{
    TokenListToString(oStr, ioList, ioPos, iEndType, iSkipBeginType, iSkipEndType);

    // Double the backslashes that are already in the text FIRST; doing it
    // later would also double the backslashes inserted by the escapes below.
    oStr.Replace("\\", "\\\\");

    // Control characters become their two-character escape sequences
    // (backslash + letter), not a backslash followed by the raw character.
    oStr.Replace("\n", "\\n");
    oStr.Replace("\t", "\\t");
    oStr.Replace("\v", "\\v");
    oStr.Replace("\b", "\\b");
    oStr.Replace("\r", "\\r");
    oStr.Replace("\f", "\\f");
    oStr.Replace("\a", "\\a");

    // Characters that need a backslash inside a literal
    oStr.Replace("\?", "\\\?");
    oStr.Replace("\'", "\\\'");
    oStr.Replace("\"", "\\\"");

    // Surround with double quotes
    oStr.Insert(0, '\"');
    oStr += '\"';
    //ASSERT(FALSE);
    return TRUE;
}
/**
 * Appends the content of consecutive tokens to oStr, starting at ioPos.
 *
 * Reading stops when the list is exhausted or when a token of type iEndType
 * is met outside a skip region; that end token is not appended. A skip
 * region opens at a token of type iSkipBeginType and closes at one of type
 * iSkipEndType; inside it iEndType is ignored, while the tokens themselves
 * are still appended.
 *
 * @param oStr            string the token contents are appended to
 * @param ioList          token list to read from
 * @param ioPos           current position in ioList (checked against NULL
 *                        each round; presumably advanced by
 *                        NEXT_TOKEN_OR_RETURN_VALUE - macro defined elsewhere)
 * @param iEndType        token type that terminates reading
 * @param iSkipBeginType  token type that opens a skip region
 * @param iSkipEndType    token type that closes a skip region
 * @return FALSE when a ')' is met that has no matching '(' among the tokens
 *         read so far, or when NEXT_TOKEN_OR_RETURN_VALUE returns early;
 *         TRUE otherwise.
 */
BOOL CTokenReader::TokenListToString(CString& oStr,
                                     CTokenList& ioList,
                                     POSITION& ioPos,
                                     UINT iEndType/*=TT_UNDEFINED*/,
                                     UINT iSkipBeginType/*=TT_UNDEFINED*/,
                                     UINT iSkipEndType/*=TT_UNDEFINED*/)
{
    // pToken / ioList / ioPos keep their names: NEXT_TOKEN_OR_RETURN_VALUE
    // takes no token argument and presumably refers to them by name.
    CToken* pToken = NULL;
    BOOL insideSkip = FALSE;
    int openParens = 0; // '(' seen minus ')' seen

    while(ioPos != NULL)
    {
        NEXT_TOKEN_OR_RETURN_VALUE(FALSE);
        const UINT kind = pToken->GetType();

        // Track parenthesis balance; one ')' too many is an error.
        if(kind == TT_LPARAN)
            ++openParens;
        else if(kind == TT_RPARAN)
            --openParens;
        if(openParens == -1)
            return FALSE;

        if(insideSkip)
        {
            // Only the closing type matters while a skip region is open.
            if(kind == iSkipEndType)
                insideSkip = FALSE;
        }
        else if(kind == iEndType)
        {
            break;
        }
        else if(kind == iSkipBeginType)
        {
            insideSkip = TRUE;
        }

        pToken->CatContentTo(oStr);
    }
    return TRUE;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -