📄 lexer.cpp
字号:
/*
Project.
XSC - The XtremeScript Compiler Version 0.8
Abstract.
Lexical analyzer module
Date Created.
8.21.2002
Author.
Alex Varanese
*/
// ---- Include Files -------------------------------------------------------------------------
#include "lexer.h"
// ---- Globals -------------------------------------------------------------------------------
// ---- Lexer -----------------------------------------------------------------------------
// The state the lexer is currently working with. ResetLexer () reinitializes its line,
// lexeme-extent and operator fields.
LexerState g_CurrLexerState; // The current lexer state
// A second, independent state object kept alongside the current one.
LexerState g_PrevLexerState; // The previous lexer state (used for
// rewinding the token stream)
// ---- Operators -------------------------------------------------------------------------
// Operators are recognized one character at a time using three tables, one per character
// position (first, second, third). Every entry is an OpState initialized with four values.
// The first is the character itself (the cChar member). Reading the data, the remaining
// three line up as follows (member names live in lexer.h -- confirm there):
//   2nd value: index in the NEXT table of the first entry that may follow this character
//   3rd value: how many consecutive entries in the NEXT table may follow (0 = none)
//   4th value: a number identifying the operator spelled so far
// Example: '+' is { '+', 0, 2, 0 }, and entries 0 and 1 of g_OpChars1 are '=' and '+',
// i.e. the operators += and ++.
// NOTE(review): the arrays are sized MAX_OP_STATE_COUNT; if that exceeds the number of
// initializers, the unused trailing entries are zero-initialized (cChar == '\0').
// ---- First operator characters
OpState g_OpChars0 [ MAX_OP_STATE_COUNT ] = { { '+', 0, 2, 0 }, { '-', 2, 2, 1 }, { '*', 4, 1, 2 }, { '/', 5, 1, 3 },
{ '%', 6, 1, 4 }, { '^', 7, 1, 5 }, { '&', 8, 2, 6 }, { '|', 10, 2, 7 },
{ '#', 12, 1, 8 }, { '~', 0, 0, 9 }, { '!', 13, 1, 10 }, { '=', 14, 1, 11 },
{ '<', 15, 2, 12 }, { '>', 17, 2, 13 }, { '$', 19, 1, 35 } };
// ---- Second operator characters
// Entries are grouped by the first character they follow; the trailing comment on each
// line names the two-character operators that the entries on that line complete.
OpState g_OpChars1 [ MAX_OP_STATE_COUNT ] = { { '=', 0, 0, 14 }, { '+', 0, 0, 15 }, // +=, ++
{ '=', 0, 0, 16 }, { '-', 0, 0, 17 }, // -=, --
{ '=', 0, 0, 18 }, // *=
{ '=', 0, 0, 19 }, // /=
{ '=', 0, 0, 20 }, // %=
{ '=', 0, 0, 21 }, // ^=
{ '=', 0, 0, 22 }, { '&', 0, 0, 23 }, // &=, &&
{ '=', 0, 0, 24 }, { '|', 0, 0, 25 }, // |=, ||
{ '=', 0, 0, 26 }, // #=
{ '=', 0, 0, 27 }, // !=
{ '=', 0, 0, 28 }, // ==
{ '=', 0, 0, 29 }, { '<', 0, 1, 30 }, // <=, << (<< may continue with g_OpChars2 [ 0 ])
{ '=', 0, 0, 31 }, { '>', 1, 1, 32 }, // >=, >> (>> may continue with g_OpChars2 [ 1 ])
{ '=', 0, 0, 36 } }; // $=
// ---- Third operator characters
// Only << and >> have a third character; these entries end the chain (no followers).
OpState g_OpChars2 [ MAX_OP_STATE_COUNT ] = { { '=', 0, 0, 33 }, { '=', 0, 0, 34 } }; // <<=, >>=
// ---- Delimiters ------------------------------------------------------------------------
// Single-character delimiters; IsCharDelim () scans this array for a match.
// NOTE(review): eight characters are listed; if MAX_DELIM_COUNT is larger than eight, the
// remaining slots are zero-initialized ('\0') and are scanned by IsCharDelim () as well.
char cDelims [ MAX_DELIM_COUNT ] = { ',', '(', ')', '[', ']', '{', '}', ';' };
// ---- Function Prototypes -------------------------------------------------------------------
// Forward declarations of the helper routines defined later in this file.
// Operator table lookups
int GetOpStateIndex ( char cChar, int iCharIndex, int iSubStateIndex, int iSubStateCount );
int IsCharOpChar ( char cChar, int iCharIndex );
OpState GetOpState ( int iCharIndex, int iStateIndex );
// Character classification (each returns TRUE or FALSE)
int IsCharDelim ( char cChar );
int IsCharWhitespace ( char cChar );
int IsCharNumeric ( char cChar );
int IsCharIdent ( char cChar );
// Source buffer access
char GetNextChar ();
// ---- Functions -----------------------------------------------------------------------------
/******************************************************************************************
*
* ResetLexer ()
*
* Resets the lexer.
*/
void ResetLexer ()
{
// Set the current line of code to the new line
g_CurrLexerState.iCurrLineIndex = 0;
g_CurrLexerState.pCurrLine = g_SourceCode.pHead;
// Reset the start and end of the current lexeme to the beginning of the source
g_CurrLexerState.iCurrLexemeStart = 0;
g_CurrLexerState.iCurrLexemeEnd = 0;
// Reset the current operator
g_CurrLexerState.iCurrOp = 0;
}
/******************************************************************************************
*
* CopyLexerState ()
*
* Copies the contents of pSourceState into pDestState, member by member. The members
* copied are: iCurrLineIndex, pCurrLine, CurrToken, pstrCurrLexeme, iCurrLexemeStart,
* iCurrLexemeEnd and iCurrOp. The lexeme text is copied with strcpy (), so the source
* lexeme must be a null-terminated string.
*/
void CopyLexerState ( LexerState & pDestState, LexerState & pSourceState )
{
    // Position within the source code
    pDestState.pCurrLine = pSourceState.pCurrLine;
    pDestState.iCurrLineIndex = pSourceState.iCurrLineIndex;

    // Extents of the current lexeme
    pDestState.iCurrLexemeEnd = pSourceState.iCurrLexemeEnd;
    pDestState.iCurrLexemeStart = pSourceState.iCurrLexemeStart;

    // Current token and operator
    pDestState.iCurrOp = pSourceState.iCurrOp;
    pDestState.CurrToken = pSourceState.CurrToken;

    // Text of the current lexeme
    strcpy ( pDestState.pstrCurrLexeme, pSourceState.pstrCurrLexeme );
}
/******************************************************************************************
*
* GetOpStateIndex ()
*
* Returns the index of the operator state associated with the specified character and
* character index.
*/
int GetOpStateIndex ( char cChar, int iCharIndex, int iSubStateIndex, int iSubStateCount )
{
int iStartStateIndex;
int iEndStateIndex;
// Is the character index is zero?
if ( iCharIndex == 0 )
{
// Yes, so there's no substates to worry about
iStartStateIndex = 0;
iEndStateIndex = MAX_OP_STATE_COUNT;
}
else
{
// No, so save the substate information
iStartStateIndex = iSubStateIndex;
iEndStateIndex = iStartStateIndex + iSubStateCount;
}
// Loop through each possible substate and look for a match
for ( int iCurrOpStateIndex = iStartStateIndex; iCurrOpStateIndex < iEndStateIndex; ++ iCurrOpStateIndex )
{
// Get the current state at the specified character index
char cOpChar;
switch ( iCharIndex )
{
case 0:
cOpChar = g_OpChars0 [ iCurrOpStateIndex ].cChar;
break;
case 1:
cOpChar = g_OpChars1 [ iCurrOpStateIndex ].cChar;
break;
case 2:
cOpChar = g_OpChars2 [ iCurrOpStateIndex ].cChar;
break;
}
// If the character is a match, return the index
if ( cChar == cOpChar )
return iCurrOpStateIndex;
}
// Return -1 if no match is found
return -1;
}
/******************************************************************************************
*
* IsCharOpChar ()
*
* Determines if the specified character is an operator character.
*/
int IsCharOpChar ( char cChar, int iCharIndex )
{
// Loop through each state in the specified character index and look for a match
for ( int iCurrOpStateIndex = 0; iCurrOpStateIndex < MAX_OP_STATE_COUNT; ++ iCurrOpStateIndex )
{
// Get the current state at the specified character index
char cOpChar;
switch ( iCharIndex )
{
case 0:
cOpChar = g_OpChars0 [ iCurrOpStateIndex ].cChar;
break;
case 1:
cOpChar = g_OpChars1 [ iCurrOpStateIndex ].cChar;
break;
case 2:
cOpChar = g_OpChars2 [ iCurrOpStateIndex ].cChar;
break;
}
// If the character is a match, return TRUE
if ( cChar == cOpChar )
return TRUE;
}
// Return FALSE if no match is found
return FALSE;
}
/******************************************************************************************
*
* GetOpState ()
*
* Returns a copy of the operator state stored at iStateIndex in the table selected by
* iCharIndex (0 = g_OpChars0, 1 = g_OpChars1, 2 = g_OpChars2).
*
* If iCharIndex is not 0, 1 or 2, or iStateIndex lies outside the table, a state whose
* first four members are zero is returned instead of an indeterminate value; its cChar of
* '\0' does not correspond to any listed operator character.
*/
OpState GetOpState ( int iCharIndex, int iStateIndex )
{
    // Start from a well-defined value so that invalid arguments never yield garbage
    OpState State = { '\0', 0, 0, 0 };

    // Never index outside the tables
    if ( iStateIndex < 0 || iStateIndex >= MAX_OP_STATE_COUNT )
        return State;

    // Fetch the requested state from the table for this character position
    switch ( iCharIndex )
    {
        case 0:
            State = g_OpChars0 [ iStateIndex ];
            break;
        case 1:
            State = g_OpChars1 [ iStateIndex ];
            break;
        case 2:
            State = g_OpChars2 [ iStateIndex ];
            break;
        default:
            // Unknown character position: keep the zeroed state
            break;
    }

    return State;
}
/******************************************************************************************
*
* IsCharDelim ()
*
* Determines whether a character is a delimiter.
*/
int IsCharDelim ( char cChar )
{
// Loop through each delimiter in the array and compare it to the specified character
for ( int iCurrDelimIndex = 0; iCurrDelimIndex < MAX_DELIM_COUNT; ++ iCurrDelimIndex )
{
// Return TRUE if a match was found
if ( cChar == cDelims [ iCurrDelimIndex ] )
return TRUE;
}
// The character is not a delimiter, so return FALSE
return FALSE;
}
/******************************************************************************************
*
* IsCharWhitespace ()
*
* Returns TRUE if the given character is a space, a tab or a newline, and FALSE for any
* other character.
*/
int IsCharWhitespace ( char cChar )
{
    switch ( cChar )
    {
        // The three characters treated as whitespace
        case ' ':
        case '\t':
        case '\n':
            return TRUE;

        // Everything else is not whitespace
        default:
            return FALSE;
    }
}
/******************************************************************************************
*
* IsCharNumeric ()
*
* Returns TRUE if the given character is one of the decimal digits '0' through '9', and
* FALSE otherwise.
*/
int IsCharNumeric ( char cChar )
{
    // Anything below '0' or above '9' is not a digit
    return ( cChar < '0' || cChar > '9' ) ? FALSE : TRUE;
}
/******************************************************************************************
*
* IsCharIdent ()
*
* Returns TRUE if the given character may appear in an identifier, i.e. it is a digit
* ('0'-'9'), an uppercase letter ('A'-'Z'), a lowercase letter ('a'-'z') or an underscore.
* Returns FALSE for every other character.
*/
int IsCharIdent ( char cChar )
{
    // Underscore
    if ( cChar == '_' )
        return TRUE;

    // Lowercase letters
    if ( cChar >= 'a' && cChar <= 'z' )
        return TRUE;

    // Uppercase letters
    if ( cChar >= 'A' && cChar <= 'Z' )
        return TRUE;

    // Digits
    if ( cChar >= '0' && cChar <= '9' )
        return TRUE;

    // Not an identifier character
    return FALSE;
}
/******************************************************************************************
*
* GetNextChar ()
*
* Returns the next character in the source buffer.
*/
char GetNextChar ()
{
// Make a local copy of the string pointer, unless we're at the end of the source code
char * pstrCurrLine;
if ( g_CurrLexerState.pCurrLine )
pstrCurrLine = ( char * ) g_CurrLexerState.pCurrLine->pData;
else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -