📄 lexer.cpp
字号:
return '\0';
// If the current lexeme end index is at or beyond the length of the string, we're past the
// end of the line
if ( g_CurrLexerState.iCurrLexemeEnd >= ( int ) strlen ( pstrCurrLine ) )
{
// Move to the next node in the source code list
g_CurrLexerState.pCurrLine = g_CurrLexerState.pCurrLine->pNext;
// Is the line valid?
if ( g_CurrLexerState.pCurrLine )
{
// Yes, so move to the next line of code and reset the lexeme pointers
pstrCurrLine = ( char * ) g_CurrLexerState.pCurrLine->pData;
++ g_CurrLexerState.iCurrLineIndex;
g_CurrLexerState.iCurrLexemeStart = 0;
g_CurrLexerState.iCurrLexemeEnd = 0;
}
else
{
// No, so return a null terminator to alert the lexer that the end of the
// source code has been reached
return '\0';
}
}
// Return the character and increment the pointer
return pstrCurrLine [ g_CurrLexerState.iCurrLexemeEnd ++ ];
}
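/******************************************************************************************
*
* GetNextToken ()
*
* Returns the next token in the source buffer. The current lexer state is saved first so
* that the token can later be rewound, and the new lexeme begins where the previous one
* ended.
*/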
Token GetNextToken ()
{
// Save the current lexer state for future rewinding
CopyLexerState ( g_PrevLexerState, g_CurrLexerState );
// Start the new lexeme at the end of the last one
g_CurrLexerState.iCurrLexemeStart = g_CurrLexerState.iCurrLexemeEnd;
// Set the initial state to the start state
int iCurrLexState = LEX_STATE_START;
// Set the current operator state
int iCurrOpCharIndex = 0;
int iCurrOpStateIndex = 0;
OpState CurrOpState;
// Flag to determine when the lexeme is done
int iLexemeDone = FALSE;
// ---- Loop until a token is completed
// Current character
char cCurrChar;
// Current position in the lexeme string buffer
int iNextLexemeCharIndex = 0;
// Should the current character be included in the lexeme?
int iAddCurrChar;
// Begin the loop
while ( TRUE )
{
// Read the next character, and exit if the end of the source has been reached
cCurrChar = GetNextChar ();
if ( cCurrChar == '\0' )
break;
// Assume the character will be added to the lexeme
iAddCurrChar = TRUE;
// Depending on the current state of the lexer, handle the incoming character
switch ( iCurrLexState )
{
// If an unknown state occurs, the token is invalid, so exit
case LEX_STATE_UNKNOWN:
iLexemeDone = TRUE;
break;
// The start state
case LEX_STATE_START:
// Just loop past whitespace, and don't add it to the lexeme
if ( IsCharWhitespace ( cCurrChar ) )
{
++ g_CurrLexerState.iCurrLexemeStart;
iAddCurrChar = FALSE;
}
// An integer is starting
else if ( IsCharNumeric ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_INT;
}
// A float is starting
else if ( cCurrChar == '.' )
{
iCurrLexState = LEX_STATE_FLOAT;
}
// An identifier is starting
else if ( IsCharIdent ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_IDENT;
}
// A delimiter has been read
else if ( IsCharDelim ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_DELIM;
}
// An operator is starting
else if ( IsCharOpChar ( cCurrChar, 0 ) )
{
// Get the index of the initial operator state
iCurrOpStateIndex = GetOpStateIndex ( cCurrChar, 0, 0, 0 );
if ( iCurrOpStateIndex == -1 )
return TOKEN_TYPE_INVALID;
// Get the full state structure
CurrOpState = GetOpState ( 0, iCurrOpStateIndex );
// Move to the next character in the operator (1)
iCurrOpCharIndex = 1;
// Set the current operator
g_CurrLexerState.iCurrOp = CurrOpState.iIndex;
iCurrLexState = LEX_STATE_OP;
}
// A string is starting, but don't add the opening quote to the lexeme
else if ( cCurrChar == '"' )
{
iAddCurrChar = FALSE;
iCurrLexState = LEX_STATE_STRING;
}
// It's invalid
else
iCurrLexState = LEX_STATE_UNKNOWN;
break;
// Integer
case LEX_STATE_INT:
// If a numeric is read, keep the state as-is
if ( IsCharNumeric ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_INT;
}
// If a radix point is read, the numeric is really a float
else if ( cCurrChar == '.' )
{
iCurrLexState = LEX_STATE_FLOAT;
}
// If whitespace or a delimiter is read, the lexeme is done
else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
{
iAddCurrChar = FALSE;
iLexemeDone = TRUE;
}
// Anything else is invalid
else
iCurrLexState = LEX_STATE_UNKNOWN;
break;
// Floating-point
case LEX_STATE_FLOAT:
// If a numeric is read, keep the state as-is
if ( IsCharNumeric ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_FLOAT;
}
// If whitespace or a delimiter is read, the lexeme is done
else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
{
iLexemeDone = TRUE;
iAddCurrChar = FALSE;
}
// Anything else is invalid
else
iCurrLexState = LEX_STATE_UNKNOWN;
break;
// Identifier
case LEX_STATE_IDENT:
// If an identifier character is read, keep the state as-is
if ( IsCharIdent ( cCurrChar ) )
{
iCurrLexState = LEX_STATE_IDENT;
}
// If whitespace or a delimiter is read, the lexeme is done
else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
{
iAddCurrChar = FALSE;
iLexemeDone = TRUE;
}
// Anything else is invalid
else
iCurrLexState = LEX_STATE_UNKNOWN;
break;
// Operator
case LEX_STATE_OP:
// If the current character within the operator has no substates, we're done
if ( CurrOpState.iSubStateCount == 0 )
{
iAddCurrChar = FALSE;
iLexemeDone = TRUE;
break;
}
// Otherwise, find out if the new character is a possible substate
if ( IsCharOpChar ( cCurrChar, iCurrOpCharIndex ) )
{
// Get the index of the next substate
iCurrOpStateIndex = GetOpStateIndex ( cCurrChar, iCurrOpCharIndex, CurrOpState.iSubStateIndex, CurrOpState.iSubStateCount );
if ( iCurrOpStateIndex == -1 )
{
iCurrLexState = LEX_STATE_UNKNOWN;
}
else
{
// Get the next operator structure
CurrOpState = GetOpState ( iCurrOpCharIndex, iCurrOpStateIndex );
// Move to the next character in the operator
++ iCurrOpCharIndex;
// Set the current operator
g_CurrLexerState.iCurrOp = CurrOpState.iIndex;
}
}
// If not, the lexeme is done
else
{
iAddCurrChar = FALSE;
iLexemeDone = TRUE;
}
break;
// Delimiter
case LEX_STATE_DELIM:
// Don't add whatever comes after the delimiter to the lexeme, because
// it's done
iAddCurrChar = FALSE;
iLexemeDone = TRUE;
break;
// String
case LEX_STATE_STRING:
// If the current character is a closing quote, finish the lexeme
if ( cCurrChar == '"' )
{
iAddCurrChar = FALSE;
iCurrLexState = LEX_STATE_STRING_CLOSE_QUOTE;
}
// If it's a newline, the string token is invalid
else if ( cCurrChar == '\n' )
{
iAddCurrChar = FALSE;
iCurrLexState = LEX_STATE_UNKNOWN;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -