📄 lexer.cpp

📁 <<Game Script Mastery>> source code
💻 CPP
📖 第 1 页 / 共 3 页
字号:
            return '\0';

        // If the current lexeme end index has reached or passed the length of the string,
        // the current line is exhausted

        if ( g_CurrLexerState.iCurrLexemeEnd >= ( int ) strlen ( pstrCurrLine ) )
        {
            // Move to the next node in the source code list

            g_CurrLexerState.pCurrLine = g_CurrLexerState.pCurrLine->pNext;

            // Is the line valid?

            if ( g_CurrLexerState.pCurrLine )
            {
                // Yes, so move to the next line of code and reset the lexeme pointers

                pstrCurrLine = ( char * ) g_CurrLexerState.pCurrLine->pData;

                ++ g_CurrLexerState.iCurrLineIndex;
                g_CurrLexerState.iCurrLexemeStart = 0;
                g_CurrLexerState.iCurrLexemeEnd = 0;
            }
            else
            {
                // No, so return a null terminator to alert the lexer that the end of the
                // source code has been reached

                return '\0';
            }
            
        }

        // Return the character and increment the pointer

        return pstrCurrLine [ g_CurrLexerState.iCurrLexemeEnd ++ ];
    }

    /******************************************************************************************
    *
    *   GetNextToken ()
    *
    *   Returns the next token in the source buffer.
    */

    Token GetNextToken ()
    {
        // Save the current lexer state for future rewinding

        CopyLexerState ( g_PrevLexerState, g_CurrLexerState );

        // Start the new lexeme at the end of the last one

        g_CurrLexerState.iCurrLexemeStart = g_CurrLexerState.iCurrLexemeEnd;

        // Set the initial state to the start state

        int iCurrLexState = LEX_STATE_START;

        // Set the current operator state

        int iCurrOpCharIndex = 0;
        int iCurrOpStateIndex = 0;
        OpState CurrOpState;

        // Flag to determine when the lexeme is done

        int iLexemeDone = FALSE;

        // ---- Loop until a token is completed

        // Current character

        char cCurrChar;

        // Current position in the lexeme string buffer

        int iNextLexemeCharIndex = 0;

        // Should the current character be included in the lexeme?

        int iAddCurrChar;

        // Begin the loop

        while ( TRUE )
        {
            // Read the next character, and exit if the end of the source has been reached

            cCurrChar = GetNextChar ();
            if ( cCurrChar == '\0' )
                break;

            // Assume the character will be added to the lexeme

            iAddCurrChar = TRUE;

            // Depending on the current state of the lexer, handle the incoming character

            switch ( iCurrLexState )
            {
                // If an unknown state occurs, the token is invalid, so exit

                case LEX_STATE_UNKNOWN:

                    iLexemeDone = TRUE;

                    break;

                // The start state

                case LEX_STATE_START:

                    // Just loop past whitespace, and don't add it to the lexeme

                    if ( IsCharWhitespace ( cCurrChar ) )
                    {
                        ++ g_CurrLexerState.iCurrLexemeStart;
                        iAddCurrChar = FALSE;
                    }

                    // An integer is starting

                    else if ( IsCharNumeric ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_INT;
                    }

                    // A float is starting

                    else if ( cCurrChar == '.' )
                    {
                        iCurrLexState = LEX_STATE_FLOAT;
                    }

                    // An identifier is starting

                    else if ( IsCharIdent ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_IDENT;
                    }

                    // A delimiter has been read

                    else if ( IsCharDelim ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_DELIM;
                    }

                    // An operator is starting

                    else if ( IsCharOpChar ( cCurrChar, 0 ) )
                    {
                        // Get the index of the initial operator state

                        iCurrOpStateIndex = GetOpStateIndex ( cCurrChar, 0, 0, 0 );
                        if ( iCurrOpStateIndex == -1 )
                            return TOKEN_TYPE_INVALID;

                        // Get the full state structure

                        CurrOpState = GetOpState ( 0, iCurrOpStateIndex );

                        // Move to the next character in the operator (1)

                        iCurrOpCharIndex = 1;

                        // Set the current operator

                        g_CurrLexerState.iCurrOp = CurrOpState.iIndex;

                        iCurrLexState = LEX_STATE_OP;
                    }

                    // A string is starting, but don't add the opening quote to the lexeme

                    else if ( cCurrChar == '"' )
                    {
                        iAddCurrChar = FALSE;
                        iCurrLexState = LEX_STATE_STRING;
                    }

                    // It's invalid

                    else
                        iCurrLexState = LEX_STATE_UNKNOWN;

                    break;

                // Integer

                case LEX_STATE_INT:

                    // If a numeric is read, keep the state as-is

                    if ( IsCharNumeric ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_INT;
                    }

                    // If a radix point is read, the numeric is really a float

                    else if ( cCurrChar == '.' )
                    {
                        iCurrLexState = LEX_STATE_FLOAT;
                    }

                    // If whitespace or a delimiter is read, the lexeme is done

                    else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
                    {
                        iAddCurrChar = FALSE;
                        iLexemeDone = TRUE;
                    }

                    // Anything else is invalid

                    else
                        iCurrLexState = LEX_STATE_UNKNOWN;

                    break;

                // Floating-point

                case LEX_STATE_FLOAT:

                    // If a numeric is read, keep the state as-is

                    if ( IsCharNumeric ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_FLOAT;
                    }

                    // If whitespace or a delimiter is read, the lexeme is done

                    else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
                    {
                        iLexemeDone = TRUE;
                        iAddCurrChar = FALSE;
                    }
    
                    // Anything else is invalid

                    else
                        iCurrLexState = LEX_STATE_UNKNOWN;
                    
                    break;

                // Identifier

                case LEX_STATE_IDENT:

                    // If an identifier character is read, keep the state as-is

                    if ( IsCharIdent ( cCurrChar ) )
                    {
                        iCurrLexState = LEX_STATE_IDENT;
                    }

                    // If whitespace or a delimiter is read, the lexeme is done

                    else if ( IsCharWhitespace ( cCurrChar ) || IsCharDelim ( cCurrChar ) )
                    {
                        iAddCurrChar = FALSE;
                        iLexemeDone = TRUE;
                    }

                    // Anything else is invalid

                    else
                        iCurrLexState = LEX_STATE_UNKNOWN;

                    break;

                // Operator

                case LEX_STATE_OP:

                    // If the current character within the operator has no substates, we're done

                    if ( CurrOpState.iSubStateCount == 0 )
                    {
                        iAddCurrChar = FALSE;
                        iLexemeDone = TRUE;
                        break;
                    }

                    // Otherwise, find out if the new character is a possible substate

                    if ( IsCharOpChar ( cCurrChar, iCurrOpCharIndex ) )                   
                    {
                        // Get the index of the next substate
                        
                        iCurrOpStateIndex = GetOpStateIndex ( cCurrChar, iCurrOpCharIndex, CurrOpState.iSubStateIndex, CurrOpState.iSubStateCount );
                        if ( iCurrOpStateIndex == -1 )
                        {
                            iCurrLexState = LEX_STATE_UNKNOWN;
                        }
                        else
                        {
                            // Get the next operator structure

                            CurrOpState = GetOpState ( iCurrOpCharIndex, iCurrOpStateIndex );

                            // Move to the next character in the operator

                            ++ iCurrOpCharIndex;

                            // Set the current operator

                            g_CurrLexerState.iCurrOp = CurrOpState.iIndex;
                        }
                    }

                    // If not, the lexeme is done

                    else
                    {
                        iAddCurrChar = FALSE;
                        iLexemeDone = TRUE;
                    }

                    break;

                // Delimiter

                case LEX_STATE_DELIM:

                    // Don't add whatever comes after the delimiter to the lexeme, because
                    // it's done

                    iAddCurrChar = FALSE;
                    iLexemeDone = TRUE;

                    break;

                // String

                case LEX_STATE_STRING:

                    // If the current character is a closing quote, finish the lexeme

                    if ( cCurrChar == '"' )
                    {
                        iAddCurrChar = FALSE;
                        iCurrLexState = LEX_STATE_STRING_CLOSE_QUOTE;
                    }
        
                    // If it's a newline, the string token is invalid

                    else if ( cCurrChar == '\n' )
                    {
                        iAddCurrChar = FALSE;
                        iCurrLexState = LEX_STATE_UNKNOWN;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -