cscan.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 1,837 行 · 第 1/4 页

C
1,837
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  Lexical scanner for C++ compiler.
*
****************************************************************************/


#include "plusplus.h"

#include <assert.h>

#include "errdefns.h"
#include "preproc.h"
#include "stats.h"
#include "scan.h"
#include "escchars.h"
#include "memmgr.h"
#include "dbg.h"
#include "name.h"
#include "context.h"
#include "unicode.h"

/*
 * SYS_EOF_CHAR selection:
 *  - when __DOS__, __OS2__ or __NT__ is defined it is 0x1A (ASCII SUB, Ctrl-Z);
 *  - when __UNIX__ is defined it is explicitly left undefined;
 *  - any other target is rejected at compile time so the choice must be
 *    made deliberately when porting.
 */
#if defined(__DOS__) || defined(__OS2__) || defined(__NT__)
    #define SYS_EOF_CHAR    0x1A
#elif defined(__UNIX__)
    #undef SYS_EOF_CHAR
#else
    #error SYS_EOF_CHAR is not set for this system
#endif

/*
 * Debug-build only helpers.
 * _BufferOverrun is an lvalue for the last sizeof(uint_32) bytes of Buffer,
 * accessed as a uint_32; BUFFER_OVERRUN_CHECK is a fixed marker value.
 * NOTE(review): presumably the marker is stored in that slot and verified
 * later to catch writes past the end of Buffer -- the code doing so is not
 * visible in this part of the file; confirm before relying on it.
 */
#ifndef NDEBUG
#define _BufferOverrun (*((uint_32*)(&Buffer[sizeof(Buffer)-sizeof(uint_32)])))
#define BUFFER_OVERRUN_CHECK    (0x35791113)
#endif

// forward declaration: nextMacroToken is the initial token source below
static  void    nextMacroToken( void );
// routine invoked by NextToken() to produce the next token; replaceable
// through SetTokenSource()/ResetTokenSource()
static  void    (*tokenSource)( void ) = nextMacroToken;

// read position inside the buffer handed to ReScanInit(); advanced by
// rescanBuffer() and stepped back by one in ReScanToken()
static  char    *ReScanPtr;

// statistics counters (incremented in NextToken() via ExtraRptIncrementCtr)
ExtraRptCtr( nextTokenCalls );
ExtraRptCtr( nextTokenSavedId );
ExtraRptCtr( nextTokenNormal );
ExtraRptCtr( lookPastRewrite );

// Scanner character classes.  InitClassTable below pairs individual
// characters with these values.  In the per-class comments '@' stands for
// the delimiter character itself (e.g. for '+': @ is +, @@ is ++, @= is +=).
// SCAN_NAME must stay 0 and SCAN_MAX must stay last: SCAN_MAX is therefore
// the number of classes.
typedef enum {
    SCAN_NAME = 0,      // identifier
    SCAN_WIDE,          // L"abc" or L'a' or Lname
    SCAN_NUM,           // number that starts with a digit
    SCAN_DELIM1,        // single character delimiter
    SCAN_DELIM12,       // @ or @@ token
    SCAN_DELIM12EQ,     // @, @@, or @= token
    SCAN_DELIM12EQ2EQ,  // @, @@, @=, or @@= token
    SCAN_DELIM1EQ,      // @ or @= token
    SCAN_SLASH,         // /, /=, // comment, or /* comment */
    SCAN_LT,            // <, <=, <<, <<=, <%, <:
    SCAN_PERCENT,       // %, %=, %>, %:, %:%:
    SCAN_COLON,         // :, ::, or :>
    SCAN_MINUS,         // -, -=, --, ->, or ->*
    SCAN_FLOAT,         // .
    SCAN_STRING,        // "string"
    SCAN_STRING2,       // "string" continued
    SCAN_LSTRING2,      // L"string" continued
    SCAN_CHARCONST,     // 'a'
    SCAN_CR,            // '\r'
    SCAN_NEWLINE,       // '\n'
    SCAN_WHITESPACE,    // all whitespace
    SCAN_INVALID,       // all other characters
    SCAN_EOF,           // end-of-file
    SCAN_MAX            // number of classes (not a class itself)
} scan_class;

// One scan_class value per character code (LCHR_MAX entries).
// NOTE(review): the code that fills this table is not visible in this part
// of the file; presumably it is seeded from InitClassTable -- confirm.
static uint_8 ClassTable[LCHR_MAX];

// Flat list of (character, scan_class) pairs, terminated by the pair
// ( '\0', 0 ).  Only characters that need an explicit class are listed here.
static char InitClassTable[] = {
    '\r',       SCAN_CR,
    '\n',       SCAN_NEWLINE,
    ' ',        SCAN_WHITESPACE,
    '\f',       SCAN_WHITESPACE,
    '\t',       SCAN_WHITESPACE,
    '\v',       SCAN_WHITESPACE,
    '\'',       SCAN_CHARCONST,
    '"',        SCAN_STRING,
    '(',        SCAN_DELIM1,
    ')',        SCAN_DELIM1,
    ',',        SCAN_DELIM1,
    ';',        SCAN_DELIM1,
    '?',        SCAN_DELIM1,
    '[',        SCAN_DELIM1,
    ']',        SCAN_DELIM1,
    '{',        SCAN_DELIM1,
    '}',        SCAN_DELIM1,
    '~',        SCAN_DELIM1,
    '.',        SCAN_FLOAT,
    '#',        SCAN_DELIM12,           // #    ##
    '=',        SCAN_DELIM1EQ,          // = ==
    '^',        SCAN_DELIM1EQ,          // ^ ^=
    '!',        SCAN_DELIM1EQ,          // ! !=
    '%',        SCAN_PERCENT,           // % %= %> %: %:%:
    '*',        SCAN_DELIM1EQ,          // * *=
    '&',        SCAN_DELIM12EQ,         // & &= &&
    '|',        SCAN_DELIM12EQ,         // | |= ||
    '+',        SCAN_DELIM12EQ,         // + += ++
    '<',        SCAN_LT,                // < <= << <<= <% <:
    '>',        SCAN_DELIM12EQ2EQ,      // > >= >> >>=
    '-',        SCAN_MINUS,             // - -= -- -> ->*
    '/',        SCAN_SLASH,             // / /=    // /**/
    ':',        SCAN_COLON,             // :    :: :>
    '_',        SCAN_NAME,
    'L',        SCAN_WIDE,
    '\0',       0                       // end-of-list marker
};

// #undef static
// #define static

// Upper limits of the target's int and unsigned int, held as 64-bit values.
// TARGET_INT == 2 selects the 16-bit limits (0x7fff / 0xffff); any other
// value selects the 32-bit limits (0x7fffffff / 0xffffffff).
// NOTE(review): I64Val appears to take ( high 32 bits, low 32 bits ) --
// confirm against its definition.
#if TARGET_INT == 2
static unsigned_64 intMax   = I64Val( 0x00000000, 0x00007fff );
static unsigned_64 uintMax  = I64Val( 0x00000000, 0x0000ffff );
#else
static unsigned_64 intMax   = I64Val( 0x00000000, 0x7fffffff );
static unsigned_64 uintMax  = I64Val( 0x00000000, 0xffffffff );
#endif

// prt_char( x ): pass x to PrtChar() only when CompFlags.cpp_output is set.
// CAUTION: this expands to a bare 'if' statement (not do { } while( 0 )),
// so do not use it as the unbraced body of an if/else.
#define prt_char( x )           if( CompFlags.cpp_output ) { PrtChar( x ); }

// diagnose_lex_error( e ): non-zero when a lexical diagnostic may be issued,
// i.e. e is zero, SkipLevel equals NestLevel, and PPS_NO_LEX_ERRORS is not
// set in PPState.  Callers in this file pass their 'expanding' flag as e.
#define diagnose_lex_error( e ) \
        (!(e) && ( SkipLevel == NestLevel ) && ( PPState & PPS_NO_LEX_ERRORS ) == 0 )

void ReScanInit( char *ptr )
/**************************/
// Remember 'ptr' as the position from which ReScanToken() will read
// characters (through rescanBuffer()).  The text is read up to and
// including its terminating '\0'.
{
    ReScanPtr = ptr;
}

// Character source used while re-scanning an in-memory buffer.
// Fetches the next character from ReScanPtr into CurrChar, advances the
// pointer, and sets CompFlags.rescan_buffer_done once the terminating '\0'
// has been fetched.  Returns the fetched character.
static int rescanBuffer( void )
{
    // Read the byte as unsigned: with a signed plain 'char', bytes >= 0x80
    // would otherwise be sign-extended into a negative CurrChar, which is
    // unsuitable as a character code / table index.
    CurrChar = (unsigned char)*ReScanPtr++;
    if( CurrChar == '\0' ) {
        CompFlags.rescan_buffer_done = 1;
    }
    return( CurrChar );
}

int ReScanToken( void )
/*********************/
// Scan one token from the buffer registered with ReScanInit().
// The current character, the character source (NextChar) and the token
// line/column are saved beforehand and restored afterwards, so the regular
// input stream is undisturbed.  The result is left in CurToken.
// Returns 1 if the end of the buffer was not reached while scanning,
// 0 if the terminating '\0' was fetched.
{
    int (*prev_nextchar)( void );
    int prev_currchar;
    int prev_line;
    int prev_column;

    // save the scanner state that is about to be clobbered
    prev_nextchar = NextChar;
    prev_currchar = CurrChar;
    prev_line = TokenLine;
    prev_column = TokenColumn;

    // switch to the in-memory source, prime CurrChar, and scan one token
    CompFlags.rescan_buffer_done = 0;
    NextChar = rescanBuffer;
    NextChar();
    CurToken = ScanToken( 1 );
    // un-read the one character of look-ahead fetched past the token
    --ReScanPtr;

    // put the regular scanner state back
    TokenColumn = prev_column;
    TokenLine = prev_line;
    NextChar = prev_nextchar;
    CurrChar = prev_currchar;

    return( CompFlags.rescan_buffer_done == 0 );
}

void (*SetTokenSource( void (*source)( void ) ))( void )
/******************************************************/
// Install 'source' as the routine NextToken() calls to obtain a token and
// hand back the routine that was installed before, so the caller can
// reinstate it later (e.g. with ResetTokenSource()).
{
    void (*previous)( void ) = tokenSource;

    tokenSource = source;
    return( previous );
}

void ResetTokenSource( void (*source)( void ) )
/*********************************************/
// Install 'source' as the routine NextToken() calls to obtain a token.
// Unlike SetTokenSource(), the previously installed routine is not returned.
{
    tokenSource = source;
}

int NextToken( void )
/*******************/
// Advance to the next token and return it (also left in CurToken).
// If the current token is T_SAVED_ID, the look-ahead token LAToken becomes
// the current token and the token source is not called.  Otherwise the
// installed token source is invoked; debug builds additionally call
// CtxScanToken() and DumpToken().
{
#ifdef XTRA_RPT
    ExtraRptIncrementCtr( nextTokenCalls );
    if( tokenSource == nextMacroToken ) {
        ExtraRptIncrementCtr( nextTokenNormal );
    }
#endif
    if( CurToken != T_SAVED_ID ) {
        tokenSource();
#ifndef NDEBUG
        CtxScanToken();
        DumpToken();
#endif
    } else {
        ExtraRptIncrementCtr( nextTokenSavedId );
        CurToken = LAToken;
    }
    return( CurToken );
}

// Fetch the next character and append it to the token text in Buffer.
// Characters are stored only while TokenLen is below BUF_SIZE - 2.  The
// first character that does not fit raises ERR_TOKEN_TRUNCATED (only when
// NestLevel == SkipLevel) and bumps TokenLen once more so that later
// overflow characters are dropped silently.
// Returns the character fetched, whether or not it was stored.
static int saveNextChar( void )
{
    int ch;

    ch = NextChar();
    if( TokenLen < BUF_SIZE - 2 ) {
        Buffer[TokenLen++] = ch;
        return( ch );
    }
    if( TokenLen == BUF_SIZE - 2 ) {
        if( NestLevel == SkipLevel ) {
            CErr1( ERR_TOKEN_TRUNCATED );
        }
        ++TokenLen;
    }
    return( ch );
}

// Accumulate a run of hexadecimal digits into Constant64.
// Characters are fetched with saveNextChar() until one is neither C_HX nor
// C_DI according to CharSet; each digit is folded in with U64Cnv16().
// 'expanding' is handed to diagnose_lex_error() to decide whether the
// "constant too big" warning may be issued.
// Returns 0 if no digit was found, 1 otherwise.
static int scanHex( int expanding )
{
    int ch;
    char cls;
    int overflowed;
    int seen_digit;

    overflowed = FALSE;
    seen_digit = FALSE;
    for(;;) {
        ch = saveNextChar();
        cls = CharSet[ ch ];
        if(( cls & (C_HX|C_DI) ) == 0 ) {
            break;
        }
        if( cls & C_HX ) {
            // map a hex letter so that ( ch - '0' ) yields 10..15
            ch = (( ch | HEX_MASK ) - HEX_BASE ) + 10 + '0';
        }
        if( U64Cnv16( &Constant64, ch - '0' ) ) {
            overflowed = TRUE;
        }
        seen_digit = TRUE;
    }
    if( ! seen_digit ) {
        /* nothing matched after "0x" */
        return( 0 );
    }
    if( overflowed && diagnose_lex_error( expanding ) ) {
        CErr1( WARN_CONSTANT_TOO_BIG );
    }
    /* at least one hex digit was consumed */
    return( 1 );
}

// Classify the identifier of length 'len' held in Buffer.
// A macro match takes precedence: the macro entry is stored through
// 'pmeptr' and T_MACRO is returned.  Otherwise the result of KwLookup()
// is returned and *pmeptr is left untouched.
static int idLookup( unsigned len, MEPTR *pmeptr )
{
    MEPTR macro;

    macro = MacroLookup( Buffer, len );
    if( macro == NULL ) {
        return( KwLookup( len ) );
    }
    *pmeptr = macro;
    return( T_MACRO );
}

// Emit one character of a comment to the preprocessor output.
//  - '\r' and LCHR_EOF are never emitted;
//  - '\n' is emitted regardless of CompFlags.keep_comments;
//  - anything else is emitted only when CompFlags.keep_comments is set.
// Emission goes through prt_char(), so nothing is printed unless
// CompFlags.cpp_output is set.
static void prt_comment_char( int c )
{
    if( c == '\r' || c == LCHR_EOF ) {
        return;
    }
    if( c == '\n' || CompFlags.keep_comments ) {
        prt_char( c );
    }
}

// Skip the body of a C-style comment up to and including its terminator.
// NOTE(review): presumably entered after the opening "/*" has been consumed
// (the cpp_output path re-emits '/' and '*' itself) -- confirm at the caller.
//
// CompFlags.scanning_c_comment is set for the duration of the scan and
// cleared on every exit path.  A "/*" found inside the comment that is not
// immediately closed by '/' produces WARN_NESTED_COMMENT, reported with the
// line on which this comment started.  On end-of-file the function returns
// at once without reading further.
//
// The two branches below run the same state machine; the cpp_output branch
// additionally echoes every character through prt_comment_char().
static void scanCComment( void )
{
    int c;
    int prev_char;
    unsigned start_line;

    SrcFileCurrentLocation();
    start_line = TokenLine;     // remembered for the nested-comment warning
    CompFlags.scanning_c_comment = 1;
    if( CompFlags.cpp_output ) {
        prt_comment_char( '/' );
        prt_comment_char( '*' );
        c = '\0';
        for(;;) {
            // advance to the next '/', tracking the character before it
            for(;;) {
                prev_char = c;
                c = NextChar();
                prt_comment_char( c );
                if( c == '/' ) break;
                if( c == LCHR_EOF ) {
                    /* unterminated comment already detected in NextChar() */
                    CompFlags.scanning_c_comment = 0;
                    return;
                }
            }
            // "*/" closes the comment
            if( prev_char == '*' ) break;
            // get next character and see if it is '*' for nested comment
            c = NextChar();
            prt_comment_char( c );
            if( c == '*' ) {
                /* '*' may be just about to close this comment */
                c = NextChar();
                prt_comment_char( c );
                if( c == '/' ) break;
                if( c == LCHR_EOF ) {
                    /* unterminated comment already detected in NextChar() */
                    CompFlags.scanning_c_comment = 0;
                    return;
                }
                SrcFileSetErrLoc();
                CErr2( WARN_NESTED_COMMENT, start_line );
            }
        }
    } else {
        // same scan as above, without echoing characters
        c = '\0';
        for(;;) {
            // advance to the next '/', tracking the character before it
            for(;;) {
                prev_char = c;
                c = NextChar();
                if( c == '/' ) break;
                if( c == LCHR_EOF ) {
                    /* unterminated comment already detected in NextChar() */
                    CompFlags.scanning_c_comment = 0;
                    return;
                }
            }
            // "*/" closes the comment
            if( prev_char == '*' ) break;
            // get next character and see if it is '*' for nested comment
            c = NextChar();
            if( c == '*' ) {
                /* '*' may be just about to close this comment */
                c = NextChar();
                if( c == '/' ) break;
                if( c == LCHR_EOF ) {
                    /* unterminated comment already detected in NextChar() */
                    CompFlags.scanning_c_comment = 0;
                    return;
                }
                SrcFileSetErrLoc();
                CErr2( WARN_NESTED_COMMENT, start_line );
            }
        }
    }
    CompFlags.scanning_c_comment = 0;
    // the last character read was the closing '/'; fetch the one after it
    NextChar();
}

// Skip a "//" comment.
// CompFlags.scanning_cpp_comment is set while the comment is consumed.
// When preprocessor output is being produced the comment is read character
// by character and echoed through prt_comment_char(), stopping at (and not
// echoing) '\n' or LCHR_EOF; otherwise the work is delegated to
// SrcFileScanCppComment().
static void scanCppComment( void )
{
    int ch;

    CompFlags.scanning_cpp_comment = 1;
    if( ! CompFlags.cpp_output ) {
        SrcFileScanCppComment();
    } else {
        prt_comment_char( '/' );
        prt_comment_char( '/' );
        ch = NextChar();
        while( ch != LCHR_EOF && ch != '\n' ) {
            prt_comment_char( ch );
            ch = NextChar();
        }
    }
    CompFlags.scanning_cpp_comment = 0;
}

// Decode one escape sequence and return its value.
//   c          first character of the escape, as classified by
//              classify_escape_char()
//   expanding  handed to diagnose_lex_error() to gate diagnostics
//   char_type  when not TYP_WCHAR, values above 0xff are diagnosed and
//              truncated to their low 8 bits
// Octal escapes take at most three digits.  A hex escape with no hex digit
// after it yields 'x'.  For ESCAPE_NONE the character itself is returned;
// any other classification is itself the resulting value.
// Consumed characters are appended to the token text via saveNextChar().
static int doESCChar( int c, int expanding, int char_type )
{
    unsigned value;
    unsigned digits_left;
    int kind;

    kind = classify_escape_char( c );
    if( kind == ESCAPE_OCTAL ) {
        value = 0;
        digits_left = 3;
        while( digits_left != 0 ) {
            c = octal_dig( c );
            if( c == 8 ) break;         // not an octal digit
            value = value * 8 + c;
            c = saveNextChar();
            --digits_left;
        }
        if( char_type != TYP_WCHAR && value > 0377 ) {
            if( diagnose_lex_error( expanding ) ) {
                SrcFileSetErrLoc();
                CErr1( ANSI_INVALID_OCTAL_ESCAPE );
            }
            value &= 0377;
        }
    } else if( kind == ESCAPE_HEX ) {
        U64Clear( Constant64 );
        if( ! scanHex( expanding ) ) {
            /*  '\xz' where z is not a hex char */
            value = 'x';
        } else {
            value = U32FetchTrunc( Constant64 );
            if( char_type != TYP_WCHAR && value > 0x0ff ) {
                if( diagnose_lex_error( expanding ) ) {
                    SrcFileSetErrLoc();
                    CErr1( ANSI_INVALID_HEX_ESCAPE );
                }
                value &= 0x0ff;
            }
        }
    } else {
        // not a numeric escape: either the character stands for itself or
        // the classification already is the escaped value
        value = ( kind == ESCAPE_NONE ) ? c : kind;
        saveNextChar();
    }
    return( value );
}

static int charConst( int char_type, int expanding )
{
    int c;
    int i;
    int token;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?