/* cscan.c -- from the source code of the open-source compiler Open Watcom 1.6.0
 * (C language, 1,837 lines in total; this is page 1 of 4 of the listing) */
/****************************************************************************
*
* Open Watcom Project
*
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
*
* This file contains Original Code and/or Modifications of Original
* Code as defined in and that are subject to the Sybase Open Watcom
* Public License version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
* provided with the Original Code and Modifications, and is also
* available at www.sybase.com/developer/opensource.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
* NON-INFRINGEMENT. Please see the License for the specific language
* governing rights and limitations under the License.
*
* ========================================================================
*
* Description: Lexical scanner for C++ compiler.
*
****************************************************************************/
#include "plusplus.h"
#include <assert.h>
#include "errdefns.h"
#include "preproc.h"
#include "stats.h"
#include "scan.h"
#include "escchars.h"
#include "memmgr.h"
#include "dbg.h"
#include "name.h"
#include "context.h"
#include "unicode.h"
/*
 * SYS_EOF_CHAR selection.
 *
 * DOS, OS/2 and NT builds define SYS_EOF_CHAR as 0x1A (ASCII SUB, Ctrl-Z).
 * UNIX builds explicitly leave it undefined.  Any other host stops the
 * build with #error so that the choice has to be made deliberately.
 *
 * NOTE(review): the code that tests SYS_EOF_CHAR is not in this part of the
 * file -- presumably it treats the byte as a textual end-of-file marker;
 * confirm at the use sites.
 */
#if defined(__DOS__) || defined(__OS2__) || defined(__NT__)
#define SYS_EOF_CHAR 0x1A
#elif defined(__UNIX__)
#undef SYS_EOF_CHAR
#else
#error SYS_EOF_CHAR is not set for this system
#endif
/*
 * Debug-build-only helpers (compiled out when NDEBUG is defined).
 *
 * _BufferOverrun is an lvalue that views the last sizeof(uint_32) bytes of
 * Buffer as a uint_32; BUFFER_OVERRUN_CHECK is the constant 0x35791113.
 *
 * NOTE(review): the code that stores and verifies the constant is not in
 * this part of the file -- presumably it is a canary used to detect writes
 * past the end of Buffer; confirm.  The cast also assumes that address is
 * suitably aligned for uint_32 on the host.
 */
#ifndef NDEBUG
#define _BufferOverrun (*((uint_32*)(&Buffer[sizeof(Buffer)-sizeof(uint_32)])))
#define BUFFER_OVERRUN_CHECK (0x35791113)
#endif
// forward declaration: tokenSource below is initialized with this routine
static void nextMacroToken( void );
// routine NextToken() calls to produce the next token; replaced through
// SetTokenSource() / ResetTokenSource()
static void (*tokenSource)( void ) = nextMacroToken;
// read position used by rescanBuffer(); set by ReScanInit()
static char *ReScanPtr;
// Counters declared through the ExtraRptCtr macro.  The first three are
// incremented in NextToken(); lookPastRewrite is not used in this part of
// the file.
// NOTE(review): ExtraRptCtr is defined elsewhere (presumably stats.h) --
// confirm what it expands to when XTRA_RPT is not defined.
ExtraRptCtr( nextTokenCalls );
ExtraRptCtr( nextTokenSavedId );
ExtraRptCtr( nextTokenNormal );
ExtraRptCtr( lookPastRewrite );
/*
 * scan_class -- category assigned to a character for scanning purposes.
 *
 * In the per-enumerator comments '@' stands for the delimiter character
 * itself (e.g. for '&', "@, @@, or @=" means "&", "&&" or "&=").
 * InitClassTable pairs individual characters with these values.
 * SCAN_MAX is the number of classes, not a class of its own.
 *
 * NOTE(review): the dispatch code that consumes these values is not in
 * this part of the file; keep the numbering contiguous from 0 in case it
 * indexes a table by class.
 */
typedef enum {
SCAN_NAME = 0, // identifier
SCAN_WIDE, // L"abc" or L'a' or Lname
SCAN_NUM, // number that starts with a digit
SCAN_DELIM1, // single character delimiter
SCAN_DELIM12, // @ or @@ token
SCAN_DELIM12EQ, // @, @@, or @= token
SCAN_DELIM12EQ2EQ, // @, @@, @=, or @@= token
SCAN_DELIM1EQ, // @ or @= token
SCAN_SLASH, // /, /=, // comment, or /* comment */
SCAN_LT, // <, <=, <<, <<=, <%, <:
SCAN_PERCENT, // %, %=, %>, %:, %:%:
SCAN_COLON, // :, ::, or :>
SCAN_MINUS, // -, -=, --, ->, or ->*
SCAN_FLOAT, // .
SCAN_STRING, // "string"
SCAN_STRING2, // "string" continued
SCAN_LSTRING2, // L"string" continued
SCAN_CHARCONST, // 'a'
SCAN_CR, // '\r'
SCAN_NEWLINE, // '\n'
SCAN_WHITESPACE, // all whitespace
SCAN_INVALID, // all other characters
SCAN_EOF, // end-of-file
SCAN_MAX
} scan_class;
// one scan_class value per character code (LCHR_MAX entries)
// NOTE(review): the code that fills this table is not in this part of the
// file -- presumably it is populated from InitClassTable at start-up.
static uint_8 ClassTable[LCHR_MAX];
/*
 * Flat list of ( character, scan_class ) pairs; the list ends with a pair
 * whose character is '\0'.  Letters other than 'L', digits, and any
 * character not listed here do not appear in this table.
 * NOTE(review): how unlisted characters get their class is decided by the
 * initialization code, which is outside this excerpt.
 */
static char InitClassTable[] = {
'\r', SCAN_CR,
'\n', SCAN_NEWLINE,
' ', SCAN_WHITESPACE,
'\f', SCAN_WHITESPACE,
'\t', SCAN_WHITESPACE,
'\v', SCAN_WHITESPACE,
'\'', SCAN_CHARCONST,
'"', SCAN_STRING,
'(', SCAN_DELIM1,
')', SCAN_DELIM1,
',', SCAN_DELIM1,
';', SCAN_DELIM1,
'?', SCAN_DELIM1,
'[', SCAN_DELIM1,
']', SCAN_DELIM1,
'{', SCAN_DELIM1,
'}', SCAN_DELIM1,
'~', SCAN_DELIM1,
'.', SCAN_FLOAT,
'#', SCAN_DELIM12, // # ##
'=', SCAN_DELIM1EQ, // = ==
'^', SCAN_DELIM1EQ, // ^ ^=
'!', SCAN_DELIM1EQ, // ! !=
'%', SCAN_PERCENT, // % %= %> %: %:%:
'*', SCAN_DELIM1EQ, // * *=
'&', SCAN_DELIM12EQ, // & &= &&
'|', SCAN_DELIM12EQ, // | |= ||
'+', SCAN_DELIM12EQ, // + += ++
'<', SCAN_LT, // < <= << <<= <% <:
'>', SCAN_DELIM12EQ2EQ, // > >= >> >>=
'-', SCAN_MINUS, // - -= -- -> ->*
'/', SCAN_SLASH, // / /= // /**/
':', SCAN_COLON, // : :: :>
'_', SCAN_NAME,
'L', SCAN_WIDE,
'\0', 0
};
// #undef static
// #define static
/*
 * Limits of the target's 'int' and 'unsigned int', held as 64-bit values:
 * 0x7fff / 0xffff when TARGET_INT == 2, otherwise 0x7fffffff / 0xffffffff.
 * NOTE(review): I64Val is defined elsewhere; from the constants it appears
 * to take the high 32 bits first and the low 32 bits second -- confirm.
 */
#if TARGET_INT == 2
static unsigned_64 intMax = I64Val( 0x00000000, 0x00007fff );
static unsigned_64 uintMax = I64Val( 0x00000000, 0x0000ffff );
#else
static unsigned_64 intMax = I64Val( 0x00000000, 0x7fffffff );
static unsigned_64 uintMax = I64Val( 0x00000000, 0xffffffff );
#endif
// prt_char( x ): pass x to PrtChar() only when CompFlags.cpp_output is set.
// The expansion is a bare 'if' statement, so a use that is directly
// followed by 'else' must be wrapped in braces.
#define prt_char( x ) if( CompFlags.cpp_output ) { PrtChar( x ); }
// diagnose_lex_error( e ): non-zero when e is zero, SkipLevel equals
// NestLevel, and the PPS_NO_LEX_ERRORS bit is clear in PPState.  In this
// file 'e' is the caller's 'expanding' argument.
#define diagnose_lex_error( e ) \
(!(e) && ( SkipLevel == NestLevel ) && ( PPState & PPS_NO_LEX_ERRORS ) == 0 )
/*
 * ReScanInit -- select the text that rescanBuffer() will read from.
 *
 * Only the pointer is recorded; the characters it points at are not copied.
 */
void ReScanInit( char *ptr )
/**************************/
{
    ReScanPtr = ptr;
}
/*
 * rescanBuffer -- character source installed by ReScanToken().
 *
 * Loads the byte at ReScanPtr into CurrChar, advances ReScanPtr by one and
 * returns the character.  Fetching a '\0' sets
 * CompFlags.rescan_buffer_done.
 *
 * The byte is read as 'unsigned char'.  ReScanPtr is a plain 'char *', and
 * whether plain char is signed is up to the compiler; widening a signed
 * char >= 0x80 would give a negative character code, which must not happen
 * because character codes are used as table indices (e.g. CharSet[ c ]).
 * Where plain char is already unsigned the result is unchanged.
 */
static int rescanBuffer( void )
{
    CurrChar = (unsigned char)*ReScanPtr;
    ++ReScanPtr;
    if( CurrChar == '\0' ) {
        CompFlags.rescan_buffer_done = 1;
    }
    return( CurrChar );
}
/*
 * ReScanToken -- scan one token from the text selected by ReScanInit().
 *
 * CurrChar, NextChar, TokenLine and TokenColumn are saved on entry and put
 * back before returning, so only CurToken (and ReScanPtr) change.
 *
 * Returns 1 if the terminating '\0' of the text was not fetched while
 * scanning, 0 if it was.
 */
int ReScanToken( void )
/*********************/
{
    int (*outer_nextchar)();
    int outer_currchar;
    int outer_column;
    int outer_line;

    outer_nextchar = NextChar;
    outer_currchar = CurrChar;
    outer_column = TokenColumn;
    outer_line = TokenLine;

    CompFlags.rescan_buffer_done = 0;
    NextChar = rescanBuffer;
    NextChar();                     // load the first character into CurrChar
    CurToken = ScanToken( 1 );
    // rescanBuffer() post-increments; step back so the character fetched
    // last is delivered again by the next read
    --ReScanPtr;

    TokenLine = outer_line;
    TokenColumn = outer_column;
    NextChar = outer_nextchar;
    CurrChar = outer_currchar;

    return( CompFlags.rescan_buffer_done == 0 );
}
/*
 * SetTokenSource -- install 'source' as the routine NextToken() calls to
 * obtain a token, and hand back the routine that was installed before so
 * the caller can restore it later.
 */
void (*SetTokenSource( void (*source)( void ) ))( void )
/******************************************************/
{
    void (*previous)( void ) = tokenSource;

    tokenSource = source;
    return( previous );
}
/*
 * ResetTokenSource -- install 'source' as the routine NextToken() calls to
 * obtain a token.  Unlike SetTokenSource(), nothing is returned.
 */
void ResetTokenSource( void (*source)( void ) )
/*********************************************/
{
    tokenSource = source;
}
/*
 * NextToken -- advance to the next token and return it (also in CurToken).
 *
 * If the current token is T_SAVED_ID, the token held in LAToken becomes
 * current and the token source is not consulted.  Otherwise the installed
 * token source is called; debug builds then call CtxScanToken() and
 * DumpToken().
 */
int NextToken( void )
/*******************/
{
#ifdef XTRA_RPT
    ExtraRptIncrementCtr( nextTokenCalls );
    if( tokenSource == nextMacroToken ) {
        ExtraRptIncrementCtr( nextTokenNormal );
    }
#endif
    if( CurToken != T_SAVED_ID ) {
        tokenSource();
#ifndef NDEBUG
        CtxScanToken();
        DumpToken();
#endif
        return( CurToken );
    }
    ExtraRptIncrementCtr( nextTokenSavedId );
    CurToken = LAToken;
    return( CurToken );
}
/*
 * saveNextChar -- fetch the next character and append it to Buffer.
 *
 * While TokenLen is below BUF_SIZE - 2 the character is stored and TokenLen
 * grows by one.  When TokenLen is exactly BUF_SIZE - 2 the character is
 * dropped, ERR_TOKEN_TRUNCATED is reported (only if NestLevel equals
 * SkipLevel) and TokenLen is bumped once more, so the report is made once
 * per token.  Beyond that, characters are dropped silently.
 *
 * Returns the character fetched, whether or not it was stored.
 */
static int saveNextChar( void )
{
    int ch = NextChar();

    if( TokenLen < BUF_SIZE - 2 ) {
        Buffer[TokenLen++] = ch;
        return( ch );
    }
    if( TokenLen == BUF_SIZE - 2 ) {
        if( NestLevel == SkipLevel ) {
            CErr1( ERR_TOKEN_TRUNCATED );
        }
        ++TokenLen;
    }
    return( ch );
}
/*
 * scanHex -- accumulate a run of hexadecimal digits into Constant64.
 *
 * Characters are fetched with saveNextChar() until one is neither a
 * decimal digit (C_DI) nor a hex letter (C_HX) according to CharSet; that
 * terminating character has also gone through saveNextChar().  The caller
 * is expected to have prepared Constant64 beforehand.
 *
 * If U64Cnv16() reports a problem for any digit, WARN_CONSTANT_TOO_BIG is
 * issued once at the end, subject to diagnose_lex_error( expanding ).
 *
 * Returns 0 if no digit was found, 1 otherwise.
 */
static int scanHex( int expanding )
{
    int ch;
    int digit;
    char cls;
    int seen_digit = 0;
    int overflowed = 0;

    for(;;) {
        ch = saveNextChar();
        cls = CharSet[ ch ];
        if(( cls & (C_HX|C_DI) ) == 0 ) break;
        if( cls & C_HX ) {
            // hex letter: value is 10 plus its offset from HEX_BASE once
            // HEX_MASK has been OR-ed in
            digit = (( ch | HEX_MASK ) - HEX_BASE ) + 10;
        } else {
            digit = ch - '0';
        }
        if( U64Cnv16( &Constant64, digit ) ) {
            overflowed = 1;
        }
        seen_digit = 1;
    }
    if( !seen_digit ) {
        return( 0 );    /* nothing matched after "0x" */
    }
    if( overflowed && diagnose_lex_error( expanding ) ) {
        CErr1( WARN_CONSTANT_TOO_BIG );
    }
    return( 1 );        /* at least one hex digit was consumed */
}
/*
 * idLookup -- classify the identifier of length 'len' held in Buffer.
 *
 * A macro lookup is tried first: on a hit the entry is stored through
 * 'pmeptr' and T_MACRO is returned.  Otherwise the result of KwLookup()
 * is returned and '*pmeptr' is left untouched.
 */
static int idLookup( unsigned len, MEPTR *pmeptr )
{
    MEPTR macro = MacroLookup( Buffer, len );

    if( macro == NULL ) {
        return( KwLookup( len ) );
    }
    *pmeptr = macro;
    return( T_MACRO );
}
/*
 * prt_comment_char -- echo one character of a comment to the preprocessed
 * output.
 *
 *   '\r' and LCHR_EOF are never echoed;
 *   '\n' is always handed to prt_char();
 *   anything else is handed to prt_char() only when
 *   CompFlags.keep_comments is set.
 *
 * prt_char() itself emits nothing unless CompFlags.cpp_output is set.
 */
static void prt_comment_char( int c )
{
    if( c == '\r' || c == LCHR_EOF ) {
        return;
    }
    if( c == '\n' || CompFlags.keep_comments ) {
        prt_char( c );
    }
}
/*
 * scanCComment -- consume the remainder of a C-style comment.
 *
 * CompFlags.scanning_c_comment is set for the duration of the scan.  On
 * the normal path the comment ends at the first '/' that directly follows
 * a '*', and one more NextChar() is issued after it.  If LCHR_EOF is
 * reached at one of the checked points the flag is cleared and the
 * function returns without that final NextChar().
 *
 * A '/' that is not preceded by '*' but is followed by '*' looks like the
 * start of a nested comment: if the character after that '*' is '/', the
 * comment is closed; otherwise WARN_NESTED_COMMENT is issued with the line
 * on which this comment started.
 *
 * The scanning loop appears twice.  The first copy (CompFlags.cpp_output
 * set) passes every character to prt_comment_char(); the second copy is
 * the same loop without those calls.
 */
static void scanCComment( void )
{
int c;
int prev_char;
unsigned start_line;
// NOTE(review): SrcFileCurrentLocation() is defined elsewhere; presumably
// it refreshes TokenLine before it is captured below -- confirm.
SrcFileCurrentLocation();
start_line = TokenLine;
CompFlags.scanning_c_comment = 1;
if( CompFlags.cpp_output ) {
// echo the comment opener
prt_comment_char( '/' );
prt_comment_char( '*' );
// '\0' so that the first character has no '*' before it
c = '\0';
for(;;) {
// skip ahead to the next '/', remembering the character before it
for(;;) {
prev_char = c;
c = NextChar();
prt_comment_char( c );
if( c == '/' ) break;
if( c == LCHR_EOF ) {
/* unterminated comment already detected in NextChar() */
CompFlags.scanning_c_comment = 0;
return;
}
}
// "*/" closes the comment
if( prev_char == '*' ) break;
// get next character and see if it is '*' for nested comment
c = NextChar();
prt_comment_char( c );
if( c == '*' ) {
/* '*' may be just about to close this comment */
c = NextChar();
prt_comment_char( c );
if( c == '/' ) break;
if( c == LCHR_EOF ) {
/* unterminated comment already detected in NextChar() */
CompFlags.scanning_c_comment = 0;
return;
}
SrcFileSetErrLoc();
CErr2( WARN_NESTED_COMMENT, start_line );
}
}
} else {
// same scan as above, without echoing the characters
c = '\0';
for(;;) {
// skip ahead to the next '/', remembering the character before it
for(;;) {
prev_char = c;
c = NextChar();
if( c == '/' ) break;
if( c == LCHR_EOF ) {
/* unterminated comment already detected in NextChar() */
CompFlags.scanning_c_comment = 0;
return;
}
}
// "*/" closes the comment
if( prev_char == '*' ) break;
// get next character and see if it is '*' for nested comment
c = NextChar();
if( c == '*' ) {
/* '*' may be just about to close this comment */
c = NextChar();
if( c == '/' ) break;
if( c == LCHR_EOF ) {
/* unterminated comment already detected in NextChar() */
CompFlags.scanning_c_comment = 0;
return;
}
SrcFileSetErrLoc();
CErr2( WARN_NESTED_COMMENT, start_line );
}
}
}
CompFlags.scanning_c_comment = 0;
// move past the '/' that closed the comment
NextChar();
}
/*
 * scanCppComment -- consume the remainder of a "//" comment.
 *
 * CompFlags.scanning_cpp_comment is set while the comment is scanned.
 * When preprocessed output is being produced, the "//" and every character
 * up to (but not including) the '\n' or LCHR_EOF that ends the comment are
 * passed to prt_comment_char().  Otherwise the work is delegated to
 * SrcFileScanCppComment().
 */
static void scanCppComment( void )
{
    int ch;

    CompFlags.scanning_cpp_comment = 1;
    if( !CompFlags.cpp_output ) {
        SrcFileScanCppComment();
    } else {
        prt_comment_char( '/' );
        prt_comment_char( '/' );
        ch = NextChar();
        while( ch != LCHR_EOF && ch != '\n' ) {
            prt_comment_char( ch );
            ch = NextChar();
        }
    }
    CompFlags.scanning_cpp_comment = 0;
}
/*
 * doESCChar -- evaluate one escape sequence and return its value.
 *
 * 'c' is classified with classify_escape_char():
 *   ESCAPE_OCTAL  up to three octal digits, starting with 'c', are
 *                 accumulated;
 *   ESCAPE_HEX    scanHex() collects the digits; if there are none the
 *                 value is the letter 'x';
 *   ESCAPE_NONE   the value is 'c' itself;
 *   otherwise     the classification result is the value.
 *
 * Unless 'char_type' is TYP_WCHAR, an octal value above 0377 or a hex
 * value above 0xff is masked to 8 bits, with a diagnostic subject to
 * diagnose_lex_error( expanding ).
 *
 * In the ESCAPE_NONE and "otherwise" cases one further character is
 * fetched with saveNextChar() before returning.
 */
static int doESCChar( int c, int expanding, int char_type )
{
    unsigned value;
    unsigned digits_left;
    int kind;

    kind = classify_escape_char( c );
    if( kind == ESCAPE_OCTAL ) {
        value = 0;
        digits_left = 3;
        while( digits_left != 0 ) {
            c = octal_dig( c );
            if( c == 8 ) break;         // not an octal digit
            value = value * 8 + c;
            c = saveNextChar();
            --digits_left;
        }
        if( value > 0377 && char_type != TYP_WCHAR ) {
            if( diagnose_lex_error( expanding ) ) {
                SrcFileSetErrLoc();
                CErr1( ANSI_INVALID_OCTAL_ESCAPE );
            }
            value &= 0377;
        }
        return( value );
    }
    if( kind == ESCAPE_HEX ) {
        U64Clear( Constant64 );
        if( !scanHex( expanding ) ) {
            /* '\xz' where z is not a hex char */
            return( 'x' );
        }
        value = U32FetchTrunc( Constant64 );
        if( value > 0x0ff && char_type != TYP_WCHAR ) {
            if( diagnose_lex_error( expanding ) ) {
                SrcFileSetErrLoc();
                CErr1( ANSI_INVALID_HEX_ESCAPE );
            }
            value &= 0x0ff;
        }
        return( value );
    }
    if( kind == ESCAPE_NONE ) {
        value = c;
    } else {
        value = kind;
    }
    saveNextChar();
    return( value );
}
static int charConst( int char_type, int expanding )
{
int c;
int i;
int token;
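/* [End of page 1 of 4 of the scraped listing. The remaining lines were the
 * code viewer's keyboard-shortcut help, not part of cscan.c:
 *   Copy code        Ctrl + C
 *   Search code      Ctrl + F
 *   Full-screen mode F11
 *   Larger font      Ctrl + =
 *   Smaller font     Ctrl + -
 *   Show shortcuts   ?          ] */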