/* cscan.c */
/****************************************************************************
*
* Open Watcom Project
*
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
*
* This file contains Original Code and/or Modifications of Original
* Code as defined in and that are subject to the Sybase Open Watcom
* Public License version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
* provided with the Original Code and Modifications, and is also
* available at www.sybase.com/developer/opensource.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
* NON-INFRINGEMENT. Please see the License for the specific language
* governing rights and limitations under the License.
*
* ========================================================================
*
* Description: C lexical analyzer. Closely coupled with preprocessor.
*
****************************************************************************/
#include "i64.h"
#include "cvars.h"
#include "scan.h"
#include "escchars.h"
#include "asciiout.h"
/*
 * Lexical classes assigned to source characters.  InitClassTable pairs
 * individual characters with these values.  SCAN_NAME is explicitly 0;
 * the remaining enumerators take consecutive values in declaration order,
 * so the order must not be changed without checking every user.
 */
enum scan_class {
SCAN_NAME = 0, // identifier
SCAN_WIDE, // L"abc" or L'a' or Lname
SCAN_NUM, // number that starts with a digit
SCAN_QUESTION, // '?'
SCAN_SLASH, // '/'
SCAN_MINUS, // '-'
SCAN_EQUAL, // '='
SCAN_COLON, // ':'
SCAN_STAR, // '*'
SCAN_DELIM1, // single character delimiter
SCAN_DELIM2, // one, two, or three byte delimiter
SCAN_DOT, // .
SCAN_STRING, // "string"
SCAN_CHARCONST, // 'a'
SCAN_CR, // '\r'
SCAN_NEWLINE, // '\n'
SCAN_WHITESPACE, // all whitespace
SCAN_INVALID, // all other characters
SCAN_MACRO, // get next token from macro
SCAN_EOF // end-of-file
};
// position recorded by ReScanInit() and reported by ReScanPos()
static char *ReScanPtr;
static int SavedCurrChar; // used when getting tokens from a macro
// NOTE(review): presumably indexed by character value and filled from
// InitClassTable; the initialization code is not visible here - confirm
unsigned char ClassTable[260];
// per-character weights used by CalcHash() for the keyword hash;
// indexed by (character - ' ')
extern char TokValue[];
extern unsigned short __FAR UniCode[];
/*
 * Flat list of (character, scan class) pairs for the characters that have
 * a dedicated lexical class.  The list ends with the pair '\0', 0.
 * NOTE(review): the code that consumes this table is not visible in this
 * section; presumably it seeds ClassTable - confirm.
 */
unsigned char InitClassTable[] = {
'\r', SCAN_CR,
'\n', SCAN_NEWLINE,
' ', SCAN_WHITESPACE,
'\f', SCAN_WHITESPACE,
'\t', SCAN_WHITESPACE,
'\v', SCAN_WHITESPACE,
'\'', SCAN_CHARCONST,
'"', SCAN_STRING,
'(', SCAN_DELIM1,
')', SCAN_DELIM1,
',', SCAN_DELIM1,
';', SCAN_DELIM1,
'?', SCAN_QUESTION,
'/', SCAN_SLASH,
'-', SCAN_MINUS,
'=', SCAN_EQUAL,
':', SCAN_COLON,
'*', SCAN_STAR,
'[', SCAN_DELIM1,
']', SCAN_DELIM1,
'{', SCAN_DELIM1,
'}', SCAN_DELIM1,
'~', SCAN_DELIM1,
'.', SCAN_DOT,
'!', SCAN_DELIM2,
'#', SCAN_DELIM2,
'%', SCAN_DELIM2,
'&', SCAN_DELIM2,
'+', SCAN_DELIM2,
'<', SCAN_DELIM2,
'>', SCAN_DELIM2,
'^', SCAN_DELIM2,
'|', SCAN_DELIM2,
'_', SCAN_NAME,
'L', SCAN_WIDE,
'\0', 0
};
/*
 * Prototypes for file-local helpers that are called before their
 * definitions (the definitions are not visible in this section).
 */
static void UnGetChar( int c );
static int ScanString( void );
static int CharConst( int char_type );
static void ScanComment( void );
/*
 * ReScanInit - record the position that ReScanPos() will report.
 *
 * ptr: pointer value to store in ReScanPtr.
 */
void ReScanInit( char *ptr )
{
    ReScanPtr = ptr;
}
/*
 * ReScanPos - return the pointer most recently stored by ReScanInit().
 */
char *ReScanPos()
{
    return( ReScanPtr );
}
/*
 * ReScanBuffer - fetch the next character through ScanCharPtr.
 *
 * The character becomes CurrChar and ScanCharPtr is advanced past it.
 * When the fetched character is the terminating '\0',
 * CompFlags.rescan_buffer_done is set.  Returns the fetched character.
 *
 * Needs to be global so that it works with the 9.0 overlay manager.
 */
int ReScanBuffer()
{
    CurrChar = *ScanCharPtr;
    ++ScanCharPtr;
    if( CurrChar != '\0' ) {
        return( CurrChar );
    }
    CompFlags.rescan_buffer_done = 1;
    return( CurrChar );
}
/*
 * SaveNextChar - read the next character and append it to the token buffer.
 *
 * The character is obtained with NextChar().  If TokenLen has reached
 * BufSize - 2, EnlargeBuffer() is called first.  The character is then
 * stored at Buffer[TokenLen] and TokenLen is incremented.
 * Returns the character that was read.
 */
int SaveNextChar()
{
    int     ch;

    ch = NextChar();
    if( TokenLen >= BufSize - 2 ) {
        EnlargeBuffer( TokenLen * 2 );
    }
    Buffer[TokenLen++] = ch;
    return( ch );
}
/*
 * hashpjw - hash a NUL-terminated string.
 *
 * s: string to hash.
 *
 * Returns 0 for the empty string and the value of the single character
 * for a one-character string.  For longer strings each further character
 * is shifted into the running value, which is then reduced to 12 bits;
 * from the third character on, the bits above bit 11 are XOR-folded back
 * in before the reduction.
 */
unsigned hashpjw( char *s )
{
    unsigned    hash;
    char        ch;

    hash = *s++;
    if( hash == 0 ) {
        return( hash );
    }
    ch = *s++;
    if( ch == '\0' ) {
        return( hash );
    }
    /* second character: shift in and mask, no fold */
    hash = ( ( hash << 4 ) + ch ) & 0x0fff;
    /* remaining characters: shift in, fold the overflow, mask */
    while( (ch = *s++) != '\0' ) {
        hash = ( hash << 4 ) + ch;
        hash = ( hash ^ (hash >> 12) ) & 0x0fff;
    }
    return( hash );
}
/*
 * CalcHash - compute the keyword, symbol and macro hash values for a name.
 *
 * id:  NUL-terminated identifier text.
 * len: length of the identifier.
 *
 * Stores the keyword hash in KwHashValue, the symbol hash in HashValue and
 * the macro hash in MacHashValue.  Returns HashValue.
 */
int CalcHash( char *id, int len )
{
    unsigned    kw_hash;
    unsigned    name_hash;

    /* keyword hash: length plus scaled TokValue weights of chosen characters */
    kw_hash = len + TokValue[ id[ FIRST_INDEX ] - ' ' ] * FIRST_SCALE;
#if LAST_INDEX > 0
    if( len >= LAST_INDEX+1 ) {
        kw_hash += TokValue[ id[len-(LAST_INDEX+1)] - ' ' ] * LAST_SCALE;
    }
#else
    kw_hash += TokValue[ id[len-(LAST_INDEX+1)] - ' ' ] * LAST_SCALE;
#endif
    kw_hash &= KEYWORD_HASH_MASK;
#ifdef KEYWORD_HASH_EXTRA
    if( kw_hash >= KEYWORD_HASH ) {
        kw_hash -= KEYWORD_HASH;
    }
#endif
    KwHashValue = kw_hash;

    /* symbol and macro hashes are both derived from hashpjw() */
    name_hash = hashpjw( id );
    HashValue = name_hash % SYM_HASH_SIZE;
#if ( MACRO_HASH_SIZE > 0x0ff0 ) && ( MACRO_HASH_SIZE < 0x0fff )
    /* table size is just below 0x0fff: one subtraction replaces the modulo */
    name_hash &= 0x0fff;
    if( name_hash >= MACRO_HASH_SIZE ) {
        name_hash -= MACRO_HASH_SIZE;
    }
#else
    name_hash %= MACRO_HASH_SIZE;
#endif
    MacHashValue = name_hash;
    return( HashValue );
}
/*
 * KwLookup - decide whether buf is a keyword.
 *
 * buf: NUL-terminated identifier text; KwHashValue must already hold the
 *      keyword hash of this text.
 *
 * Returns the keyword's token value when buf matches the keyword-table
 * entry selected by the hash, otherwise T_ID.  Without C99 extensions the
 * C99-only keywords are treated as identifiers, and so is "inline" unless
 * compiler extensions are enabled.
 */
int KwLookup( const char *buf )
{
    int     tok;
    char    *name;

    tok = KwHashValue + FIRST_KEYWORD;
    if( !CompFlags.c99_extensions ) {
        switch( tok ) {
        case T_INLINE:
            if( !CompFlags.extensions_enabled ) {
                tok = T_ID;
            }
            break;
        case T_RESTRICT:
        case T__COMPLEX:
        case T__IMAGINARY:
        case T__BOOL:
        case T___OW_IMAGINARY_UNIT:
            tok = T_ID;
            break;
        default:
            break;
        }
    }
    name = Tokens[ tok ];
    /* cheap first-character test before the full comparison */
    if( *name == buf[0] && strcmp( name, buf ) == 0 ) {
        return( tok );
    }
    /* no match in the keyword table: plain identifier */
    return( T_ID );
}
/*
 * IdLookup - classify a name as macro, keyword or identifier.
 *
 * buf: NUL-terminated identifier text.
 *
 * If MacroLookup() finds a macro entry, it is saved in NextMacro and
 * T_MACRO is returned.  Otherwise the result of KwLookup() is returned.
 */
int IdLookup( const char *buf )
{
    MEPTR   macro;

    macro = MacroLookup( buf );
    if( macro == NULL ) {
        return( KwLookup( buf ) );
    }
    NextMacro = macro;          /* remember which macro was found */
    return( T_MACRO );
}
/*
 * doScanName - finish scanning an identifier and classify it.
 *
 * On entry Buffer holds TokenLen characters of the name and CurrChar is
 * the character stored at Buffer[TokenLen - 1] (see ScanName / ScanWide).
 * The remaining identifier characters are copied into Buffer, the name is
 * NUL-terminated, TokenLen is updated and CalcHash() is called.
 *
 * Returns T_ID while doing macro expansion or when
 * CompFlags.pre_processing == 2.  Otherwise the result of IdLookup() is
 * returned, except for T_MACRO: then either the macro is expanded and the
 * first token of the expansion is returned, or the name is handled as an
 * ordinary identifier/keyword when a parameterized macro is not followed
 * by '('.
 */
int doScanName()
{
int token;
// NOTE(review): characters are stored through u.uc but tested through
// u.c; this relies on uc overlaying the low-order byte of c and on the
// other bytes keeping the value set from CurrChar - confirm for every
// target byte order.
union {
int c;
unsigned char uc;
} u;
char *scanptr;
char *p;
u.c = CurrChar;
// we know that NextChar will be pointing to GetNextChar()
// so it is safe to inline the function here.
// NextChar could also be pointing to ReScanBuffer().
// start at the slot of the current character; the first loop round
// stores that character again
p = &Buffer[TokenLen - 1];
for(;;) {
scanptr = ScanCharPtr;
// copy loop, unrolled eight times: test, store, fetch
for(;;) {
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
*p++ = u.uc;
u.uc = *scanptr++;
// capacity is checked once per eight characters; Buffer may move when
// enlarged, so p is rebased onto the new block
if( p >= &Buffer[BufSize - 16] ) {
char *oldbuf = Buffer;
EnlargeBuffer( BufSize * 2 );
p += Buffer - oldbuf;
}
}
// publish how far the inlined fetch loop advanced
ScanCharPtr = scanptr;
// a character flagged C_EX is passed to GetCharCheck(); scanning resumes
// only if the result is again a letter or digit
if( (CharSet[u.c] & C_EX) == 0 ) break;
u.c = GetCharCheck( u.c );
if( (CharSet[u.c] & (C_AL | C_DI)) == 0 ) break;
}
CurrChar = u.c;
// make sure there is room before terminating the name
if( p >= &Buffer[BufSize - 18] ) {
char *oldbuf = Buffer;
EnlargeBuffer( BufSize * 2 );
p += Buffer - oldbuf;
}
*p = '\0';
TokenLen = p - Buffer;
CalcHash( Buffer, TokenLen );
// no macro/keyword lookup in these modes
if( CompFlags.doing_macro_expansion ) return( T_ID );
if( CompFlags.pre_processing == 2 ) return( T_ID );
token = IdLookup( Buffer );
if( token == T_MACRO ) {
if( CompFlags.cpp_output ) {
PrtChar( ' ' ); /* put white space in front */
}
// macros with macro_defn == 0 are handled by SpecialMacro()
if( NextMacro->macro_defn == 0 ) {
return( SpecialMacro( NextMacro ) );
}
NextMacro->macro_flags |= MACRO_REFERENCED; /* 04-apr-94 */
/* if macro requires parameters and next char is not a '('
then this is not a macro */
if( NextMacro->parm_count != 0 ) {
SkipAhead();
if( CurrChar != '(' ) {
if( CompFlags.cpp_output ) { /* 30-sep-90 */
// when producing preprocessor output a blank is appended to the name
Buffer[TokenLen++] = ' ';
Buffer[TokenLen] = '\0';
return( T_ID );
}
return( KwLookup( Buffer ) );
}
}
DoMacroExpansion(); /* start macro expansion */
GetMacroToken();
token = CurToken;
// disabled code, never compiled
#if 0
if( MacroPtr != NULL ) {
SavedCurrChar = CurrChar;
CurrChar = MACRO_CHAR;
}
#endif
// an expansion that yields T_NULL is reported as white space
if( token == T_NULL ) token = T_WHITE_SPACE;
}
return( token );
}
/*
 * ScanName - scan a name that begins with the current character.
 *
 * Places CurrChar at the start of the token buffer, sets the token length
 * to one and lets doScanName() do the rest.  Returns doScanName()'s token.
 */
int ScanName()
{
    TokenLen = 1;
    Buffer[0] = CurrChar;
    return( doScanName() );
}
/*
 * ScanWide - scan a token that starts with 'L'.
 *
 * The character after the 'L' selects the kind of token:
 *   '"'   wide string literal; CompFlags.wide_char_string is set after
 *         ScanString() returns
 *   '\''  wide character constant (CharConst with TYPE_WCHAR)
 *   else  ordinary identifier, completed by doScanName()
 * Returns the token produced by the selected scanner.
 */
int ScanWide()
{
    int     next;
    int     token;

    Buffer[0] = 'L';
    next = NextChar();
    Buffer[1] = next;
    TokenLen = 2;
    switch( next ) {
    case '"':                           // L"abc"
        token = ScanString();
        CompFlags.wide_char_string = 1;
        break;
    case '\'':                          // L'a'
        token = CharConst( TYPE_WCHAR );
        break;
    default:                            // regular identifier
        token = doScanName();
        break;
    }
    return( token );
}
/*
 * ScanDotSomething - finish a token that started with '.' but is not a
 * floating-point constant.
 *
 * c: the character that was read after the '.'.
 *
 * If c is another '.', one more character is read: a third '.' yields
 * T_DOT_DOT_DOT.  Otherwise that character is pushed back with
 * UnGetChar() and CurrChar is set to '.'.  In every non-ellipsis case
 * the token is cut back to length 1 (Buffer[1] = '\0') and T_DOT is
 * returned.
 */
int ScanDotSomething( int c )
{
    int     third;

    if( c == '.' ) {
        third = SaveNextChar();
        if( third == '.' ) {
            NextChar();
            return( T_DOT_DOT_DOT );
        }
        /* only ".." so far: hand the extra character back */
        CurrChar = '.';
        UnGetChar( third );
    }
    TokenLen = 1;
    Buffer[1] = '\0';
    return( T_DOT );
}
/*
 * doScanFloat - scan the fraction, exponent and suffix of a floating-point
 * constant, starting at CurrChar.
 *
 * Clears BadTokenInfo, then:
 *   - if CurrChar is '.', saves the following digits; when TokenLen is 2
 *     afterwards (the ".?" case) the work is handed to ScanDotSomething()
 *   - accepts an optional exponent ('e'/'E', optional sign, digits); an
 *     exponent without digits makes the token T_BAD_TOKEN with
 *     ERR_INVALID_FLOATING_POINT_CONSTANT
 *   - accepts an optional suffix and sets ConstType: 'f'/'F' gives
 *     TYPE_FLOAT, 'l'/'L' gives TYPE_LONG_DOUBLE when
 *     CompFlags.use_long_double is set (TYPE_DOUBLE otherwise), and no
 *     suffix gives TYPE_DOUBLE
 * The token text is NUL-terminated and CurToken is returned.
 */
int doScanFloat()
{
    int     ch;

    BadTokenInfo = 0;
    ch = CurrChar;
    if( ch == '.' ) {
        /* fraction digits */
        do {
            ch = SaveNextChar();
        } while( ch >= '0' && ch <= '9' );
        if( TokenLen == 2 ) {           /* .? */
            return ScanDotSomething( ch );
        }
    }
    CurToken = T_CONSTANT;
    if( ch == 'e' || ch == 'E' ) {
        ch = SaveNextChar();
        if( ch == '+' || ch == '-' ) {
            ch = SaveNextChar();
        }
        if( ch >= '0' && ch <= '9' ) {
            /* exponent digits */
            do {
                ch = SaveNextChar();
            } while( ch >= '0' && ch <= '9' );
        } else {
            CurToken = T_BAD_TOKEN;
            BadTokenInfo = ERR_INVALID_FLOATING_POINT_CONSTANT;
        }
    }
    switch( ch ) {
    case 'f':
    case 'F':
        NextChar();
        ConstType = TYPE_FLOAT;
        break;
    case 'l':
    case 'L':
        NextChar();
        if( CompFlags.use_long_double ) {
            ConstType = TYPE_LONG_DOUBLE;
        } else {
            ConstType = TYPE_DOUBLE;
        }
        break;
    default:
        /* the last saved character is not part of the constant */
        --TokenLen;
        ConstType = TYPE_DOUBLE;
        break;
    }
    Buffer[TokenLen] = '\0';
    return( CurToken );
}
static void doScanAsmToken( void )
{
char *scanptr;
char *p;
int c;
p = &Buffer[TokenLen];
c = NextChar();
for(;;) {
scanptr = ScanCharPtr;
for(;;) {
if( (CharSet[c] & (C_AL | C_DI)) == 0 ) break;
*p++ = c;
c = *scanptr++;
if( (CharSet[c] & (C_AL | C_DI)) == 0 ) break;
*p++ = c;
c = *scanptr++;
if( (CharSet[c] & (C_AL | C_DI)) == 0 ) break;
*p++ = c;
c = *scanptr++;
if( (CharSet[c] & (C_AL | C_DI)) == 0 ) break;
*p++ = c;
c = *scanptr++;
if( (CharSet[c] & (C_AL | C_DI)) == 0 ) break;
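/* NOTE: this listing is truncated here; the remainder of doScanAsmToken()
 * and the rest of the file are missing.  The text that followed at this
 * point was code-viewer keyboard-shortcut help, not part of the source. */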