lexxer.c

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C语言 代码 · 共 410 行

C
410
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
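* Description:  Table-driven lexical scanner (yylex) plus the current-line
*               buffer helpers used with it. Token text is folded to lower
*               case because PowerBuilder is case insensitive.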
*
****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <errno.h>
#include "global.h"
#include "mem.h"
#include "error.h"
#include "keywords.h"
#include "hashtab.h"
#include "types.h"
#include "sruinter.h"
#include "ytab.gh"
#include "filelist.h"

/* Defined outside this file; yylex() calls it after scanning a comment. */
extern void SetComment(void);

/*
 * Semantic value of the token returned by yylex(), which fills in
 * o_value, o_string or o_type depending on the token.
 */
extern YYSTYPE yylval;

/* Input stream: read by nextChar(), pushed back onto by putBackChar(). */
FILE    *yyin;

/*
 * Token classes stored in the fsm table. yylex() remembers the class of
 * the last transition it took and switches on it to finish the token.
 */
enum {
    CH_RET,     /* must be 0, single character return   */
    CH_ID,      /* identifier state                     */
    CH_INT,     /* integer state                        */
    CH_LIT,     /* literal string state                 */
    CH_COM,     /* comment state                        */
    CH_WS       /* white space                          */
};

/*
 * cell definition for the fsm: one cell per (state, input character) pair.
 * A cell with new_state 0 and class CH_RET makes yylex() stop scanning.
 */
typedef struct fsm_cell {
    char        new_state;      /* new state on the transition  */
    char        class;          /* new class after transition (a CH_ value) */
} fsm_cell;

/* commonly used constants */
#define LINE_INC                ( 8 * 1024 )    /* buffer size / growth step */
#define MAX_STATES              10              /* rows of the fsm table     */
#define MAX_CHAR                256             /* columns of the fsm table  */
/* parenthesized so the expansion is safe inside any larger expression */
#define CASE_SHIFT              ( 'a' - 'A' )

/* values taken by lexStatus */
#define LS_INIT                 0
#define LS_NORMAL               1
#define LS_END_OF_FILE          ( -1 )

/* fsm table, indexed as [current state][input character]; filled by initLex() */
static fsm_cell fsmTable[MAX_STATES][MAX_CHAR];

/*
 * macro for setting up the table: when character ch is read in the given
 * state, move to state new and give the token the class cls
 */
#define _SetTable( state, ch, new, cls ) \
        (fsmTable[state][ch].new_state = new, fsmTable[state][ch].class = cls)

static int      lexStatus = LS_INIT;    /* LS_INIT, LS_NORMAL or LS_END_OF_FILE */
static int      yyTextSize = 0;         /* allocated size of yyText     */
static char     *yyText = NULL;         /* token buffer                 */
static char     *yyLine = NULL;         /* Current line buffer          */
static int      yyLineCnt = 0;          /* characters stored in yyLine  */
static char     idBuffer[LINE_INC];     /* token text handed out by add2IdBuffer() */
static int      idPtr;                  /* offset of the next free byte in idBuffer */
static hash_tab keyTable;               /* hash table of keywords       */
static BOOL     yyLineFini;             /* TRUE when yylex() ended the line on a // comment */

/* forward declarations of the file-local helpers defined below */
static void     destroyLex( void );
static void     initLex( void );
static void     putBackChar( int c );
static int      nextChar( void );
static int      checkKeyWord( long hash, const char *id, int id_len, int *kval);
static char     *add2IdBuffer( char *src );

/*
 * main lexxer routine
 *
 * Feeds characters from yyin through the fsm table until a transition with
 * new_state 0 and class CH_RET is met, then finishes the token according to
 * the class of the last transition taken.  Token text is lower-cased as it
 * is collected.
 *
 * Returns:
 *      CT_INTEGER      yylval.o_value holds the value
 *      ID_IDENTIFIER   yylval.o_string holds the name
 *      a keyword id    yylval.o_type holds the same id
 *      CT_STRING       yylval.o_string holds the literal, quotes included
 *      FI_EOF          when end of file is first reached
 *      0               at '\n' or ';', after a // comment, and on every
 *                      call made after FI_EOF was returned
 *      otherwise the single character that was read
 *
 * White space and block comments are skipped by calling yylex() again.
 */
int yylex() {
/***********/

    int         c;
    int         state = 0;
    int         class = CH_RET;
    int         new_state;
    int         new_class;
    int         ccnt = 0;
    int         hash = 0;

    yyLineFini = FALSE;

    /* do initialization and eof checking */
    if( lexStatus == LS_INIT ) {
        initLex();
        lexStatus = LS_NORMAL;
    } else if( lexStatus == LS_END_OF_FILE ) {
        return( 0 );
    }

    /* Run the fsm for next token */
    for( ;; ) {
        c = nextChar();
        if( c == EOF ) {
            /* NOTE: any token text collected so far is discarded here */
            lexStatus = LS_END_OF_FILE;
            return( FI_EOF );
        }

        new_state = fsmTable[state][c].new_state;
        new_class = fsmTable[state][c].class;
        if( ( new_state == 0 ) && ( new_class == CH_RET ) ) {
            /* c terminates the token; it has NOT been added to yyText */
            break;
        }

        hash = HashByte( hash, tolower( c ) );
        yyText[ ccnt++ ] = tolower( c );  // power builder is case insensitive
        if( ccnt > ( yyTextSize - 16 ) ) { // 16 byte safety zone
            yyTextSize += LINE_INC;
            yyText = MemRealloc( yyText, yyTextSize );
        }
        class = new_class;
        state = new_state;
    }

    /* process token */
    yyLine[ yyLineCnt ] = 0;
    yyText[ ccnt ] = 0;
    switch( class ) {
    case( CH_INT ):                             /* integer */
        putBackChar( c );
        yylval.o_value = atoi( yyText );
        return( CT_INTEGER );
    case( CH_ID ):                              /* identifier or keyword */
        putBackChar( c );
        /* c is reused to receive the keyword id */
        switch( checkKeyWord( hash, yyText, ccnt, &c ) ) {
        case( ID_IDENTIFIER ):
            yylval.o_string = add2IdBuffer( yyText );
            return( ID_IDENTIFIER );
        default:
            /* if its not an identifier, it is a keyword */
            yylval.o_type = c;
            return( c );
        }
    case( CH_LIT ):                             /* string literal */
        /* c is the closing quote; keep it as part of the literal */
        yyText[ ccnt++ ] = c;
        yyText[ ccnt ] = 0;
        yylval.o_string = add2IdBuffer( yyText );
        return( CT_STRING );
    case( CH_COM ):                             /* comment */
        SetComment();
        if( c == '\n' ) {
            FinishLine();
            yyLineFini = TRUE;
            return( 0 ); /* execute parse */
        }
        /* else c == '/' */
        return( yylex() );
    case( CH_WS ):                              /* white space */
        putBackChar( c );
        return( yylex() );
    default:                                    /* single character */
        if( ccnt > 0 ) {
            /*
             * Only the '/' transition out of state 0 keeps class CH_RET
             * while collecting text, so a '/' that did not start a comment
             * was read before c.  Return that '/' and push c back so the
             * next call scans it, instead of dropping the '/'.
             */
            putBackChar( c );
            return( yyText[ 0 ] );
        }
        if( ( c == '\n' ) || ( c == ';' ) ) {
            FinishLine();
            return( 0 );
        }
        return( c );
    }
}


/*
 * GetParsedLine - hand back the current line buffer.  The pointer refers
 * to the lexer's own storage, not to a copy.
 */
char *GetParsedLine( void )
{
    return yyLine;
}


/*
 * LineFinished - report the yyLineFini flag, which yylex() clears on entry
 * and sets when a // comment ran up to the end of the line.
 */
BOOL LineFinished( void )
{
    return yyLineFini;
}


/*
 * FinishLine - terminate the text held in the line buffer and reset the
 * character count so the next line starts at the front of the buffer.
 * Does nothing when the count is already zero.
 */
void FinishLine( void )
{
    if( yyLineCnt == 0 ) {
        return;
    }

    yyLine[ yyLineCnt ] = 0;
    yyLineCnt = 0;
}


/*
 * GetToEOS - skip input up to the end of the statement (';', '\n' or end
 * of file); used for error handling.  The terminating character is pushed
 * back onto the input but still counted in the line buffer; an EOF marker
 * is dropped from the count.  The line buffer is NUL terminated on return.
 */
void GetToEOS( void )
{
    int         ch;

    for( ;; ) {
        ch = nextChar();
        if( ch == ';' || ch == '\n' || ch == EOF ) {
            break;
        }
    }

    if( ch == EOF ) {
        yyLineCnt--;
    } else {
        putBackChar( ch );
        yyLineCnt++;
    }

    yyLine[ yyLineCnt ] = 0;
}


/*
 * initLex - one-time set-up, called by yylex() while lexStatus is LS_INIT.
 * Builds the fsm table, loads the keyword hash table from Statements[] and
 * allocates the token and line buffers.
 *
 * States used in the table:
 *      0  start                   5  inside a block comment
 *      1  integer                 6  block comment, '*' just seen
 *      2  identifier              7  inside a string literal
 *      3  '/' seen                8  string literal, '~' just seen
 *      4  inside a // comment     9  white space
 */
static void initLex( void ) {
/****************************/

    /* non-alphanumeric characters accepted in identifiers */
    static const int    idPunct[] = { '$', '#', '_', '-', '%' };
    int                 x;
    int                 st;
    int                 lower;
    int                 i;

    memset( fsmTable, 0, sizeof( fsmTable ) );

    /* Set white space transitions */
    _SetTable( 0, ' ', 9, CH_WS );
    _SetTable( 0, '\t', 9, CH_WS );
    _SetTable( 0, '\r', 9, CH_WS );
    _SetTable( 9, ' ', 9, CH_WS );
    _SetTable( 9, '\t', 9, CH_WS );
    _SetTable( 9, '\r', 9, CH_WS );

    /* set integer transitions; a digit inside an identifier stays there */
    for( x = '0'; x <= '9'; x++ ) {
        _SetTable( 0, x, 1, CH_INT );
        _SetTable( 1, x, 1, CH_INT );
        _SetTable( 2, x, 2, CH_ID );
    }

    /*
     * set identifier transitions: a letter or identifier punctuation read
     * in the start, integer or identifier state leads to the identifier
     * state
     */
    for( x = 'A'; x <= 'Z'; x++ ) {
        lower = x - 'A' + 'a';
        for( st = 0; st <= 2; st++ ) {
            _SetTable( st, x, 2, CH_ID );
            _SetTable( st, lower, 2, CH_ID );
        }
    }
    for( st = 0; st <= 2; st++ ) {
        for( i = 0; i < (int)( sizeof( idPunct ) / sizeof( idPunct[0] ) ); i++ ) {
            _SetTable( st, idPunct[i], 2, CH_ID );
        }
    }

    /* set comment transitions */
    _SetTable( 0, '/', 3, CH_RET );
    _SetTable( 3, '/', 4, CH_COM );
    _SetTable( 3, '*', 5, CH_COM );
    for( x = 0; x < MAX_CHAR; x++ ) {
        _SetTable( 4, x, 4, CH_COM );
        _SetTable( 5, x, 5, CH_COM );
        _SetTable( 6, x, 5, CH_COM );
    }
    _SetTable( 4, '\n', 0, CH_RET );
    _SetTable( 5, '*', 6, CH_COM );
    /*
     * stay in the "'*' seen" state on a run of stars so that a comment
     * closed with more than one star before the '/' still terminates
     */
    _SetTable( 6, '*', 6, CH_COM );
    _SetTable( 6, '/', 0, CH_RET );

    /* literal transitions; '~' makes the next character part of the literal */
    _SetTable( 0, '"', 7, CH_LIT );
    for( x = 0; x < MAX_CHAR; x++ ) {
        _SetTable( 7, x, 7, CH_LIT );
        _SetTable( 8, x, 7, CH_LIT );
    }
    _SetTable( 7, '~', 8, CH_LIT );
    _SetTable( 7, '"', 0, CH_RET );

    /* load keywords into hash table; Statements[] ends with a NULL key */
    keyTable = NewHashTable( HASH_PRIME );
    for( x = 0; Statements[x].key != NULL; x++ ) {
        InsertHashValue( keyTable, Statements[x].key, strlen(Statements[x].key),
                         &Statements[x] );
    }

    yyTextSize = LINE_INC;
    yyText = MemMalloc( LINE_INC );
    yyLine = MemMalloc( LINE_INC );
    idPtr = 0;
    idBuffer[ 0 ] = 0;
    /* destroyLex() is deliberately left unregistered: */
//    atexit( destroyLex );
}


/*
 * putBackChar - push character c back onto yyin and drop it from the
 * line buffer count, undoing the bookkeeping nextChar() did for it.
 */
static void putBackChar( int c )
{
    assert( yyin );

    --yyLineCnt;
    ungetc( c, yyin );
}


static int      nextChar( void ) {
/********************************/

    int         c;

    assert( yyin );

    errno = 0;
    c = fgetc( yyin );
    if( ( c == EOF ) && errno ) {
        WigCloseFile( yyin );
        Error( FILE_READ_ERR );
    }
    yyLine[ yyLineCnt++ ] = c;
    return( c );
}


static void     destroyLex( void ) {
/**********************************/

        MemFree( yyText );
        MemFree( yyLine );
        DestroyHashTable( keyTable );
}


/*
 * checkKeyWord - look the token up in the keyword hash table.
 *
 *      hash    hash value of the token text
 *      id      token text (must not be NULL)
 *      id_len  length of the token text
 *      kval    receives the keyword id on a match (must not be NULL)
 *
 * Returns ID_IDENTIFIER and leaves *kval alone when the token is not in
 * the table; otherwise stores the keyword id in *kval and returns it.
 */
static int checkKeyWord( long hash, const char *id, int id_len, int *kval )
{
    keyword     *entry;

    assert( id );
    assert( kval );

    entry = FindHashEntry( keyTable, hash, id, id_len );
    if( entry != NULL ) {
        *kval = entry->id;
        return( entry->id );
    }
    return( ID_IDENTIFIER );
}


/*
 * add2IdBuffer - copy a token into idBuffer and return the address of the
 * copy, so the text outlives the token buffer it came from.
 *
 * idBuffer is reused from the start once a token no longer fits in the
 * space that is left, so a returned pointer stays valid only until the
 * buffer wraps around and that region is overwritten.
 *
 * A token too long to fit even in an empty buffer is truncated to
 * LINE_INC - 1 characters rather than written past the end of idBuffer.
 */
static char *add2IdBuffer( char *src ) {
/**************************************/

    size_t      srcLen;
    int         len;
    char        *dst;

    assert( src );

    /* add token to identifier buffer for delayed destruction */

    srcLen = strlen( src );
    if( srcLen >= LINE_INC ) {
        srcLen = LINE_INC - 1;          /* truncate: cannot fit otherwise */
    }
    len = (int)srcLen + 1;              /* bytes needed, terminator included */

    if( idPtr + len >= LINE_INC ) {
        idPtr = 0;                      /* wrap to the start of the buffer */
    }
    dst = idBuffer + idPtr;
    memcpy( dst, src, srcLen );
    dst[ srcLen ] = 0;
    idPtr += len;
    return( dst );
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?