📄 getsym.c

📁 一款拥有一定历史的C语言编译器
💻 C
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/*
 * C compiler
 * ==========
 *
 * Copyright 1989, 1990, 1991 Christoph van Wuellen.
 * Credits to Matthew Brandt.
 * All commercial rights reserved.
 *
 * This compiler may be redistributed as long there is no
 * commercial interest. The compiler must not be redistributed
 * without its full sources. This notice must stay intact.
 *
 * History:
 *
 * 1989   starting an 68000 C compiler, starting with material
 *		  originally by M. Brandt
 * 1990   68000 C compiler further bug fixes
 *		  started i386 port (December)
 * 1991   i386 port finished (January)
 *		  further corrections in the front end and in the 68000
 *		  code generator.
 *		  The next port will be a SPARC port
 */

/*****************************************************************************/

#include "chdr.h"
#include "expr.h"
#include "cglbdec.h"
#include "proto.h"

#include <stdio.h>
#include <stdlib.h>

/********************************************************* Macro Definitions */

/*
 * Macros for querying character type.
 *
 * Each macro indexes chtable[] with `c' and masks out a single attribute
 * bit, so the result is zero when the attribute is absent and the non-zero
 * bit value (not necessarily 1) when it is present.  chtable points at
 * table[1] of a 257 entry array, so the valid index range is -1 .. 255.
 */
#define is_digit(c) (chtable[(c)] & A_DIGIT)
#define is_idch(c)	(chtable[(c)] & A_IDCHAR)
#define is_octal(c) (chtable[(c)] & A_OCTAL)
#define is_hex(c)	(chtable[(c)] & A_HEX)
#define is_space(c) (chtable[(c)] & A_SPACE)
#define is_print(c) (chtable[(c)] & A_PRINT)

/* sentinel stored one past the characters read into the input buffer */
#define END_OF_BUFFER	((CHAR)'\0')

#define END_STRING	9999	/* end of string encountered */
#define MAXKEY	((HASH) 631)	/* number of chains in hashtable[]; hash() reduces modulo this */
#define BUFLEN		512	/* bytes requested per fread() in nextline(); also the buffer growth step */
#define ID_LENGTH	127	/* usual maximum length of identifier; offset of bufmove once input has been read */
#define NIL_WORD	((XWORD *)0)	/* null hash-chain link */

/********************************************************** Type Definitions */

/*
 * Attributes of input characters.
 *
 * An Attribute is a set of the A_* bits below OR-ed together; one such set
 * is stored per character in the classification table and tested by the
 * is_*() macros.
 */

typedef unsigned char Attribute;

#define A_DIGIT  ((Attribute) 1)	/* character is a decimal digit */
#define A_IDCHAR ((Attribute) 2)	/* character is identifier character */
#define A_OCTAL  ((Attribute) 4)	/* character is an octal digit */
#define A_HEX	 ((Attribute) 8)	/* character is a hex digit */
#define A_SPACE  ((Attribute) 16)	/* character is a space character */
#define A_PRINT  ((Attribute) 32)	/* character is printable */

/*
 * One entry of the spelling/keyword hash table.  Entries with the same
 * hash key are linked through `next'; new entries are pushed on the front
 * of their chain.
 */
typedef struct _word
{
    const CHAR *spelling;	/* characters of the string (not owned when set by quick_insert()) */
    SIZE    length;		/* length of the string, excluding any terminator */
    TOKEN   sy;			/* token that found() stores in lastst for this spelling */
    struct _word *next;		/* next string in the hash chain */
} XWORD  , *WPTR;
typedef unsigned HASH;		/* value generated by hash function */

/********************************************************** Static Variables */

static BOOL overflow;		/* indicator for constant overflow */
static WPTR hashtable[MAXKEY];	/* hash table: heads of the spelling chains */

/*
 *	 The input buffer has several pointers which control its operation; the
 *	 objective is to try to make it as efficient as possible to process
 *	 individual characters.  The primary way that this is done is to
 *	 reduce the number of times that it is necessary to check to see if
 *	 the end of the currently read input has been reached.
 *
 *	 bufstart	This points to the start of the input buffer.  If the
 *			buffer is enlarged (realloc in nextline()) then this
 *			value could change.
 *
 *	 bufmove	Threshold used by nextline(): when the current symbol
 *			starts at or beyond this point, the symbol is copied
 *			down to bufstart before more input is read.  It is
 *			bufstart initially and bufstart + ID_LENGTH once
 *			input has been read.
 *
 *	 bufend		This points to the byte BEYOND the end of the input
 *			buffer (bufstart + buflen).  It is used to mark the
 *			physical end of the input buffer - the end of the
 *			characters read almost certainly occurs before this
 *			point.
 *
 *	 buflimit	This points to the byte beyond the current point
 *			where input has been read into the input buffer.  This
 *			always points to a \0 byte.
 *
 *	 symstart	This points to the start of the current symbol.
 *
 *	 bufcur		The current position in the input buffer.
 *
 *	             symstart   bufcur
 *	                |          |
 *	                |          |
 *	               \|/        \|/
 *	  +----------------------------------+
 *	  |                              \0  |
 *	  +----------------------------------+
 *	 /|\         /|\                 /|\ /|\
 *	  |           |                   |   |
 *	  |           |                   |   |
 *	  bufstart    bufmove      buflimit   bufend
 */

static CHAR *bufstart = NIL_CHAR;	/* start of the input buffer */
static CHAR *bufmove = NIL_CHAR;	/* mid point of the input buffer */
static CHAR *bufend = NIL_CHAR;	/* end of the input buffer */
static CHAR *buflimit = NIL_CHAR;	/* input buffer filled to this point */
static SIZE buflen = (SIZE) 0;	/* length of the input buffer */
static CHAR *symstart = NIL_CHAR;	/* start of the current symbol */
static CHAR *bufcur = NIL_CHAR;	/* current character in the buffer */
static int end_of_file = FALSE;	/* flag to indicate end of file */
static CHAR *strstart = NIL_CHAR;	/* buffer for strings */
static SIZE strbuflen = (SIZE) 0;	/* length of the string buffer */

/*
 *	 Character classification table.  chtable is offset by one entry so
 *	 that index -1 maps onto table[0] - presumably so that an EOF value
 *	 can be classified without a range check (confirm against callers).
 */
static Attribute table[257];	/* table to implement ctype(3) funcs */
static Attribute *chtable = &table[1];	/* efficient access to table */

/*
 *	 Interned spellings of the pre-processor directive names; set by
 *	 initsym() from quick_insert() so they can be compared by pointer.
 */
static const CHAR *pp_pragma;	/* pre-processor keyword pragma */
static const CHAR *pp_ident;	/* pre-processor keyword ident */
static const CHAR *pp_file;	/* pre-processor keyword file */
static const CHAR *pp_line;	/* pre-processor keyword line */

/*********************************************** Static Function Definitions */

/*
 * Forward declarations of the file-local scanner routines.  The P_ macro
 * is defined outside this file; presumably it drops the parameter list for
 * pre-ANSI compilers (confirm in the project headers).  Note that insert()
 * is declared here with `unsigned' for its last parameter and defined with
 * HASH, which is a typedef for unsigned.
 */
static const CHAR *found P_ ((const CHAR *, SIZE, BOOL));
static const CHAR *insert P_ ((const CHAR *, SIZE, TOKEN, unsigned));
static HASH hash P_ ((const CHAR *, SIZE));
static int getsch P_ ((BOOL));
static unsigned radix36 P_ ((CHAR));
static void getdecimal P_ ((void));
static void getexp P_ ((void));
static void getfrac P_ ((void));
static void gethex P_ ((void));
static void gethexexp P_ ((void));
static void gethexfrac P_ ((void));
static void getid P_ ((void));
static void getnum P_ ((void));
static void getoctal P_ ((void));
static void nextch P_ ((void));
static void nextline P_ ((void));
static void preprocessor_directive P_ ((void));
static void skip_space P_ ((void));
static void test_int P_ ((unsigned));

/*****************************************************************************/

/*
 *	setattributes() - defines attributes for specified characters
 *
 *	For every character of the '\0'-terminated string `str', the bits in
 *	`attr' are OR-ed into that character's entry of chtable[].  Bits that
 *	are already set for a character are left alone.
 */
static void setattributes P2 (const CHAR *, str, Attribute, attr)
{
    const CHAR *cp = str;

    while (*cp != (CHAR) '\0') {
	Attribute *slot = &chtable[(int) *cp];

	*slot = (Attribute) (*slot | attr);
	cp++;
    }
}


/*
 *	hash() - calculate the key for the hash function
 *
 *	Folds the `length' characters starting at `p' into an unsigned
 *	accumulator (each step computes key * 33 + character by means of a
 *	shift and an add) and returns the result reduced modulo MAXKEY, i.e.
 *	an index into hashtable[].
 */
static HASH hash P2 (const CHAR *, p, SIZE, length)
{
    HASH    key = (HASH) 0;

    while (length != (SIZE) 0) {
	key = (key << 5) + key + (HASH) (*p);
	p++;
	length--;
    }
    return (key % MAXKEY);
}


/*
 *	insert() - insert a word into the hash table
 *
 *	Allocates (via galloc) one block holding the table entry followed
 *	directly by a private, '\0'-terminated copy of the `length'
 *	characters at `p', pushes the entry on the front of chain `keyno'
 *	and returns a pointer to the copied spelling.
 */
static const CHAR *insert P4 (const CHAR *, p, SIZE, length, TOKEN, sy,
			      HASH, keyno)
{
    WPTR    entry;
    CHAR   *text;
    CHAR   *dst;
    SIZE    i;

    entry = (WPTR) galloc (sizeof (XWORD) + (size_t) length + (size_t) 1);

    /* the spelling lives immediately behind the entry itself */
    text = (CHAR *) entry + sizeof (XWORD);
    dst = text;
    for (i = (SIZE) 0; i < length; i++) {
	*dst++ = p[i];
    }
    *dst = (CHAR) '\0';

    entry->spelling = (const CHAR *) text;
    entry->length = length;
    entry->sy = sy;

    /* link in at the head of the chain */
    entry->next = hashtable[keyno];
    hashtable[keyno] = entry;

    return entry->spelling;
}


/*
 *	quick_insert() - insert word into hash table.
 *
 *	Like insert(), but the characters are not copied: the new entry
 *	simply refers to `p', so `p' must stay valid for as long as the
 *	table is in use.  The hash key is computed here rather than being
 *	passed in.  Returns `p', which is now the entry's spelling.
 */
static const CHAR *quick_insert P3 (const CHAR *, p, SIZE, length, TOKEN, sy)
{
    HASH    slot = hash (p, length);
    WPTR    entry = (WPTR) galloc (sizeof (XWORD));

    entry->spelling = p;
    entry->length = length;
    entry->sy = sy;

    /* push on the front of the chain */
    entry->next = hashtable[slot];
    hashtable[slot] = entry;

    return p;
}

/*
 *	found() - lookup a name in the string table.
 *
 *	Searches the hash chain for an entry whose length and characters
 *	match the `length' characters at `p'.
 *
 *	On a match, lastst is set to the token stored in the entry (a
 *	keyword token for keywords) and the entry's spelling is returned.
 *	When built with FACIST and without SYNTAX_CORRECT, an entry marked
 *	kw_id that is being used as an identifier produces a WARN_KEYWORD
 *	message and is then treated as tk_id.
 *
 *	If there is no match the name is entered into the table with
 *	insert(), lastst is set to tk_id and the new spelling is returned.
 *
 *	Because the same name always yields the same spelling pointer, the
 *	rest of the compiler can compare names by comparing pointers.
 */
static const CHAR *found P3 (const CHAR *, p, SIZE, length, BOOL,
			     is_identifier)
{
    HASH    keyno = hash (p, length);
    WPTR    entry;

    for (entry = hashtable[keyno]; entry; entry = entry->next) {
	const CHAR *want;
	const CHAR *have;
	SIZE    left;

	if (entry->length != length) {
	    continue;
	}
	want = p;
	have = entry->spelling;
	for (left = length; *want == *have; want++, have++) {
	    if (--left != (SIZE) 0) {
		continue;	/* more characters still to compare */
	    }
	    lastst = entry->sy;
#ifdef FACIST
#ifndef SYNTAX_CORRECT
	    if (is_identifier && lastst == kw_id) {
		message (WARN_KEYWORD, entry->spelling);
		lastst = tk_id;
	    }
#endif /* SYNTAX_CORRECT */
#endif /* FACIST */
	    return (entry->spelling);
	}
    }

    /* not seen before: it is an ordinary identifier */
    lastst = tk_id;
    return insert (p, length, tk_id, keyno);
}

/*
 *	nextline() - read next buffer from source file
 *
 *	An end-of-buffer character has been encountered in the input buffer.
 *	This could either be a genuine end-of-buffer character or else just
 *	a zero character in the current input buffer; in the latter case
 *	(bufcur is still before buflimit) nothing is done.
 *
 *	A dummy zero character is always placed one character beyond
 *	the characters read into the input buffer.	This allows the
 *	check for the end of the input buffer be performed only when
 *	this character occurs and not after every character.
 *
 *	Otherwise up to BUFLEN further bytes are read from `input' at
 *	bufcur.  Before reading:
 *	  - if the current symbol starts at or beyond bufmove it is copied
 *	    down to the start of the buffer;
 *	  - if fewer than BUFLEN bytes remain after bufcur the buffer is
 *	    extended by BUFLEN bytes.
 *	When nothing can be read, end_of_file is set and a terminator is
 *	stored at bufcur.
 *
 *	If the buffer cannot be extended a message is written to stderr and
 *	the program exits; every buffer pointer is rebased when realloc()
 *	moves the buffer, so none is left dangling on the end-of-file path.
 */
static void nextline P0 (void)
{
    SIZE    len;

    if (bufcur < buflimit) {
	/* a zero byte inside the data already read - nothing to refill */
	return;
    }

    if (symstart >= bufmove) {
	/*
	 *      The start of the symbol occurs near the end of the input
	 *      buffer.   Move the symbol (up to and including the
	 *      character at bufcur) to the start of the input buffer.
	 */
	CHAR   *dst, *src;

	for (dst = bufstart, src = symstart; src <= bufcur;) {
	    *dst++ = *src++;
	}
	symstart = bufstart;
	bufcur = dst - 1;
    }

    /*
     *      The start of the symbol is near the start of the input
     *      buffer.   The symbol is therefore potentially a long
     *      symbol so extend the size of the buffer.
     */
    if (((int) (bufend - bufcur)) < BUFLEN) {
	/* remember every pointer as an offset: realloc may move the buffer */
	SIZE    symoff = (SIZE) (symstart - bufstart);
	SIZE    moveoff = (SIZE) (bufmove - bufstart);
	SIZE    limitoff = (SIZE) (buflimit - bufstart);
	SIZE    newlen = buflen + (SIZE) BUFLEN;
	CHAR   *newbuf;

	/*
	 * NOTE(review): the original code steps bufcur back by one
	 * character here, unlike the symbol-move path above; this is kept
	 * unchanged - confirm against nextch() whether it is intended.
	 */
	len = (SIZE) (bufcur - bufstart) - (SIZE) 1;

	newbuf = (CHAR *) realloc (bufstart, (size_t) (newlen + (SIZE) 1));
	if (newbuf == NIL_CHAR) {
	    fprintf (stderr,
		     "nextline: out of memory extending input buffer\n");
	    exit (EXIT_FAILURE);
	}
	buflen = newlen;
	bufstart = newbuf;
	bufend = bufstart + buflen;
	bufmove = bufstart + moveoff;
	buflimit = bufstart + limitoff;
	bufcur = bufstart + len;
	symstart = bufstart + symoff;
    }

    len =
	(SIZE) fread ((void *) bufcur, (size_t) 1, (size_t) BUFLEN,
		      input);
    if (len == (SIZE) 0) {
	/* nothing more could be read: flag it and terminate the data */
	end_of_file = TRUE;
	*bufcur = END_OF_BUFFER;
	return;
    }
    bufend = bufstart + buflen;
    bufmove = bufstart + ID_LENGTH;
    buflimit = bufcur + len;
    *buflimit = END_OF_BUFFER;
}


/*
 *	 initsym() - Initialize the scanner tables.
 *
 *	 Performs, in order:
 *	   - links the built-in keyword tables into hashtable[] (the C90 and
 *	     C99 tables only when lang_option selects at least that level);
 *	   - enters the optional "asm" / "cdecl" keywords when configured;
 *	   - interns the special function names and pre-processor directive
 *	     names so that later code can compare them by pointer;
 *	   - fills in the character classification table;
 *	   - resets total_errors and allocates the input and string buffers.
 *
 *	 If either buffer cannot be allocated a message is written to stderr
 *	 and the program exits.
 */
void initsym P0 (void)
{
    static XWORD keywords[] = {
	{(const CHAR *) "auto", (SIZE) 4, kw_auto, NIL_WORD},
	{(const CHAR *) "break", (SIZE) 5, kw_break, NIL_WORD},
	{(const CHAR *) "case", (SIZE) 4, kw_case, NIL_WORD},
	{(const CHAR *) "char", (SIZE) 4, kw_char, NIL_WORD},
	{(const CHAR *) "continue", (SIZE) 8, kw_continue, NIL_WORD},
	{(const CHAR *) "default", (SIZE) 7, kw_default, NIL_WORD},
	{(const CHAR *) "do", (SIZE) 2, kw_do, NIL_WORD},
	{(const CHAR *) "else", (SIZE) 4, kw_else, NIL_WORD},
	{(const CHAR *) "enum", (SIZE) 4, kw_enum, NIL_WORD},
	{(const CHAR *) "extern", (SIZE) 6, kw_extern, NIL_WORD},
	{(const CHAR *) "for", (SIZE) 3, kw_for, NIL_WORD},
	{(const CHAR *) "goto", (SIZE) 4, kw_goto, NIL_WORD},
	{(const CHAR *) "if", (SIZE) 2, kw_if, NIL_WORD},
	{(const CHAR *) "int", (SIZE) 3, kw_int, NIL_WORD},
	{(const CHAR *) "long", (SIZE) 4, kw_long, NIL_WORD},
	{(const CHAR *) "register", (SIZE) 8, kw_register, NIL_WORD},
	{(const CHAR *) "return", (SIZE) 6, kw_return, NIL_WORD},
	{(const CHAR *) "short", (SIZE) 5, kw_short, NIL_WORD},
	{(const CHAR *) "sizeof", (SIZE) 6, kw_sizeof, NIL_WORD},
	{(const CHAR *) "static", (SIZE) 6, kw_static, NIL_WORD},
	{(const CHAR *) "struct", (SIZE) 6, kw_struct, NIL_WORD},
	{(const CHAR *) "switch", (SIZE) 6, kw_switch, NIL_WORD},
	{(const CHAR *) "typedef", (SIZE) 7, kw_typedef, NIL_WORD},
	{(const CHAR *) "union", (SIZE) 5, kw_union, NIL_WORD},
	{(const CHAR *) "unsigned", (SIZE) 8, kw_unsigned, NIL_WORD},
	{(const CHAR *) "void", (SIZE) 4, kw_void, NIL_WORD},
	{(const CHAR *) "while", (SIZE) 5, kw_while, NIL_WORD},
#ifdef FLOAT_KEYWORDS
	{(const CHAR *) "double", (SIZE) 6, kw_double, NIL_WORD},
	{(const CHAR *) "float", (SIZE) 5, kw_float, NIL_WORD},
#endif /* FLOAT_KEYWORDS */
#ifdef TYPEOF
	{(const CHAR *) "__typeof__", (SIZE) 10, kw_typeof, NIL_WORD},
#endif /* TYPEOF */
#ifdef FACIST
	/*
	 * C++ keywords and operators.  They are entered as kw_id so that
	 * found() can warn when one of them is used as an identifier.
	 */
	{(const CHAR *) "bool", (SIZE) 4, kw_id, NIL_WORD},
	{(const CHAR *) "catch", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "class", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "const_cast", (SIZE) 10, kw_id, NIL_WORD},
	{(const CHAR *) "delete", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "dynamic_cast", (SIZE) 12, kw_id, NIL_WORD},
	{(const CHAR *) "false", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "friend", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "inline", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "mutable", (SIZE) 7, kw_id, NIL_WORD},
	{(const CHAR *) "namespace", (SIZE) 9, kw_id, NIL_WORD},
	{(const CHAR *) "new", (SIZE) 3, kw_id, NIL_WORD},
	{(const CHAR *) "operator", (SIZE) 8, kw_id, NIL_WORD},
	{(const CHAR *) "private", (SIZE) 7, kw_id, NIL_WORD},
	{(const CHAR *) "public", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "reinterpret_cast", (SIZE) 16, kw_id, NIL_WORD},
	{(const CHAR *) "static_cast", (SIZE) 11, kw_id, NIL_WORD},
	{(const CHAR *) "template", (SIZE) 8, kw_id, NIL_WORD},
	{(const CHAR *) "this", (SIZE) 4, kw_id, NIL_WORD},
	{(const CHAR *) "throw", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "true", (SIZE) 4, kw_id, NIL_WORD},
	{(const CHAR *) "try", (SIZE) 3, kw_id, NIL_WORD},
	{(const CHAR *) "typeid", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "using", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "virtual", (SIZE) 7, kw_id, NIL_WORD},

	{(const CHAR *) "bitand", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "and", (SIZE) 3, kw_id, NIL_WORD},
	{(const CHAR *) "bitor", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "or", (SIZE) 2, kw_id, NIL_WORD},
	{(const CHAR *) "xor", (SIZE) 3, kw_id, NIL_WORD},
	{(const CHAR *) "compl", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "and_eq", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "or_eq", (SIZE) 5, kw_id, NIL_WORD},
	{(const CHAR *) "xor_eq", (SIZE) 6, kw_id, NIL_WORD},
	{(const CHAR *) "not", (SIZE) 3, kw_id, NIL_WORD},
	{(const CHAR *) "not_eq", (SIZE) 6, kw_id, NIL_WORD},
#endif /* FACIST */
	{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
    };
    static XWORD c90_keywords[] = {
	{(const CHAR *) "const", (SIZE) 5, kw_const, NIL_WORD},
	{(const CHAR *) "signed", (SIZE) 6, kw_signed, NIL_WORD},
	{(const CHAR *) "volatile", (SIZE) 8, kw_volatile, NIL_WORD},
	{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
    };

    static XWORD c99_keywords[] = {
	{(const CHAR *) "_Bool", (SIZE) 5, kw_bool, NIL_WORD},
	{(const CHAR *) "_Complex", (SIZE) 8, kw_complex, NIL_WORD},
	{(const CHAR *) "_Imaginary", (SIZE) 10, kw_imaginary, NIL_WORD},
	{(const CHAR *) "_Pragma", (SIZE) 7, kw_pragma, NIL_WORD},
	{(const CHAR *) "inline", (SIZE) 6, kw_inline, NIL_WORD},
	{(const CHAR *) "restrict", (SIZE) 8, kw_restrict, NIL_WORD},
	{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
    };

    WPTR    wp;
    HASH    keyno;

    /*
     *      Link the static table entries straight into the hash chains;
     *      each table is terminated by an entry with a NIL_CHAR spelling.
     *      Later insertions go to the front of a chain, so they are found
     *      before earlier entries with the same spelling.
     */
    for (wp = &keywords[0]; wp->spelling != NIL_CHAR; wp++) {
	keyno = hash (wp->spelling, wp->length);
	wp->next = hashtable[keyno];
	hashtable[keyno] = wp;
    }
    if (lang_option >= LANG_C90) {
	for (wp = &c90_keywords[0]; wp->spelling != NIL_CHAR; wp++) {
	    keyno = hash (wp->spelling, wp->length);
	    wp->next = hashtable[keyno];
	    hashtable[keyno] = wp;
	}
    }
    if (lang_option >= LANG_C99) {
	for (wp = &c99_keywords[0]; wp->spelling != NIL_CHAR; wp++) {
	    keyno = hash (wp->spelling, wp->length);
	    wp->next = hashtable[keyno];
	    hashtable[keyno] = wp;
	}
    }
#ifdef ASM
    if (asm_option) {
	VOIDCAST quick_insert ((const CHAR *) "asm", (SIZE) 3, kw_asm);
    }
#ifdef FACIST
    else {
	VOIDCAST quick_insert ((const CHAR *) "asm", (SIZE) 3, kw_id);
    }
#endif /* FACIST */
#endif /* ASM */

    /*
     *      Special names recognised by lower parts of the compiler in
     *      order to do special actions.
     */
    alloca_name = quick_insert ((const CHAR *) "alloca", (SIZE) 6, tk_id);
    printf_name = quick_insert ((const CHAR *) "printf", (SIZE) 6, tk_id);
    fprintf_name = quick_insert ((const CHAR *) "fprintf", (SIZE) 7, tk_id);
    sprintf_name = quick_insert ((const CHAR *) "sprintf", (SIZE) 7, tk_id);
    scanf_name = quick_insert ((const CHAR *) "scanf", (SIZE) 5, tk_id);
    fscanf_name = quick_insert ((const CHAR *) "fscanf", (SIZE) 6, tk_id);
    sscanf_name = quick_insert ((const CHAR *) "sscanf", (SIZE) 6, tk_id);
    func_name = quick_insert ((const CHAR *) "__func__", (SIZE) 8, tk_id);
    main_name = quick_insert ((const CHAR *) "main", (SIZE) 4, tk_id);

    /*
     *      Names used for pre-processor directives
     */
    pp_pragma = quick_insert ((const CHAR *) "pragma", (SIZE) 6, tk_id);
    pp_ident = quick_insert ((const CHAR *) "ident", (SIZE) 5, tk_id);
    pp_line = quick_insert ((const CHAR *) "line", (SIZE) 4, tk_id);
    pp_file = quick_insert ((const CHAR *) "file", (SIZE) 4, tk_id);

#ifdef TOPSPEED
    /*
     *      TopSpeed keywords
     */
    if (topspeed_option) {
	VOIDCAST quick_insert ((const CHAR *) "cdecl", (SIZE) 5, kw_cdecl);
    }
#endif /* TOPSPEED */

    /* set up attributes */
    setattributes ((const CHAR *) "01234567",
		   (Attribute) (A_PRINT | A_IDCHAR | A_HEX | A_DIGIT |
				A_OCTAL));
    setattributes ((const CHAR *) "89",
		   (Attribute) (A_PRINT | A_IDCHAR | A_HEX | A_DIGIT));
    setattributes ((const CHAR *) "ABCDEFabcdef",
		   (Attribute) (A_PRINT | A_IDCHAR | A_HEX));
    setattributes ((const CHAR *) "GHIJKLMNOPQRSTUVWXYZghijklmnopqrstuvwxyz_",
		   (Attribute) (A_PRINT | A_IDCHAR));
    setattributes ((const CHAR *) " \f\n\r\t\v", A_SPACE);

    total_errors = 0;

    /*
     *      Initialize the input buffer.  One extra byte is allocated so
     *      that a terminator can be stored at bufend itself.
     */
    buflen = (SIZE) (BUFLEN + ID_LENGTH);
    bufstart = (CHAR *) malloc ((size_t) (buflen + (SIZE) 1));
    if (bufstart == NIL_CHAR) {
	fprintf (stderr, "initsym: out of memory allocating input buffer\n");
	exit (EXIT_FAILURE);
    }
    bufcur = bufstart;		/* at start of input buffer */
    bufend = bufstart + buflen;	/* pointer beyond end of input buffer */
    bufmove = bufstart;		/* buffer won't be extended */

    /* prime the buffer with a single newline followed by the terminator */
    bufstart[0] = (CHAR) '\n';
    bufstart[1] = END_OF_BUFFER;
    buflimit = bufstart + 1;

    strbuflen = (SIZE) BUFLEN;
    strstart = (CHAR *) malloc ((size_t) strbuflen);
    if (strstart == NIL_CHAR) {
	fprintf (stderr, "initsym: out of memory allocating string buffer\n");
	exit (EXIT_FAILURE);
    }
}

/*
 *	endsym() - deletes the scanner tables
 *
 *	Releases the string buffer and the input buffer obtained by the
 *	scanner and clears both pointers, so that a second call is harmless.
 *	The other input-buffer pointers (bufcur, bufend, ...) are not reset.
 */
void endsym P0 (void)
{
    if (strstart != NULL) {
	free (strstart);
	strstart = NULL;
    }
    if (bufstart != NULL) {
	free (bufstart);
	bufstart = NULL;
    }
}

/*
 *	new_line() - this routine processes newline characters.
 *
 *	The character at bufcur is overwritten with '\0', the scanner is
 *	advanced with nextch() and the line counter act_line is incremented.
 *	If the new line starts with '#' it is passed to
 *	preprocessor_directive(); otherwise act_linetxt is set to the start
 *	of the new line.
 */
static void new_line P0 (void)
{
    *bufcur = (CHAR) '\0';
    nextch ();
    act_line++;

    if (*bufcur != (CHAR) '#') {
	act_linetxt = bufcur;
	return;
    }
    preprocessor_directive ();
}
12 3 4 下一页
💿 文件大小 463 K
👤 上传用户 zxk756921815
📂 所属分类编译器/解释器
🏷️ 相关标签

#C语言 #编译器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -