📄 getsym.c
字号:
/*
* C compiler
* ==========
*
* Copyright 1989, 1990, 1991 Christoph van Wuellen.
* Credits to Matthew Brandt.
* All commercial rights reserved.
*
* This compiler may be redistributed as long there is no
* commercial interest. The compiler must not be redistributed
* without its full sources. This notice must stay intact.
*
* History:
*
* 1989 starting an 68000 C compiler, starting with material
* originally by M. Brandt
* 1990 68000 C compiler further bug fixes
* started i386 port (December)
* 1991 i386 port finished (January)
* further corrections in the front end and in the 68000
* code generator.
* The next port will be a SPARC port
*/
/*****************************************************************************/
#include "chdr.h"
#include "expr.h"
#include "cglbdec.h"
#include "proto.h"
/********************************************************* Macro Definitions */
/*
 * Macros for querying character type.
 *
 * Each macro indexes chtable[] with the character and masks out a single
 * A_* attribute bit, so the result is zero / non-zero (not strictly 0 / 1).
 * The argument is used directly as an array index and therefore must lie
 * within the bounds of chtable[].
 */
#define is_digit(c) (chtable[(c)] & A_DIGIT)
#define is_idch(c) (chtable[(c)] & A_IDCHAR)
#define is_octal(c) (chtable[(c)] & A_OCTAL)
#define is_hex(c) (chtable[(c)] & A_HEX)
#define is_space(c) (chtable[(c)] & A_SPACE)
#define is_print(c) (chtable[(c)] & A_PRINT)
/* zero byte that terminates the characters currently held in the input buffer */
#define END_OF_BUFFER ((CHAR)'\0')
#define END_STRING 9999 /* end of string encountered */
#define MAXKEY ((HASH) 631) /* number of slots in hashtable[]; hash() reduces modulo this */
#define BUFLEN 512 /* bytes requested per fread() and growth step of the input buffer */
#define ID_LENGTH 127 /* usual maximum length of identifier; also offset of bufmove */
#define NIL_WORD ((XWORD *)0) /* null XWORD pointer: end of a hash chain */
/********************************************************** Type Definitions */
/*
 * Attributes of input characters.
 *
 * One bit per character class; the bits are OR-ed together in the
 * character table and tested by the is_*() macros.
 */
typedef unsigned char Attribute;
#define A_DIGIT ((Attribute) 1) /* character is a decimal digit */
#define A_IDCHAR ((Attribute) 2) /* character is identifier character */
#define A_OCTAL ((Attribute) 4) /* character is an octal digit */
#define A_HEX ((Attribute) 8) /* character is a hex digit */
#define A_SPACE ((Attribute) 16) /* character is a space character */
#define A_PRINT ((Attribute) 32) /* character is printable */
/*
 * An entry in the spelling/keyword hash table.  Entries that hash to the
 * same slot are linked through `next'.
 */
typedef struct _word
{
const CHAR *spelling; /* characters of the string */
SIZE length; /* length of the string (terminator not counted) */
TOKEN sy; /* token reported for this spelling (keyword token or tk_id) */
struct _word *next; /* next string in the hash chain */
} XWORD , *WPTR;
typedef unsigned HASH; /* value generated by hash function */
/********************************************************** Static Variables */
static BOOL overflow; /* indicator for constant overflow */
static WPTR hashtable[MAXKEY]; /* hash table: one chain head per hash() value */
/*
 * The input buffer has several pointers which control its operation; the
 * objective is to make it as efficient as possible to process individual
 * characters.  The primary way that this is done is to reduce the number
 * of times that it is necessary to check whether the end of the currently
 * read input has been reached.
 *
 * bufstart     This points to the start of the input buffer.  If the
 *              buffer is enlarged then this value could change.
 *
 * bufmove      Threshold used when the buffer is refilled: if the current
 *              symbol starts at or beyond this point it is first copied
 *              down to bufstart.
 *
 * bufend       This points to the byte BEYOND the end of the input
 *              buffer.  It is used to mark the physical end of the
 *              input buffer - the end of the characters read almost
 *              certainly occurs before this point.
 *
 * buflimit     This points to the byte beyond the current point
 *              where input has been read into the input buffer.  This
 *              always points to a \0 byte.
 *
 * symstart     This points to the start of the current symbol.
 *
 * bufcur       The current position in the input buffer.
 *
 *             symstart   bufcur
 *                 |        |
 *                 |        |
 *                \|/      \|/
 *    +----------------------------------+
 *    |                         \0       |
 *    +----------------------------------+
 *   /|\       /|\             /|\      /|\
 *    |         |               |        |
 *    |         |               |        |
 * bufstart  bufmove         buflimit  bufend
 */
static CHAR *bufstart = NIL_CHAR; /* start of the input buffer */
static CHAR *bufmove = NIL_CHAR; /* symbols starting at/after this are moved to bufstart on refill */
static CHAR *bufend = NIL_CHAR; /* one byte beyond the end of the input buffer */
static CHAR *buflimit = NIL_CHAR; /* input buffer filled to this point */
static SIZE buflen = (SIZE) 0; /* length of the input buffer */
static CHAR *symstart = NIL_CHAR; /* start of the current symbol */
static CHAR *bufcur = NIL_CHAR; /* current character in the buffer */
static int end_of_file = FALSE; /* flag to indicate end of file */
static CHAR *strstart = NIL_CHAR; /* buffer for strings */
static SIZE strbuflen = (SIZE) 0; /* length of the string buffer */
static Attribute table[257]; /* table to implement ctype(3) funcs */
/*
 * chtable is table[] offset by one element, so chtable[-1] .. chtable[255]
 * are all valid.  Presumably the extra slot lets an EOF value of -1 be
 * looked up without a range check - TODO confirm against the callers.
 */
static Attribute *chtable = &table[1]; /* efficient access to table */
/* Spelling-table pointers for pre-processor keywords; set up in initsym() */
static const CHAR *pp_pragma; /* pre-processor keyword pragma */
static const CHAR *pp_ident; /* pre-processor keyword ident */
static const CHAR *pp_file; /* pre-processor keyword file */
static const CHAR *pp_line; /* pre-processor keyword line */
/*********************************************** Static Function Definitions */
/*
 * Forward declarations of the file-local functions.  P_() wraps the
 * parameter list (it comes from a project header that is not part of this
 * file).
 */
static const CHAR *found P_ ((const CHAR *, SIZE, BOOL));
/* the last parameter is declared `unsigned' here and `HASH' in the definition; HASH is a typedef for unsigned */
static const CHAR *insert P_ ((const CHAR *, SIZE, TOKEN, unsigned));
static HASH hash P_ ((const CHAR *, SIZE));
static int getsch P_ ((BOOL));
static unsigned radix36 P_ ((CHAR));
static void getdecimal P_ ((void));
static void getexp P_ ((void));
static void getfrac P_ ((void));
static void gethex P_ ((void));
static void gethexexp P_ ((void));
static void gethexfrac P_ ((void));
static void getid P_ ((void));
static void getnum P_ ((void));
static void getoctal P_ ((void));
static void nextch P_ ((void));
static void nextline P_ ((void));
static void preprocessor_directive P_ ((void));
static void skip_space P_ ((void));
static void test_int P_ ((unsigned));
/*****************************************************************************/
/*
 * setattributes() - add attribute bits to a set of characters
 *
 * Every character of the NUL-terminated string `str' has the bits in
 * `attr' OR-ed into its entry of the character table; bits that are
 * already set for a character are left untouched.
 */
static void setattributes P2 (const CHAR *, str, Attribute, attr)
{
    const CHAR *cp = str;

    while (*cp) {
        int     slot = (int) *cp;

        chtable[slot] = (Attribute) (chtable[slot] | attr);
        cp++;
    }
}
/*
 * hash() - map a spelling onto a slot of the hash table
 *
 * The first `length' characters at `p' are folded into a running key
 * (key = key * 33 + character, in HASH arithmetic) and the result is
 * reduced modulo MAXKEY.
 */
static HASH hash P2 (const CHAR *, p, SIZE, length)
{
    register HASH key = (HASH) 0;

    while (length) {
        /* key + (key << 5) is key * 33 */
        key = key + ((key << 5) + (HASH) (*p));
        p++;
        length--;
    }
    return (key % MAXKEY);
}
/*
 * insert() - add a new spelling to the hash table
 *
 * A single galloc() block holds the XWORD header immediately followed by
 * a private, NUL-terminated copy of the `length' characters at `p'.  The
 * new entry is pushed onto the front of chain `keyno' with token `sy'.
 *
 * Returns a pointer to the copied spelling.
 */
static const CHAR *insert P4 (const CHAR *, p, SIZE, length, TOKEN, sy,
                              HASH, keyno)
{
    WPTR    entry;
    CHAR   *text;
    CHAR   *dst;
    const CHAR *stop;

    entry = (WPTR) galloc (sizeof (XWORD) + (size_t) length + (size_t) 1);

    /* the characters live directly behind the header */
    text = (CHAR *) entry + sizeof (XWORD);
    entry->spelling = (const CHAR *) text;
    entry->length = length;
    entry->sy = sy;

    /* copy the spelling and terminate it */
    dst = text;
    stop = p + length;
    while (p < stop) {
        *dst = *p;
        dst++;
        p++;
    }
    *dst = (CHAR) '\0';

    /* push onto the head of the chain */
    entry->next = hashtable[keyno];
    hashtable[keyno] = entry;

    return entry->spelling;
}
/*
 * quick_insert() - add a word to the hash table without copying it
 *
 * The entry refers to the caller's characters directly (no private copy
 * of the spelling is made), so `p' must remain valid for as long as the
 * hash table is in use.  The entry is pushed onto the front of the chain
 * selected by hash().
 *
 * Returns `p', which is also the spelling recorded in the entry.
 */
static const CHAR *quick_insert P3 (const CHAR *, p, SIZE, length, TOKEN, sy)
{
    HASH    slot;
    WPTR    entry;

    slot = hash (p, length);
    entry = (WPTR) galloc (sizeof (XWORD));

    entry->spelling = p;
    entry->length = length;
    entry->sy = sy;

    entry->next = hashtable[slot];
    hashtable[slot] = entry;

    return entry->spelling;
}
/*
 * found() - look a name up in the string table, adding it if absent
 *
 * The chain selected by hash() is searched for an entry of the same
 * length whose characters all match.  On a hit, lastst is set to the
 * token stored in the entry (so keywords report their keyword token)
 * and the entry's spelling is returned.  On a miss the name is added
 * with insert() as a plain identifier and lastst is set to tk_id.
 *
 * The same name therefore always yields the same spelling pointer, so
 * the rest of the compiler can compare names by comparing pointers.
 *
 * When FACIST is defined and SYNTAX_CORRECT is not, an identifier that
 * matches an entry marked kw_id produces a WARN_KEYWORD message and is
 * reported as tk_id.
 */
static const CHAR *found P3 (const CHAR *, p, SIZE, length, BOOL,
                             is_identifier)
{
    HASH    slot = hash (p, length);
    register WPTR entry;

    for (entry = hashtable[slot]; entry != NIL_WORD; entry = entry->next) {
        register SIZE remaining = entry->length;
        register const CHAR *a;
        register const CHAR *b;

        if (length != remaining) {
            continue;           /* different length: cannot match */
        }
        a = p;
        b = entry->spelling;
        while (*a == *b) {
            a++;
            b++;
            remaining--;
            if (remaining == (SIZE) 0) {
                /* every character matched */
                lastst = entry->sy;
#ifdef FACIST
#ifndef SYNTAX_CORRECT
                if (is_identifier && lastst == kw_id) {
                    message (WARN_KEYWORD, entry->spelling);
                    lastst = tk_id;
                }
#endif /* SYNTAX_CORRECT */
#endif /* FACIST */
                return (entry->spelling);
            }
        }
    }

    /* not present yet: record it as an ordinary identifier */
    lastst = tk_id;
    return insert (p, length, lastst, slot);
}
/*
 * nextline() - read next buffer from source file
 *
 * An end-of-buffer character has been encountered in the input buffer.
 * This could either be a genuine end-of-buffer character or else just
 * a zero character in the current input buffer.
 *
 * A dummy zero character is always placed one character beyond
 * the characters read into the input buffer.  This allows the
 * check for the end of the input buffer to be performed only when
 * this character occurs and not after every character.
 *
 * When the end of the data really has been reached, the current symbol
 * is kept in the buffer (moved to the front and/or the buffer enlarged
 * as necessary) and up to BUFLEN further bytes are read from `input' at
 * bufcur.  If nothing can be read, end_of_file is set and an
 * END_OF_BUFFER character is stored at bufcur.
 */
static void nextline P0 (void)
{
/* a zero byte met before buflimit is part of the input: nothing to refill */
if (bufcur >= buflimit) {
/*
 * End of input currently read encountered
 */
SIZE len;
if (symstart >= bufmove) {
/*
 * The start of the symbol occurs near the end of the input
 * buffer. Move the symbol to the start of the input
 * buffer.
 */
CHAR *p1, *p2;
/* copies symstart .. bufcur inclusive, i.e. including the byte at bufcur */
for (p1 = bufstart, p2 = symstart; p2 <= bufcur;) {
*p1++ = *p2++;
}
symstart = bufstart;
/* p1 is one past the last byte copied; bufcur is put back on that byte */
bufcur = p1 - 1;
}
/*
 * If fewer than BUFLEN bytes remain between bufcur and the end of
 * the buffer, the symbol is potentially a long one, so extend the
 * size of the buffer by BUFLEN before reading.
 */
if (((int) (bufend - bufcur)) < BUFLEN) {
/* realloc() may move the block, so positions are saved as offsets */
SIZE len2 = (SIZE) (symstart - bufstart);
/*
 * NOTE(review): the saved offset is one LESS than bufcur's offset, so
 * bufcur ends up one byte earlier than before the realloc; the move
 * branch above does not shift it like this.  Confirm against nextch()
 * whether the "- 1" is intended.
 */
len = (SIZE) (bufcur - bufstart) - (SIZE) 1;
buflen += (SIZE) BUFLEN;
/*
 * NOTE(review): the result of realloc() is not checked and overwrites
 * bufstart directly; on failure bufstart becomes a null pointer.
 */
bufstart =
(CHAR *) realloc (bufstart, (size_t) (buflen + (SIZE) 1));
bufcur = bufstart + len;
symstart = bufstart + len2;
}
/* at least BUFLEN bytes are free at bufcur here (ensured by the test above) */
len =
(SIZE) fread ((void *) bufcur, (size_t) 1, (size_t) BUFLEN,
input);
if (len == (SIZE) 0) {
/*
 * Nothing read: treated as end of file (a read error is not
 * distinguished).  NOTE(review): bufend, bufmove and buflimit are
 * not refreshed on this path, even if the buffer was reallocated
 * just above.
 */
end_of_file = TRUE;
*bufcur = END_OF_BUFFER;
return;
}
/* re-derive the bookkeeping pointers; bufstart may have changed above */
bufend = bufstart + buflen;
bufmove = bufstart + ID_LENGTH;
buflimit = bufcur + len;
/* sentinel one byte beyond the data; the allocation holds buflen + 1 bytes */
*buflimit = END_OF_BUFFER;
}
}
/*
 * initsym() - Initialize the scanner tables.
 *
 * Enters the keywords appropriate to lang_option (and to the optional
 * asm / TopSpeed settings) into the hash table, registers the names
 * that other parts of the compiler treat specially, fills in the
 * character attribute table, resets total_errors and allocates the
 * input and string buffers.
 *
 * Every insertion goes to the head of its hash chain and found() searches
 * from the head, so for a spelling entered more than once the LATER
 * insertion is the one that is found.  The order of the steps below is
 * therefore significant.
 */
void initsym P0 (void)
{
/*
 * The keyword tables are static and writable: the entries themselves are
 * linked into the hash table through their `next' members.  Each table
 * is terminated by an entry whose spelling is NIL_CHAR.
 */
static XWORD keywords[] = {
{(const CHAR *) "auto", (SIZE) 4, kw_auto, NIL_WORD},
{(const CHAR *) "break", (SIZE) 5, kw_break, NIL_WORD},
{(const CHAR *) "case", (SIZE) 4, kw_case, NIL_WORD},
{(const CHAR *) "char", (SIZE) 4, kw_char, NIL_WORD},
{(const CHAR *) "continue", (SIZE) 8, kw_continue, NIL_WORD},
{(const CHAR *) "default", (SIZE) 7, kw_default, NIL_WORD},
{(const CHAR *) "do", (SIZE) 2, kw_do, NIL_WORD},
{(const CHAR *) "else", (SIZE) 4, kw_else, NIL_WORD},
{(const CHAR *) "enum", (SIZE) 4, kw_enum, NIL_WORD},
{(const CHAR *) "extern", (SIZE) 6, kw_extern, NIL_WORD},
{(const CHAR *) "for", (SIZE) 3, kw_for, NIL_WORD},
{(const CHAR *) "goto", (SIZE) 4, kw_goto, NIL_WORD},
{(const CHAR *) "if", (SIZE) 2, kw_if, NIL_WORD},
{(const CHAR *) "int", (SIZE) 3, kw_int, NIL_WORD},
{(const CHAR *) "long", (SIZE) 4, kw_long, NIL_WORD},
{(const CHAR *) "register", (SIZE) 8, kw_register, NIL_WORD},
{(const CHAR *) "return", (SIZE) 6, kw_return, NIL_WORD},
{(const CHAR *) "short", (SIZE) 5, kw_short, NIL_WORD},
{(const CHAR *) "sizeof", (SIZE) 6, kw_sizeof, NIL_WORD},
{(const CHAR *) "static", (SIZE) 6, kw_static, NIL_WORD},
{(const CHAR *) "struct", (SIZE) 6, kw_struct, NIL_WORD},
{(const CHAR *) "switch", (SIZE) 6, kw_switch, NIL_WORD},
{(const CHAR *) "typedef", (SIZE) 7, kw_typedef, NIL_WORD},
{(const CHAR *) "union", (SIZE) 5, kw_union, NIL_WORD},
{(const CHAR *) "unsigned", (SIZE) 8, kw_unsigned, NIL_WORD},
{(const CHAR *) "void", (SIZE) 4, kw_void, NIL_WORD},
{(const CHAR *) "while", (SIZE) 5, kw_while, NIL_WORD},
#ifdef FLOAT_KEYWORDS
{(const CHAR *) "double", (SIZE) 6, kw_double, NIL_WORD},
{(const CHAR *) "float", (SIZE) 5, kw_float, NIL_WORD},
#endif /* FLOAT_KEYWORDS */
#ifdef TYPEOF
{(const CHAR *) "__typeof__", (SIZE) 10, kw_typeof, NIL_WORD},
#endif /* TYPEOF */
#ifdef FACIST
/*
 * C++ keywords and operators.  They are entered with the token kw_id,
 * which found() uses to warn when such a name is used as an identifier.
 */
{(const CHAR *) "bool", (SIZE) 4, kw_id, NIL_WORD},
{(const CHAR *) "catch", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "class", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "const_cast", (SIZE) 10, kw_id, NIL_WORD},
{(const CHAR *) "delete", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "dynamic_cast", (SIZE) 12, kw_id, NIL_WORD},
{(const CHAR *) "false", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "friend", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "inline", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "mutable", (SIZE) 7, kw_id, NIL_WORD},
{(const CHAR *) "namespace", (SIZE) 9, kw_id, NIL_WORD},
{(const CHAR *) "new", (SIZE) 3, kw_id, NIL_WORD},
{(const CHAR *) "operator", (SIZE) 8, kw_id, NIL_WORD},
{(const CHAR *) "private", (SIZE) 7, kw_id, NIL_WORD},
{(const CHAR *) "public", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "reinterpret_cast", (SIZE) 16, kw_id, NIL_WORD},
{(const CHAR *) "static_cast", (SIZE) 11, kw_id, NIL_WORD},
{(const CHAR *) "template", (SIZE) 8, kw_id, NIL_WORD},
{(const CHAR *) "this", (SIZE) 4, kw_id, NIL_WORD},
{(const CHAR *) "throw", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "true", (SIZE) 4, kw_id, NIL_WORD},
{(const CHAR *) "try", (SIZE) 3, kw_id, NIL_WORD},
{(const CHAR *) "typeid", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "using", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "virtual", (SIZE) 7, kw_id, NIL_WORD},
{(const CHAR *) "bitand", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "and", (SIZE) 3, kw_id, NIL_WORD},
{(const CHAR *) "bitor", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "or", (SIZE) 2, kw_id, NIL_WORD},
{(const CHAR *) "xor", (SIZE) 3, kw_id, NIL_WORD},
{(const CHAR *) "compl", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "and_eq", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "or_eq", (SIZE) 5, kw_id, NIL_WORD},
{(const CHAR *) "xor_eq", (SIZE) 6, kw_id, NIL_WORD},
{(const CHAR *) "not", (SIZE) 3, kw_id, NIL_WORD},
{(const CHAR *) "not_eq", (SIZE) 6, kw_id, NIL_WORD},
#endif /* FACIST */
{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
};
/* keywords entered only when lang_option >= LANG_C90 */
static XWORD c90_keywords[] = {
{(const CHAR *) "const", (SIZE) 5, kw_const, NIL_WORD},
{(const CHAR *) "signed", (SIZE) 6, kw_signed, NIL_WORD},
{(const CHAR *) "volatile", (SIZE) 8, kw_volatile, NIL_WORD},
{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
};
/* keywords entered only when lang_option >= LANG_C99 */
static XWORD c99_keywords[] = {
{(const CHAR *) "_Bool", (SIZE) 5, kw_bool, NIL_WORD},
{(const CHAR *) "_Complex", (SIZE) 8, kw_complex, NIL_WORD},
{(const CHAR *) "_Imaginary", (SIZE) 10, kw_imaginary, NIL_WORD},
{(const CHAR *) "_Pragma", (SIZE) 7, kw_pragma, NIL_WORD},
{(const CHAR *) "inline", (SIZE) 6, kw_inline, NIL_WORD},
{(const CHAR *) "restrict", (SIZE) 8, kw_restrict, NIL_WORD},
{NIL_CHAR, (SIZE) 0, (TOKEN) 0, NIL_WORD}
};
WPTR wp;
HASH keyno;
/* link every base keyword onto the head of its hash chain */
for (wp = &keywords[0]; wp->spelling != NIL_CHAR; wp++) {
keyno = hash (wp->spelling, wp->length);
wp->next = hashtable[keyno];
hashtable[keyno] = wp;
}
if (lang_option >= LANG_C90) {
for (wp = &c90_keywords[0]; wp->spelling != NIL_CHAR; wp++) {
keyno = hash (wp->spelling, wp->length);
wp->next = hashtable[keyno];
hashtable[keyno] = wp;
}
}
/*
 * "inline" also appears in the FACIST part of keywords[]; because this
 * loop runs later, its kw_inline entry sits in front of that kw_id entry.
 */
if (lang_option >= LANG_C99) {
for (wp = &c99_keywords[0]; wp->spelling != NIL_CHAR; wp++) {
keyno = hash (wp->spelling, wp->length);
wp->next = hashtable[keyno];
hashtable[keyno] = wp;
}
}
#ifdef ASM
/* "asm" is a keyword only when asm_option is set */
if (asm_option) {
VOIDCAST quick_insert ((const CHAR *) "asm", (SIZE) 3, kw_asm);
}
#ifdef FACIST
else {
VOIDCAST quick_insert ((const CHAR *) "asm", (SIZE) 3, kw_id);
}
#endif /* FACIST */
#endif /* ASM */
/*
 * Special names recognised by lower parts of the compiler in
 * order to do special actions.  They are entered as ordinary
 * identifiers (tk_id); the spelling pointers are kept so that the
 * names can later be recognised by pointer comparison.
 */
alloca_name = quick_insert ((const CHAR *) "alloca", (SIZE) 6, tk_id);
printf_name = quick_insert ((const CHAR *) "printf", (SIZE) 6, tk_id);
fprintf_name = quick_insert ((const CHAR *) "fprintf", (SIZE) 7, tk_id);
sprintf_name = quick_insert ((const CHAR *) "sprintf", (SIZE) 7, tk_id);
scanf_name = quick_insert ((const CHAR *) "scanf", (SIZE) 5, tk_id);
fscanf_name = quick_insert ((const CHAR *) "fscanf", (SIZE) 6, tk_id);
sscanf_name = quick_insert ((const CHAR *) "sscanf", (SIZE) 6, tk_id);
func_name = quick_insert ((const CHAR *) "__func__", (SIZE) 8, tk_id);
main_name = quick_insert ((const CHAR *) "main", (SIZE) 4, tk_id);
/*
 * Names used for pre-processor directives
 */
pp_pragma = quick_insert ((const CHAR *) "pragma", (SIZE) 6, tk_id);
pp_ident = quick_insert ((const CHAR *) "ident", (SIZE) 5, tk_id);
pp_line = quick_insert ((const CHAR *) "line", (SIZE) 4, tk_id);
pp_file = quick_insert ((const CHAR *) "file", (SIZE) 4, tk_id);
#ifdef TOPSPEED
/*
 * TopSpeed keywords
 */
if (topspeed_option) {
VOIDCAST quick_insert ((const CHAR *) "cdecl", (SIZE) 5, kw_cdecl);
}
#endif /* TOPSPEED */
/*
 * Set up attributes.  Digits, letters and '_' are printable identifier
 * characters; the digit / octal / hex bits are added where they apply.
 * The white-space characters receive A_SPACE only.
 */
setattributes ((const CHAR *) "01234567",
(Attribute) (A_PRINT | A_IDCHAR | A_HEX | A_DIGIT |
A_OCTAL));
setattributes ((const CHAR *) "89",
(Attribute) (A_PRINT | A_IDCHAR | A_HEX | A_DIGIT));
setattributes ((const CHAR *) "ABCDEFabcdef",
(Attribute) (A_PRINT | A_IDCHAR | A_HEX));
setattributes ((const CHAR *) "GHIJKLMNOPQRSTUVWXYZghijklmnopqrstuvwxyz_",
(Attribute) (A_PRINT | A_IDCHAR));
setattributes ((const CHAR *) " \f\n\r\t\v", A_SPACE);
total_errors = 0;
/*
 * Initialize the input buffer.  One byte more than buflen is
 * allocated so that a terminating END_OF_BUFFER always fits.
 * NOTE(review): neither malloc() result below is checked.
 */
buflen = (SIZE) (BUFLEN + ID_LENGTH);
bufstart = (CHAR *) malloc ((size_t) (buflen + (SIZE) 1));
bufcur = bufstart; /* at start of input buffer */
bufend = bufstart + buflen; /* pointer beyond end of input buffer */
bufmove = bufstart; /* buffer won't be extended */
/*
 * The buffer starts out holding a single newline followed by the
 * end-of-buffer marker.  Presumably this makes the scanner treat the
 * first real line like any other start of line - TODO confirm.
 */
bufstart[0] = (CHAR) '\n';
bufstart[1] = END_OF_BUFFER;
buflimit = bufstart + 1;
/* buffer used for strings; starts at BUFLEN bytes */
strbuflen = (SIZE) BUFLEN;
strstart = (CHAR *) malloc ((size_t) strbuflen);
}
/*
 * endsym() - release the scanner's buffers
 *
 * Frees the input buffer and the string buffer (when allocated) and
 * clears both pointers.
 */
void endsym P0 (void)
{
    if (bufstart != NULL) {
        free (bufstart);
    }
    bufstart = NULL;

    if (strstart != NULL) {
        free (strstart);
    }
    strstart = NULL;
}
/*
 * new_line() - handle the end of a source line
 *
 * The character at the current position is overwritten with a zero
 * byte, the scanner advances to the next character and the line
 * counter is incremented.  A line that starts with '#' is passed to
 * preprocessor_directive(); for any other line act_linetxt is set to
 * the current buffer position.
 */
static void new_line P0 (void)
{
    *bufcur = (CHAR) '\0';
    nextch ();
    act_line++;

    if (*bufcur != (CHAR) '#') {
        act_linetxt = bufcur;
        return;
    }
    preprocessor_directive ();
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -