📄 clex.c

📁 一个基于C++的语法分析类
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
#ifndef INCLUDED_STREAM#include <stream.h>#endif#ifndef INCLUDED_STRING#include <string.h>#endif#ifndef INCLUDED_STDLIB#include <stdlib.h>#endif#ifndef INCLUDED_ASSERT#include <assert.h>#endif#ifndef INCLUDED_CTYPE#include <ctype.h>#endif#include "clex.h"// get string value tables, sym_str[] and keyword[] :#define CLEX_IMPLEMENTATION 1#include "clex_sym.h"/*******************************************************************************                                                                             **  KWTABLE -- keyword hash table (internal use only)                          **     KWtable implements a collision-free hash table of C++ keywords.  The    **     table size and hash function are computed by use of a standalone C      **     program, kwhash.c, included in this directory.                          **                                                                             *******************************************************************************/#define U_short unsigned short#define U_char  unsigned charstruct KWtable    {    enum { HASHSIZE = 131 };  // as computed by kwhash.c, for a=9,b=2,c=2    struct  {            char* kwp;            Clex_sym sym;            } kwhash[HASHSIZE];    KWtable(char**);    U_short hash(const U_char*, U_short len);    void insert(char*, Clex_sym);    Clex_sym lookup(char*, short len);    };static KWtable kwt = KWtable(keywords); // keywords[] defined in Clex_sym.hKWtable::KWtable (char** kwl)    {    short int i;    for (i = 0; i < HASHSIZE; ++i)        kwhash[i].kwp = NULL;    for (i = 0; i < CLEX_NUMKEYS; ++i)        insert(kwl[i], KEYWORD_S + i);    // rely on assert() to prevent hash collisions -- may need    //  a new hash function or table size when keyword added.    }// the values used in the following hash function, and HASHSIZE, were// determined by use of the standalone C program kwhash.c, to// ensure that no collisions occur.inlineU_short KWtable::hash (const U_char* cp, U_short len)    {    return (((U_short)cp[0]         ) ^            ((U_short)cp[1]     << 9) ^            ((U_short)cp[len-1] << 2) ^            (len                << 2) ) % HASHSIZE;    }void KWtable::insert (char* cp, Clex_sym s)    {    U_short h = hash(cp, strlen(cp));    assert(kwt.kwhash[h].kwp == NULL);  // collisions not permitted.    kwt.kwhash[h].kwp = cp;    kwt.kwhash[h].sym = s;    }Clex_sym KWtable::lookup (char* cp, short len)    {    if (len < 2 || len > 9) return (IDENT_S);    short h = hash(cp, len);    if (kwt.kwhash[h].kwp == NULL) return (IDENT_S);    if (strcmp(kwt.kwhash[h].kwp, cp)) return (IDENT_S);    return (kwt.kwhash[h].sym);    }/*******************************************************************************                                                                             **  CLEX -- c++ lexical scanner                                               **                                                                             *******************************************************************************/// CONSTRUCTOR Clex://   The argument block_brack, if TRUE, dictates that the contents//   of square brackets "[]" be returned as a string in the string//   buffer.  If false, square brackets are treated as simple tokens.Clex::Clex (FILE* f, Boolean b)    {    fp = f;    block_brack = b;    filename[0] = '\0';    bufsiz = 0; buf[0] = '\0';    // prime the pipeline:    line_num = 0;    look = '\n';    // be prepared to handle '#' as first char    }Clex_sym Clex::num (char c)    {    Clex_sym s = NUM_S;    bufsiz = 0;    put_in_buf(c);    while (isdigit(look))        buf_one();    // hexadecimal    if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X'))        {        do { buf_one(); }            while (isxdigit(look));        if (look == 'L' || look == 'l' || look == 'U' || look == 'u')            buf_one();        return terminate(s);        }    // long or unsigned    if (look == 'L' || look == 'l' || look == 'U' || look == 'u')        { buf_one(); return terminate(NUM_S); }    // floating point    else if (look == '.')        {        s = FLOATNUM_S;        do { buf_one(); }            while (isdigit(look));        }    // scientific notation    if (look == 'e' || look == 'E')         {         s = FLOATNUM_S;         do { buf_one(); }            while (isdigit(look));         }    else        return terminate(s);    if (look == '+' || look == '-')         do { buf_one(); }            while (isdigit(look));    return terminate(s);    }Clex_sym Clex::ident (char first)    {    register Boolean maybe_kw = TRUE;    register short bs = 0;    buf[bs++] = first;    while (isalnum(look) || look == '_' || look == '$')        {        // note: this function accounts for 30% of the total scan time        if (maybe_kw && (isupper(look) || look == '_' ))            maybe_kw = FALSE;        buf[bs++] = look;       // don't worry about overflow        eat_one();        }    buf[bs] = '\0';    bufsiz = bs;    if (maybe_kw)        return kwt.lookup(buf, bufsiz);    return IDENT_S;    }Clex_sym Clex::quote (char c, Clex_sym s, Clex_mode m)    {    if (m == CL_NONE)        bufsiz = 0;    while (look != c)        {        if (look == EOF)            { return terminate(ERROR_EOF_S); }        else if (look == '\n')            { return terminate(ERROR_EOLN_S); }        else if (look == '\\')            {            eat_one();            if (look == '\n')                { eat_one(); eoln(m|CL_QUOTE); continue; }            else if (look == EOF)                { return terminate(ERROR_EOF_S); }            else                put_in_buf('\\');   // this handles \' and \" too.            }        buf_one();        }    eat_one();  // eat the closing quote    return terminate(s);    }// lbrack() accumulates the contents between "[" and "]" into//  the string buffer, handling syntactically quoted strings,//  comments, and nested brackets.  Note that lbrack() is//  called recursively in the case of nested brackets.Clex_sym Clex::lbrack (Clex_mode m)    {    if (m == CL_NONE)        bufsiz = 0;    while (look != ']')        {        if (look == EOF)            return terminate(ERROR_EOF_S);        else if (look == '\n')            { eat_one(); eoln(m|CL_BRACK); }        else if (look == '[')            {            buf_one();            if (lbrack(m|CL_BRACK) == ERROR_EOF_S)                return ERROR_EOF_S;     // already cleaned up.            else put_in_buf(']');            }        else if (look == '\'' || look == '"')            {            char c = look;            buf_one();            (void) quote(c, NONE_S, m|CL_BRACK);            put_in_buf(c);            }        else if (look == '/')           // maybe a comment            {            eat_one();            if (look == '/')                line_comment();            else if (look == '*')                {                block_comment(m|CL_BRACK);                if (look == EOF) return terminate(ERROR_EOF_S);                }            else                        // stash the '/' and the char after                { put_in_buf('/'); buf_one(); }            }        else                            // just a character to save            buf_one();        }    eat_one(); // eat the ']'.
12 下一页
💿 文件大小 12 K
👤 上传用户 bobar
📂 所属分类编译器/解释器
🏷️ 相关标签

#分
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -