📄 clex.c
字号:
#ifndef INCLUDED_STREAM#include <stream.h>#endif#ifndef INCLUDED_STRING#include <string.h>#endif#ifndef INCLUDED_STDLIB#include <stdlib.h>#endif#ifndef INCLUDED_ASSERT#include <assert.h>#endif#ifndef INCLUDED_CTYPE#include <ctype.h>#endif#include "clex.h"// get string value tables, sym_str[] and keyword[] :#define CLEX_IMPLEMENTATION 1#include "clex_sym.h"/******************************************************************************* ** KWTABLE -- keyword hash table (internal use only) ** KWtable implements a collision-free hash table of C++ keywords. The ** table size and hash function are computed by use of a standalone C ** program, kwhash.c, included in this directory. ** *******************************************************************************/#define U_short unsigned short#define U_char unsigned charstruct KWtable { enum { HASHSIZE = 131 }; // as computed by kwhash.c, for a=9,b=2,c=2 struct { char* kwp; Clex_sym sym; } kwhash[HASHSIZE]; KWtable(char**); U_short hash(const U_char*, U_short len); void insert(char*, Clex_sym); Clex_sym lookup(char*, short len); };static KWtable kwt = KWtable(keywords); // keywords[] defined in Clex_sym.hKWtable::KWtable (char** kwl) { short int i; for (i = 0; i < HASHSIZE; ++i) kwhash[i].kwp = NULL; for (i = 0; i < CLEX_NUMKEYS; ++i) insert(kwl[i], KEYWORD_S + i); // rely on assert() to prevent hash collisions -- may need // a new hash function or table size when keyword added. }// the values used in the following hash function, and HASHSIZE, were// determined by use of the standalone C program kwhash.c, to// ensure that no collisions occur.inlineU_short KWtable::hash (const U_char* cp, U_short len) { return (((U_short)cp[0] ) ^ ((U_short)cp[1] << 9) ^ ((U_short)cp[len-1] << 2) ^ (len << 2) ) % HASHSIZE; }void KWtable::insert (char* cp, Clex_sym s) { U_short h = hash(cp, strlen(cp)); assert(kwt.kwhash[h].kwp == NULL); // collisions not permitted. kwt.kwhash[h].kwp = cp; kwt.kwhash[h].sym = s; }Clex_sym KWtable::lookup (char* cp, short len) { if (len < 2 || len > 9) return (IDENT_S); short h = hash(cp, len); if (kwt.kwhash[h].kwp == NULL) return (IDENT_S); if (strcmp(kwt.kwhash[h].kwp, cp)) return (IDENT_S); return (kwt.kwhash[h].sym); }/******************************************************************************* ** CLEX -- c++ lexical scanner ** *******************************************************************************/// CONSTRUCTOR Clex:// The argument block_brack, if TRUE, dictates that the contents// of square brackets "[]" be returned as a string in the string// buffer. If false, square brackets are treated as simple tokens.Clex::Clex (FILE* f, Boolean b) { fp = f; block_brack = b; filename[0] = '\0'; bufsiz = 0; buf[0] = '\0'; // prime the pipeline: line_num = 0; look = '\n'; // be prepared to handle '#' as first char }Clex_sym Clex::num (char c) { Clex_sym s = NUM_S; bufsiz = 0; put_in_buf(c); while (isdigit(look)) buf_one(); // hexadecimal if (bufsiz == 1 && *buf == '0' && (look == 'x' || look == 'X')) { do { buf_one(); } while (isxdigit(look)); if (look == 'L' || look == 'l' || look == 'U' || look == 'u') buf_one(); return terminate(s); } // long or unsigned if (look == 'L' || look == 'l' || look == 'U' || look == 'u') { buf_one(); return terminate(NUM_S); } // floating point else if (look == '.') { s = FLOATNUM_S; do { buf_one(); } while (isdigit(look)); } // scientific notation if (look == 'e' || look == 'E') { s = FLOATNUM_S; do { buf_one(); } while (isdigit(look)); } else return terminate(s); if (look == '+' || look == '-') do { buf_one(); } while (isdigit(look)); return terminate(s); }Clex_sym Clex::ident (char first) { register Boolean maybe_kw = TRUE; register short bs = 0; buf[bs++] = first; while (isalnum(look) || look == '_' || look == '$') { // note: this function accounts for 30% of the total scan time if (maybe_kw && (isupper(look) || look == '_' )) maybe_kw = FALSE; buf[bs++] = look; // don't worry about overflow eat_one(); } buf[bs] = '\0'; bufsiz = bs; if (maybe_kw) return kwt.lookup(buf, bufsiz); return IDENT_S; }Clex_sym Clex::quote (char c, Clex_sym s, Clex_mode m) { if (m == CL_NONE) bufsiz = 0; while (look != c) { if (look == EOF) { return terminate(ERROR_EOF_S); } else if (look == '\n') { return terminate(ERROR_EOLN_S); } else if (look == '\\') { eat_one(); if (look == '\n') { eat_one(); eoln(m|CL_QUOTE); continue; } else if (look == EOF) { return terminate(ERROR_EOF_S); } else put_in_buf('\\'); // this handles \' and \" too. } buf_one(); } eat_one(); // eat the closing quote return terminate(s); }// lbrack() accumulates the contents between "[" and "]" into// the string buffer, handling syntactically quoted strings,// comments, and nested brackets. Note that lbrack() is// called recursively in the case of nested brackets.Clex_sym Clex::lbrack (Clex_mode m) { if (m == CL_NONE) bufsiz = 0; while (look != ']') { if (look == EOF) return terminate(ERROR_EOF_S); else if (look == '\n') { eat_one(); eoln(m|CL_BRACK); } else if (look == '[') { buf_one(); if (lbrack(m|CL_BRACK) == ERROR_EOF_S) return ERROR_EOF_S; // already cleaned up. else put_in_buf(']'); } else if (look == '\'' || look == '"') { char c = look; buf_one(); (void) quote(c, NONE_S, m|CL_BRACK); put_in_buf(c); } else if (look == '/') // maybe a comment { eat_one(); if (look == '/') line_comment(); else if (look == '*') { block_comment(m|CL_BRACK); if (look == EOF) return terminate(ERROR_EOF_S); } else // stash the '/' and the char after { put_in_buf('/'); buf_one(); } } else // just a character to save buf_one(); } eat_one(); // eat the ']'.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -