📄 scanner.cc

📁 PL/0源码
💻 CC
字号:
//////////////////////////////////////////////////////////////////////////////////  scanner.cc//#include <strstream>#include <ctype.h>#include "tokens.h"#include "scanner.h"#include "scantab.h"static string_number_t string_hash_table[HASH_TABLE_SIZE];//////////  Forward references for static functions://static void insert_hash_table(string_number_t &loc);//////////  Establishes initial values for static variables.//  Does one sanity check and returns the number of tokens so the//  caller can do another sanity check.//int init_scanner(void){    int     offset;    short   hash;    char *  sp;    // Initialize the scanner token hash table.    for (int i = 0; i < HASH_TABLE_SIZE; i++)    string_hash_table[i] = -1;    for (offset = 0; st_string_space[offset]; offset += strlen(sp) + 1) {    hash = offset;    insert_hash_table(hash);    sp = st_string_space + offset;    }    assert(MAJ_IDENT == st_id_major_num);    return st_num_terminals;   // return so can check with parser}//////////  Inserts a character into the next available slot in StringSpace.//  Checks against overflow.  Converts all lower-case characters to upper//  case, on the assumption that case is insignificant.//static void insert_string_space(char ch){    if (st_used_strings == MAX_STRING_SPACE) {        ostrstream ost;        ost << "Out of room in scanner string storage area: "            << st_used_strings            << " of "            << MAX_STRING_SPACE;        die_compiler(ost.str());    }    st_string_space[st_used_strings++] = toupper(ch);}//////////  Computes a hash function of the input string by modulo division.  Treats//  character string as a very long integer, and divides one 'place' at a time.//static hash_index_t hash(string_number_t location){    hash_index_t        remainder = 1;    while (st_string_space[location] != '\0') {    remainder = (((remainder-1) * MAX_CHAR_CODE +              st_string_space[location]) % (HASH_TABLE_SIZE)) + 1;    location++;    }    return remainder;}//////////  Compares two strings and determines if they are the same.//static bool equal_strings(string_number_t s1, string_number_t s2){    for (; st_string_space[s1] == st_string_space[s2]; s1++, s2++) {    if (st_string_space[s1] == '\0') {        return true;    }    }    return false;}//////////  Put token into the hash table. Guards against hash table overflow.//  loc comes in with the index in st_string_space of the token to be inserted.//  This token MUST BE the last valid thing currently in st_string_space.//  If a token is a copy of one already found, the new version is removed, by//  decrementing st_used_strings.  In that event, parameter loc is modified//  to index of the existing version.//static void insert_hash_table(string_number_t &loc){    hash_index_t        hash_value;    hash_index_t        bucket;    hash_value = hash(loc);    bucket = hash_value;    for (;;) {        if (string_hash_table[bucket - 1] == -1) {        string_hash_table[bucket-1] = loc;        break;    }    else { // this bucket is used -- by the same token?            if (equal_strings(loc, string_hash_table[bucket - 1])) {        st_used_strings = loc;        // discard duplicate        loc = string_hash_table[bucket - 1]; // return existing copy        break;        }            else // we have a collision and must re-hash        {                bucket = (bucket + hash_value) % HASH_TABLE_SIZE;                if (bucket == hash_value) {            // we have looked at every bucket                    ostrstream ost;                    ost << "Out of room in scanner hash table: "                        << bucket                        << "=="                        << hash_value;                    die_compiler(ost.str());                }            }    }    }}//////////  Allows external routines to insert identifiers into StringSpace and//  the HashTable.  Used to set up pre-defined objects.//string_number_t define_ident(const char *ident){    string_number_t     start;    start = st_used_strings;    for (int i = 0; ident[i]; i++)        insert_string_space(ident[i]);    insert_string_space('\0');    insert_hash_table(start);    return start;}//////////  Copes with 'illegal' states in the scanner pt_table//  caused by invalid input or insufficient look-ahead.//  Returns a new value for the state vector, to replace//  the old value, which was 'illegal.'////  state_vector_t is a new value for the state vector,//  designed to allow (nearly) normal continuation of the algorithm.////  If the issue_error was the result of invalid input, the//  illegal token will be removed from StringSpace.//  If the issue_error was due to the inability of the tables//  to provide for look-ahead, the character mistakenly//  consumed is returned to the input buffer.////  Note that this procedure works by means of fooling the// rest of the scanner into thinking that it had a valid// terminal state all along.//static int      error_chars;static int      nonwhite_since_t;static void handle_table_error(state_vector_t *state_vector,                               scanner_state_t prev_terminal,                               char current_char, int chars_seen,                               int chars_appended){    if (chars_seen == 0 // ie. current state is st_start_state     && current_char == FILE_END) {    // we need to return End-Of-File to whoever called the scanner    // note that an issue_error can never occur in a terminal state    state_vector->action = HALT_NO_APPEND;    state_vector->major = st_end_of_file;    state_vector->minor = 0;    }    else {    if ((prev_terminal >= 0)&&(prev_terminal != st_start_state)) {        // we have had a terminal state in this token        unread_chars(chars_seen);            state_vector->action = HALT_REUSE;        st_used_strings = st_used_strings - chars_appended;        state_vector->major                = st_state_info[(int)prev_terminal].default_major;        state_vector->minor                = st_state_info[(int)prev_terminal].default_minor;    }        else // we have a lexical issue_error        {        error_chars = error_chars + nonwhite_since_t + 1;        st_used_strings = st_used_strings - chars_appended;        state_vector->action = HALT_NO_APPEND;        state_vector->major = MAJ_SPACE;        state_vector->minor = 0;    }    }}//////////  Looks up an identifier in the st_reserver_words array, and returns//  its major and minor token numbers.  Does nothing at all if identifier//  is NOT a reserve word.  Uses a simple binary search.//static  void find_reserve_word(string_number_t location,                               major_token_t& major_num,                               major_token_t& minor_num){    int                 low;    int                 middle;    int                 high;    string_number_t     current_space_point;    low = 0;    high = st_num_reserved_words - 1;    if (location > st_max_reserve_location) {    return;    }    for (;;) {    middle = (low + high) / 2;    current_space_point = st_reserved_words[middle].text_handle;    if (location == current_space_point)    // we are done        break;                              // leave loop    else        if (location < current_space_point)        high = middle - 1;              // search lower half        else        low = middle + 1;               // search upper half    }    major_num = st_reserved_words[middle].major;    minor_num = st_reserved_words[middle].minor;}//////////  Lexical Scanner://  Retrieves next token from input file and returns descriptive information.//  Never returns white space.  Returns an actual token for end-of-file, one//  larger than largest real token.//major_token_t token_get(token_attrib_t& attr){    string_number_t     token_start;    scanner_state_t     new_state;    scanner_state_t     prev_terminal;    char_class_t        next_class;    int                 chars_seen;    int                 chars_appended;    char                next_char;    state_vector_t      state_vector;    scanner_act_t       next_action;    bool                exist_lower_case;    error_chars = 0;    do {        token_start = st_used_strings;        exist_lower_case = false;        prev_terminal = -1; // in case we bomb on early char        chars_seen = 0;        nonwhite_since_t = 0;        chars_appended = 0;        new_state = st_start_state;    next_char = read_char();        attr.location = get_source_location();        do {            next_class = st_char_class[next_char];            state_vector = st_state_matrix[new_state][next_class];            if (state_vector.action == ERROR)                handle_table_error(&state_vector, prev_terminal, next_char,                                   chars_seen, chars_appended);            next_action = state_vector.action;        if (next_action == MOVE_APPEND || next_action == MOVE_NO_APPEND) {                if (st_state_info[(int)new_state].terminal) {                    prev_terminal = new_state;                    chars_seen = 0;                    nonwhite_since_t = 0;                    chars_appended = 0;                }                new_state = state_vector.next_state;                chars_seen++;                if (next_char != ' ' && next_char != '\t' && next_char != '\n')                    nonwhite_since_t = nonwhite_since_t + 1;                if (next_action == MOVE_APPEND) {                    if (next_char >= 'a' && next_char <= 'z')                        exist_lower_case = true;                    insert_string_space(next_char);                    chars_appended++;                }                next_char = read_char();        }    } while (next_action == MOVE_APPEND || next_action == MOVE_NO_APPEND);        if (next_action == HALT_APPEND) {            if (next_char >= 'a' && next_char <= 'z')                exist_lower_case = true;            insert_string_space(next_char);        }        else {            if (next_action == HALT_REUSE) { // return next_char                unread_chars(1);            }        }        attr.major = state_vector.major;        attr.minor = state_vector.minor;        insert_string_space('\0');     // separates tokens        if (token_start != st_used_strings) {            // we didn't toss the whole thing            insert_hash_table(token_start);            attr.text_handle = token_start;        }        else {            attr.text_handle = 0;        }        if (attr.major == st_id_major_num            && attr.minor == st_id_minor_num)            find_reserve_word(attr.text_handle, attr.major, attr.minor);    } while (attr.major == MAJ_SPACE);    if (error_chars > 0 && !scanner_peeking) {    done_correcting();        if (error_chars == 1) {            issue_warning(attr.location,                           "Invalid token.  1 non-white character ignored.");        }        else {            ostrstream ost;            ost << "Invalid token.  "                << error_chars                << " non-white characters ignored.";            issue_warning(attr.location, ost.str());        }    }    return attr.major;}//////////  Pretend an inserted token was really scanned.  Pick an appropriate minor//  number for those tokens that have them.//token_attrib_t token_fake(major_token_t tok){    token_attrib_t      rtn;    rtn.major = tok;    rtn.text_handle = 0;    rtn.location = get_source_location();   // close enough    switch (tok) {        case MAJ_ADDOP:            rtn.minor = MIN_PLUS; break;        case MAJ_MULOP:            rtn.minor = MIN_TIMES; break;        case MAJ_RELOP:            rtn.minor = MIN_NE; break;        case MAJ_EQ:            rtn.minor = MIN_EQ; break;        default:            rtn.minor = 0; break;    }    return rtn;}//////////  Returns a pointer to the character string for a token attribute.//const char * token_attrib_t::get_text(void) const{    return token_chars(text_handle);}//////////  Return (non-tossed) text of a token//string token_text(string_number_t where){    if (where) {        // not a faked token        return string(st_string_space + where);    }    return string("");}//////////  Return a character pointer to the string's space.//const char *token_chars(string_number_t where){    if (where != 0) {      // not a faked token        return st_string_space + where;    }    return "";}// End of File
💿 文件大小 168 K
👤 上传用户 zzxt
📂 所属分类编译器/解释器
🏷️ 相关标签

#PL #源码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -