📄 parser.cc
字号:
////////////////////////////////////////////////////////////////////////////////// parser.cc//// The parser gets symbols from the scanner by calling 'get_next_token',// which returns the next symbol to be parsed.//// The corrector looks ahead at upcoming symbols. These symbols must// be available for normal use by get_next_token at a later time. To this// end, they may be kept in a buffer, or they may be physically// rescanned a second time. The current implementation uses// rescanning. There is a read-ahead buffer, but it is used to hold// insertions only. Get_next_token, called by the scanner, peels// tokens off this insertion buffer if possible, before calling token_get.// If real symbols were kept in a buffer, some provision would have to// be made to keep semantic information (such as the characters in an// identifier) around until the symbol is actually used by the parser.//// Side-effects of the scanner, such as symbol-table manipulations and// listing output, should be avoided during peeking. Only the input// buffering routines keep track of whether we are scanner_peeking or not.// The parser tells them when to start and stop. When peeking starts,// inpbuf saves the current line and token starting column. When the// corrector is done looking ahead, inpbuf restores the saved// context.//// After a correction has been made, all the inserted symbols plus// at least one symbol in the input will be comsumed by the parser.// If this is not the case, an internal error has occurred, most likely// in the corrector.//#include <limits.h>#include <string>#include "scanner.h"#include "tokens.h"#include "actions.h"#include "attributes.h"#include "parser.h"#include "parsetab.h"#define MAX_STACK 500#define PROD_MARKER INT_MIN // marks end of production on stackstruct read_ahead_t { // Read Ahead Buffer records major_token_t token; read_ahead_t * next;};static token_attrib_t synthetic_attrs;// Semantic information on the most recently-read tokenstatic int parse_stack[MAX_STACK + 1];static int stack_pointer;static int to_consume;// Number of tokens that must be consumed before we see another// syntax error. If we don't see this many, we've run into an error// in the "corrected" input, indicating an internal error in the// parser or corrector.static read_ahead_t * read_ahead_buffer;// Head of buffer of inserted tokensstatic bool check_flag;// True iff we can look down the stack through symbols that// can generate epsilon and end up at either a terminal matching// the one on the input or a non-terminal that does not generate// epsilon. Initially false. Set true only in checkprediction,// when we return true. Set false only in parse, when we// match a token.static int seen_so_far;// How many symbols of current RHS have been popped////////// Called from main program to initialize the parser.//int init_parser(void){ return pt_num_terminals; // Return the number of parser terminals.}////////// Index a sparse (sym X term) array; return nil if entry missing//static sparse_table_t * find_sparse_table (sparse_table_t **tab, symbol_index_t sym, major_token_t term){ for (sparse_table_t *p = tab[sym]; p != NULL; p = p->next) { if (p->term == term) return p; } return NULL;}#define MAX_CORRECTION 512struct correction_t { // insertion in progress int srCost; major_token_t string[MAX_CORRECTION + 1]; int length;};static string symbol_string (read_ahead_t * p){ return string(pt_string_space, pt_symbol_table[(int)p->token].start, pt_symbol_table[(int)p->token].length);}static void insert_tokens (correction_t fix){ read_ahead_t * ptr; read_ahead_t * qtr; int index; to_consume = fix.length + 1; if (fix.length >= 1) { ptr = new read_ahead_t; ptr->token = fix.string[1]; ptr->next = read_ahead_buffer; display_insertion(symbol_string(ptr)); read_ahead_buffer = ptr; qtr = ptr; for (index = 2; index <= fix.length; index++) { ptr = new read_ahead_t; ptr->token = fix.string[index]; ptr->next = qtr->next; qtr->next = ptr; qtr = ptr; display_insertion(symbol_string (ptr)); } }}////////// copy source to dest, converting type of string//static void copy_symbol (insert_string_t source, correction_t *dest){ insert_sindex_t i; if ((dest->length - source.first + source.last + 1) > MAX_CORRECTION) { die_compiler("Symbol too long to be copied into correction."); } for (i = source.first; i <= source.last; i++) { dest->length++; dest->string[dest->length] = pt_insert_space[i]; } dest->srCost += + source.cost;}////////// Compute the least-cost insertion to make err_term legal given the// current parse stack.//static void least_cost_insert (major_token_t err_term, correction_t *insert){ int stack_loc = stack_pointer; int sym; // positive or negative symbol_index_t int running_cost = 0; int min_cost = pt_infinity; int this_cost; int min_pos; sparse_table_t * table_ptr; sparse_table_t * min_table_ptr = NULL; do { sym = parse_stack[stack_loc]; if (sym > 0) { table_ptr = find_sparse_table(pt_prefix_table, sym, err_term); if (table_ptr == NULL) this_cost = pt_infinity; else this_cost = running_cost + table_ptr->insertion.cost; if (this_cost < min_cost) { min_cost = this_cost; min_pos = stack_loc; min_table_ptr = table_ptr; } running_cost = running_cost + pt_cost_table[sym].cost; } stack_loc--; } while (running_cost < min_cost && stack_loc >= 0); if (min_cost < pt_infinity) { // expand insertion insert->srCost = 0; insert->length = 0; for (stack_loc = stack_pointer; stack_loc > min_pos; stack_loc--) { sym = parse_stack[stack_loc]; if (sym > 0) copy_symbol(pt_cost_table[sym], insert); } sym = parse_stack[min_pos]; if (min_table_ptr == NULL) insert->srCost = pt_infinity; else copy_symbol(min_table_ptr->insertion, insert); } else insert->srCost = pt_infinity;}////////// Find the least cost correction to get us out of this error situation//static void least_cost_corrector(symbol_index_t token){ correction_t insertion; correction_t save_insert; symbol_index_t next_sym; int del_cost; int min_cost; int del_count; int delete_point; token_attrib_t dummy_attrs; if (to_consume >= 0 || read_ahead_buffer != NULL) die_compiler("** INTERNAL ERROR: parse error in corrected input."); prepare_to_correct(synthetic_attrs.location); // tell the input buffering routines we're working on a syntax error least_cost_insert(token, &save_insert); del_cost = pt_delete_costs[token]; min_cost = save_insert.srCost; delete_point = 0; del_count = 0; next_sym = token; while (del_cost < min_cost) { // consider deletions until accumulated cost is too high next_sym = token_get(dummy_attrs); del_count++; least_cost_insert(next_sym, &insertion); // find insert necessary if deletion made if (del_cost + insertion.srCost < min_cost) { // a better correction is found; remember it delete_point = del_count; save_insert = insertion; min_cost = del_cost + insertion.srCost; } del_cost += pt_delete_costs[next_sym]; } if (min_cost >= pt_infinity) die_compiler("***** AMAZING ERROR: No correction possible."); done_peeking(); // tell the input buffering routines we are done looking // ahead and are about to modify the input if (delete_point > 0) { to_consume = 1; display_deletions(delete_point); } insert_tokens(save_insert); done_correcting(); // tell the input buffering routines to throw away old context}////////// Return the next token:// If read_ahead_buffer is empty then read new token and return it.// (leaves the token read on the buffer)// Else pop one off of buffer.// (buffer is used to hold insertions only)// Positions the old line buffer same as current buffer.//static symbol_index_t get_next_token(void){ symbol_index_t tok; if (read_ahead_buffer != NULL) { read_ahead_t * ptr; tok = read_ahead_buffer->token; synthetic_attrs = token_fake(tok); ptr = read_ahead_buffer; read_ahead_buffer = read_ahead_buffer->next; delete ptr; } else tok = token_get(synthetic_attrs); if (to_consume >= 0) to_consume--; return tok;}////////// Push action onto parse stack.//static void push_action (prod_index_t prod){ production_t * p; int i; int numSyms; p = &pt_productions[prod]; if (stack_pointer + p->length + 1 > MAX_STACK) { die_compiler("Parser stack overflow."); } numSyms = 0; parse_stack[++stack_pointer] = PROD_MARKER; // mark end of production for (i = p->start; i <= p->start + p->length - 1; i++) { parse_stack[++stack_pointer] = pt_prod_space[i]; if (pt_prod_space[i] > 0) numSyms++; } start_production (numSyms, seen_so_far-1); // tell semantic routines how far into the current RHS is // the symbol that we are about to expand (and that we // just popped off the parse stack, incrementing SeenSoFar) seen_so_far = 0;}////////// Pop one item from the stack.//static void pop_action (void){ assert(stack_pointer >=1); // forbid parser stack underflow if (parse_stack[stack_pointer] > 0) seen_so_far++; // saw a symbol, not an action num stack_pointer--;}////////// Return true if an epsilon prediction on term is ok.//static bool check_predication (major_token_t term){ int ptr = stack_pointer; int sym; int parse_action; sparse_table_t * table_ptr; bool rtn = true; for (;;) { sym = parse_stack[ptr]; if (sym < 0) { // action routine ptr--; if (ptr == 0) break; } else if (sym <= pt_num_terminals) { // terminal if (sym == term) { check_flag = true; break; } return false; } else { // non-terminal table_ptr = find_sparse_table(pt_table, sym, term); if (table_ptr == NULL) { return false; } parse_action = table_ptr->parse_action; if (parse_action > 0) { // production cannot be in error check_flag = true; break; } // otherwise might be in error; keep looking ptr--; if (ptr == 0) break; } } return rtn;}////////// Main parsing routine, called by driver//void parse (void){ int pAction; int sym; symbol_index_t token; sparse_table_t * table_ptr; read_ahead_buffer = NULL; seen_so_far = 1; // pretend we just popped 'GOAL' off the stack check_flag = false; stack_pointer = 0; push_action(pt_num_productions); // augmenting production token = get_next_token(); for (;;) { sym = parse_stack[stack_pointer]; // negative sym => action routine // zero sym => error // positive sym => terminal or non-terminal if (sym == PROD_MARKER) { pop_action(); // MUST pop before end; pop changes seen so far end_production(seen_so_far); } else if (sym < 0) { // action number do_action(-sym); pop_action(); } else { // symbol if (sym <= pt_num_terminals) { // terminal if (sym == token) { init_token(synthetic_attrs, seen_so_far); pop_action(); check_flag = false; if (token == pt_num_terminals) // end of file return; else token = get_next_token(); } else { // error: token does not match least_cost_corrector(token); token = get_next_token(); } } // if terminal else { // non-terminal table_ptr = find_sparse_table(pt_table, sym, token); if (table_ptr == NULL) { // cannot predict least_cost_corrector(token); token = get_next_token(); } else { pAction = table_ptr->parse_action; if (pAction < 0) { // can generate epsilon if (check_flag || check_predication(token)) { pop_action(); push_action(-pAction); } else { least_cost_corrector(token); token = get_next_token(); } } else { // cannot generate epsilon pop_action(); push_action(pAction); } } } } }}// End of File
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -