📄 lr_parser.java
字号:
/** * This method is called if it is determined that syntax error recovery has * been unsuccessful. Here in the base class we report a fatal error. * * @param cur_token * the current lookahead Symbol. */ public void unrecovered_syntax_error(Symbol cur_token) throws java.lang.Exception { report_fatal_error("Couldn't repair and continue parse", cur_token); } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Fetch an action from the action table. The table is broken up into rows, * one per state (rows are indexed directly by state number). Within each * row, a list of index, value pairs are given (as sequential entries in the * table), and the list is terminated by a default entry (denoted with a * Symbol index of -1). To find the proper entry in a row we do a linear or * binary search (depending on the size of the row). * * @param state * the state index of the action being accessed. * @param sym * the Symbol index of the action being accessed. */ protected final short get_action(int state, int sym) { short tag; int first, last, probe; short[] row = action_tab[state]; /* linear search if we are < 10 entries */ if (row.length < 20) for (probe = 0; probe < row.length; probe++) { /* is this entry labeled with our Symbol or the default? */ tag = row[probe++]; if (tag == sym || tag == -1) { /* return the next entry */ return row[probe]; } } /* otherwise binary search */ else { first = 0; last = (row.length - 1) / 2 - 1; /* * leave out trailing default * entry */ while (first <= last) { probe = (first + last) / 2; if (sym == row[probe * 2]) return row[probe * 2 + 1]; else if (sym > row[probe * 2]) first = probe + 1; else last = probe - 1; } /* not found, use the default at the end */ return row[row.length - 1]; } /* * shouldn't happened, but if we run off the end we return the default * (error == 0) */ return 0; } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Fetch a state from the reduce-goto table. The table is broken up into * rows, one per state (rows are indexed directly by state number). Within * each row, a list of index, value pairs are given (as sequential entries * in the table), and the list is terminated by a default entry (denoted * with a Symbol index of -1). To find the proper entry in a row we do a * linear search. * * @param state * the state index of the entry being accessed. * @param sym * the Symbol index of the entry being accessed. */ protected final short get_reduce(int state, int sym) { short tag; short[] row = reduce_tab[state]; /* if we have a null row we go with the default */ if (row == null) return -1; for (int probe = 0; probe < row.length; probe++) { /* is this entry labeled with our Symbol or the default? */ tag = row[probe++]; if (tag == sym || tag == -1) { /* return the next entry */ return row[probe]; } } /* if we run off the end we return the default (error == -1) */ return -1; } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * This method provides the main parsing routine. It returns only when * done_parsing() has been called (typically because the parser has * accepted, or a fatal error has been reported). See the header * documentation for the class regarding how shift/reduce parsers operate * and how the various tables are used. */ public Symbol parse() throws java.lang.Exception { /* the current action code */ int act; /* the Symbol/stack element returned by a reduce */ Symbol lhs_sym = null; /* information about production being reduced with */ short handle_size, lhs_sym_num; /* set up direct reference to tables to drive the parser */ production_tab = production_table(); action_tab = action_table(); reduce_tab = reduce_table(); /* initialize the action encapsulation object */ init_actions(); /* do user initialization */ user_init(); /* get the first token */ cur_token = scan(); /* push dummy Symbol with start state to get us underway */ stack.removeAllElements(); stack.push(new Symbol(0, start_state())); tos = 0; /* continue until we are told to stop */ for (_done_parsing = false; !_done_parsing;) { /* Check current token for freshness. */ if (cur_token.used_by_parser) throw new Error("Symbol recycling detected (fix your scanner)."); /* current state is always on the top of the stack */ /* look up action out of the current state with the current input */ act = get_action(((Symbol) stack.peek()).parse_state, cur_token.sym); /* decode the action -- > 0 encodes shift */ if (act > 0) { /* shift to the encoded state by pushing it on the stack */ cur_token.parse_state = act - 1; cur_token.used_by_parser = true; stack.push(cur_token); tos++; /* advance to the next Symbol */ cur_token = scan(); } /* if its less than zero, then it encodes a reduce action */ else if (act < 0) { /* perform the action for the reduce */ lhs_sym = do_action((-act) - 1, this, stack, tos); /* look up information about the production */ lhs_sym_num = production_tab[(-act) - 1][0]; handle_size = production_tab[(-act) - 1][1]; /* pop the handle off the stack */ for (int i = 0; i < handle_size; i++) { stack.pop(); tos--; } /* look up the state to go to from the one popped back to */ act = get_reduce(((Symbol) stack.peek()).parse_state, lhs_sym_num); /* shift to that state */ lhs_sym.parse_state = act; lhs_sym.used_by_parser = true; stack.push(lhs_sym); tos++; } /* finally if the entry is zero, we have an error */ else if (act == 0) { /* call user syntax error reporting routine */ syntax_error(cur_token); /* try to error recover */ if (!error_recovery(false)) { /* if that fails give up with a fatal syntax error */ unrecovered_syntax_error(cur_token); /* just in case that wasn't fatal enough, end parse */ done_parsing(); } else { lhs_sym = (Symbol) stack.peek(); } } } return lhs_sym; } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Write a debugging message to System.err for the debugging version of the * parser. * * @param mess * the text of the debugging message. */ public void debug_message(String mess) { System.err.println(mess); } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** Dump the parse stack for debugging purposes. */ public void dump_stack() { if (stack == null) { debug_message("# Stack dump requested, but stack is null"); return; } debug_message("============ Parse Stack Dump ============"); /* dump the stack */ for (int i = 0; i < stack.size(); i++) { debug_message("Symbol: " + ((Symbol) stack.elementAt(i)).sym + " State: " + ((Symbol) stack.elementAt(i)).parse_state); } debug_message("=========================================="); } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Do debug output for a reduce. * * @param prod_num * the production we are reducing with. * @param nt_num * the index of the LHS non terminal. * @param rhs_size * the size of the RHS. */ public void debug_reduce(int prod_num, int nt_num, int rhs_size) { debug_message("# Reduce with prod #" + prod_num + " [NT=" + nt_num + ", " + "SZ=" + rhs_size + "]"); } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Do debug output for shift. * * @param shift_tkn * the Symbol being shifted onto the stack. */ public void debug_shift(Symbol shift_tkn) { debug_message("# Shift under term #" + shift_tkn.sym + " to state #" + shift_tkn.parse_state); } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Do debug output for stack state. [CSA] */ public void debug_stack() { StringBuffer sb = new StringBuffer("## STACK:"); for (int i = 0; i < stack.size(); i++) { Symbol s = (Symbol) stack.elementAt(i); sb.append(" <state " + s.parse_state + ", sym " + s.sym + ">"); if ((i % 3) == 2 || (i == (stack.size() - 1))) { debug_message(sb.toString()); sb = new StringBuffer(" "); } } } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Perform a parse with debugging output. This does exactly the same things * as parse(), except that it calls debug_shift() and debug_reduce() when * shift and reduce moves are taken by the parser and produces various other * debugging messages. */ public Symbol debug_parse() throws java.lang.Exception { /* the current action code */ int act; /* the Symbol/stack element returned by a reduce */ Symbol lhs_sym = null; /* information about production being reduced with */ short handle_size, lhs_sym_num; /* set up direct reference to tables to drive the parser */ production_tab = production_table(); action_tab = action_table(); reduce_tab = reduce_table(); debug_message("# Initializing parser"); /* initialize the action encapsulation object */ init_actions(); /* do user initialization */ user_init(); /* the current Symbol */ cur_token = scan(); debug_message("# Current Symbol is #" + cur_token.sym); /* push dummy Symbol with start state to get us underway */ stack.removeAllElements(); stack.push(new Symbol(0, start_state())); tos = 0; /* continue until we are told to stop */ for (_done_parsing = false; !_done_parsing;) { /* Check current token for freshness. */ if (cur_token.used_by_parser) throw new Error("Symbol recycling detected (fix your scanner)."); /* current state is always on the top of the stack */ // debug_stack(); /* look up action out of the current state with the current input */ act = get_action(((Symbol) stack.peek()).parse_state, cur_token.sym); /* decode the action -- > 0 encodes shift */ if (act > 0) { /* shift to the encoded state by pushing it on the stack */ cur_token.parse_state = act - 1; cur_token.used_by_parser = true; debug_shift(cur_token); stack.push(cur_token); tos++; /* advance to the next Symbol */ cur_token = scan(); debug_message("# Current token is " + cur_token); } /* if its less than zero, then it encodes a reduce action */ else if (act < 0) { /* perform the action for the reduce */ lhs_sym = do_action((-act) - 1, this, stack, tos); /* look up information about the production */ lhs_sym_num = production_tab[(-act) - 1][0]; handle_size = production_tab[(-act) - 1][1]; debug_reduce((-act) - 1, lhs_sym_num, handle_size); /* pop the handle off the stack */ for (int i = 0; i < handle_size; i++) { stack.pop(); tos--; } /* look up the state to go to from the one popped back to */ act = get_reduce(((Symbol) stack.peek()).parse_state, lhs_sym_num); debug_message("# Reduce rule: top state " + ((Symbol) stack.peek()).parse_state + ", lhs sym " + lhs_sym_num + " -> state " + act); /* shift to that state */ lhs_sym.parse_state = act; lhs_sym.used_by_parser = true; stack.push(lhs_sym); tos++; debug_message("# Goto state #" + act); } /* finally if the entry is zero, we have an error */ else if (act == 0) { /* call user syntax error reporting routine */ syntax_error(cur_token); /* try to error recover */ if (!error_recovery(true)) { /* if that fails give up with a fatal syntax error */ unrecovered_syntax_error(cur_token); /* just in case that wasn't fatal enough, end parse */ done_parsing(); } else { lhs_sym = (Symbol) stack.peek(); } } } return lhs_sym; } /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /* Error recovery code */ /* . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */ /** * Attempt to recover from a syntax error. This returns false if recovery * fails, true if it succeeds. Recovery happens in 4 steps. First we pop the * parse stack down to a point at which we have a shift out of the top-most * state on the error Symbol. This represents the initial error recovery * configuration. If no such configuration is found, then we fail. Next a * small number of "lookahead" or "parse ahead" Symbols are read into a * buffer. The size of this buffer is determined by error_sync_size() and
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -