parser.java

来自「JDK1.4编译器后端」· Java 代码 · 共 673 行 · 第 1/2 页
JAVA
673 行
		/**
		 * Loads the simulator's private state stack from the enclosing parser's stack.
		 * <p>
		 * When the simulator has no stack yet, or its stack is shorter than the parser's,
		 * a new array of the parser's stack length is allocated and {@code min_top} is
		 * reset to 0. Then the parser's {@code top} is adopted and {@code top + 1} entries
		 * are copied, starting at index {@code min_top} in both arrays.
		 *
		 * @throws IOException declared by the signature; nothing in this body raises it
		 */
		private void initStack() throws IOException
		{
			final short[] parser_states = Parser.this.states;
			final boolean too_small = states == null || states.length < parser_states.length;
			if (too_small)
			{
				states = new short[parser_states.length];
				min_top = 0;
			}
			top = Parser.this.top;
			System.arraycopy(parser_states, min_top, states, min_top, top + 1);
		}

		/**
		 * Replaces the simulator's state stack with one twice as long,
		 * keeping every existing entry at its index.
		 */
		private void increaseStackCapacity()
		{
			final short[] old_states = states;
			final int old_capacity = old_states.length;
			states = new short[old_capacity * 2];
			System.arraycopy(old_states, 0, states, 0, old_capacity);
		}

		/**
		 * Pushes a state onto the simulator's stack, growing the stack first
		 * when the new top index would fall outside of it.
		 *
		 * @param state state to push
		 */
		private void shift(short state)
		{
			top++;
			if (top == states.length)
			{
				increaseStackCapacity();
			}
			states[top] = state;
		}

		/**
		 * Simulates a reduction: pops the rule's right-hand side off the state stack
		 * and records the lowest stack index reached so far in {@code min_top}.
		 * <p>
		 * A rule info entry packs two values: the low 16 bits hold the size of the
		 * right-hand side, the high 16 bits hold the ID of the produced nonterminal.
		 *
		 * @param rule_id index of the rule in {@code tables.rule_infos}
		 * @return ID of the nonterminal on the rule's left-hand side
		 */
		private short reduce(int rule_id)
		{
			final int packed = tables.rule_infos[rule_id];
			top -= packed & 0xFFFF;
			if (top < min_top)
			{
				min_top = top;
			}
			return (short) (packed >>> 16);
		}
	}

	/**
	 * The automaton tables supplied to the constructor. They are consulted for
	 * parser actions, goto states and per-rule information.
	 */
	private final ParsingTables tables;

	/**
	 * Cached ID of the ACCEPT action. Computed once in the constructor as the
	 * bitwise complement of the number of rules, narrowed to {@code short}.
	 */
	private final short accept_action_id;

	/**
	 * The parser's stack of automaton states. {@code shift} writes it at the same
	 * index as {@link #_symbols}, so both stacks share {@link #top}.
	 */
	private short[] states;

	/**
	 * Index of the stacks' top element; a value of -1 would mean the stack is empty.
	 * {@code init()} sets it to 0, where the initial state is stored.
	 */
	private int top;

	/**
	 * The stack of shifted symbols. Allocated by {@code init()} and set to
	 * {@code null} once the goal has been accepted.
	 */
	protected Symbol[] _symbols;

	/**
	 * Parsing events notification "gateway". {@code init()} installs a default
	 * {@code Events} instance when this is still {@code null}.
	 */
	protected Events report;
	

	protected Parser(ParsingTables tables)
	{
		this.tables = tables;
		this.accept_action_id = (short) ~tables.rule_infos.length;
		this.states = new short[256];
	}

    /**
     * Resets the parser, wraps the scanner into a token stream and parses it.
     *
     * @param source scanner that supplies the tokens
     * @return semantic value of the accepted nonterminal
     * @throws IOException propagated from the token source
     * @throws Parser.Exception if a syntax error cannot be recovered from
     */
	public Object parse(Scanner source) throws IOException, Parser.Exception
	{
		init();
		TokenStream in = new TokenStream(source);
		return parse(in);
	}
    
    /**
     * Resets the parser and parses the source for an alternative goal.
     * A marker symbol built from {@code alt_goal_marker_id} is handed to the token
     * stream together with the scanner, to indicate that the alternative goal
     * should be matched.
     *
     * @param source scanner that supplies the tokens
     * @param alt_goal_marker_id ID of a token-like symbol that is used as the marker
     * @return semantic value of the accepted nonterminal
     * @throws IOException propagated from the token source
     * @throws Parser.Exception if a syntax error cannot be recovered from
     */
    public Object parse(Scanner source, short alt_goal_marker_id) throws IOException, Parser.Exception
    {
        init();
        Symbol alt_goal_marker = new Symbol(alt_goal_marker_id);
        return parse(new TokenStream(source, alt_goal_marker));
    }
    
    /**
     * Runs the parsing loop over a token stream until the goal is accepted.
     * <p>
     * For every token the action table is consulted with the state on top of the
     * stack: a positive action shifts the token, the ACCEPT action ends the parse,
     * any other negative action reduces by rule {@code ~action} and then shifts the
     * produced nonterminal, and a zero action is a syntax error that is reported
     * and handed to {@link #recoverFromError(Symbol, TokenStream)}.
     *
     * @param in stream of tokens to parse
     * @return semantic value of the accepted nonterminal
     * @throws IOException propagated from the token stream
     * @throws Parser.Exception if a syntax error cannot be recovered from
     * @throws IllegalStateException if a reduced nonterminal can be neither shifted nor accepted
     */
    private Object parse(TokenStream in) throws IOException, Parser.Exception
    {
        for (;;)
        {
            final Symbol lookahead = in.nextToken();
            boolean fetch_next = false;
            // reductions keep the same lookahead; only a shift or an error recovery moves on
            while (!fetch_next)
            {
                final short action = tables.findParserAction(states[top], lookahead.id);
                if (action > 0)
                {
                    shift(lookahead, action);
                    fetch_next = true;
                }
                else if (action == accept_action_id)
                {
                    final Symbol accepted = _symbols[top];
                    _symbols = null; // drop this stack to prevent loitering
                    return accepted.value;
                }
                else if (action < 0)
                {
                    final Symbol lhs = reduce(~action);
                    final short goto_state = tables.findNextState(states[top], lhs.id);
                    if (goto_state > 0)
                    {
                        shift(lhs, goto_state);
                        continue;
                    }
                    if (goto_state != accept_action_id)
                    {
                        throw new IllegalStateException("Cannot shift a nonterminal");
                    }
                    _symbols = null; // no loitering
                    return lhs.value;
                }
                else
                {
                    // action == 0: syntax error; recovery alters the token stream,
                    // so the next token has to be fetched again afterwards
                    report.syntaxError(lookahead);
                    recoverFromError(lookahead, in);
                    fetch_next = true;
                }
            }
        }
    }

	/**
	 * Invokes the actual reduce action routine.
	 * This method must be implemented by a generated parser.
	 * <p>
	 * It is called from {@code reduce(int)} after the rule's right-hand side has been
	 * popped, i.e. {@code offset} is the new value of {@code top}. The right-hand side
	 * symbols are therefore still stored in {@code _symbols} at {@code offset + 1} and
	 * above. The caller overwrites the {@code id}, {@code start} and {@code end} of the
	 * returned symbol.
	 * 
	 * @param rule_num ID of a reduce action routine to invoke
	 * @param offset index of the stack symbol just before the first action routine argument
	 * @return reduced nonterminal
	 */
	protected abstract Symbol invokeReduceAction(int rule_num, int offset);

	/**
	 * Prepares the parser for a new run.
	 * <p>
	 * Installs a default events listener when none has been set, allocates a symbol
	 * stack as long as the state stack, and seeds slot 0 of both stacks: a placeholder
	 * symbol and the initial state (1).
	 */
	private void init()
	{
		if (report == null)
		{
			report = new Events();
		}

		_symbols = new Symbol[states.length];
		top = 0; // the stack is not empty: slot 0 is occupied
		// a symbol is needed here for a default reduce on the very first erroneous token
		final Symbol placeholder = new Symbol("none");
		_symbols[0] = placeholder;
		states[0] = 1; // initial/first state
	}

	/**
	 * Grows both parser stacks. The state stack is doubled, the symbol stack is
	 * re-allocated to the same new length, and existing entries keep their indices.
	 */
	private void increaseStackCapacity()
	{
		final int new_capacity = states.length * 2;

		short[] grown_states = new short[new_capacity];
		System.arraycopy(states, 0, grown_states, 0, states.length);
		states = grown_states;

		Symbol[] grown_symbols = new Symbol[new_capacity];
		System.arraycopy(_symbols, 0, grown_symbols, 0, _symbols.length);
		_symbols = grown_symbols;
	}

	/**
	 * Pushes a symbol together with the state the automaton moves to.
	 * The stacks are grown first when the new top index would fall outside of them.
	 * 
	 * @param sym
	 *            symbol to push
	 * @param goto_state
	 *            state to switch to
	 */
	private void shift(Symbol sym, short goto_state)
	{
		top++;
		if (top == states.length)
		{
			increaseStackCapacity();
		}
		final int slot = top;
		_symbols[slot] = sym;
		states[slot] = goto_state;
	}

	/**
	 * Reduces by a production: pops its right-hand side, runs the reduce action
	 * and stamps the resulting nonterminal with its ID and source range.
	 * <p>
	 * A rule info entry packs the right-hand side size into its low 16 bits and the
	 * nonterminal ID into its high 16 bits. For an empty right-hand side the range
	 * collapses to the end of the symbol now on top of the stack; otherwise it spans
	 * from the start of the first popped symbol to the end of the last one.
	 * 
	 * @param rule_id
	 *            number of the production to reduce by
	 * @return nonterminal created by the reduction
	 */
	private Symbol reduce(int rule_id)
	{
		final int packed = tables.rule_infos[rule_id];
		final int popped = packed & 0xFFFF;

		top -= popped;
		final Symbol lhs = invokeReduceAction(rule_id, top);
		lhs.id = (short) (packed >>> 16);
		if (popped > 0)
		{
			lhs.start = _symbols[top + 1].start;
			lhs.end = _symbols[top + popped].end;
		}
		else
		{
			lhs.end = _symbols[top].end;
			lhs.start = lhs.end;
		}
		return lhs;
	}

    /**
     * Implements parsing error recovery. Tries several simple approaches first, like deleting the "bad"
     * token or replacing it with one of the terminals expected in the current state (if possible). If the
     * simple methods do not work, it falls back to error phrase recovery.
     * <p>
     * Every candidate repair is checked by running a {@code Simulator} over the token stream. The
     * attempts, in order:
     * <ol>
     * <li>drop the unexpected token;</li>
     * <li>(only when the tables are not compressed) insert an expected terminal before it;</li>
     * <li>(only when the tables are not compressed) replace it with an expected terminal;</li>
     * <li>pop the stack until the "error" symbol can be shifted, shift it, then discard input tokens
     *     until the simulated parse succeeds.</li>
     * </ol>
     * <p>
     * Normally descendant parsers do not need to alter this method. In some cases, though, they may
     * want to override it if they need a different error recovery strategy.
     * 
     * @param token a lookahead terminal symbol that broke parsing
     * @param in token stream
     * @throws IOException propagated from a scanner if it has issues with the source
     * @throws Parser.Exception if Parser cannot recover
     */
	protected void recoverFromError(Symbol token, TokenStream in) throws IOException, Parser.Exception
	{
		if (token.id == 0) // end of input: there is nothing left to repair with
			throw new Parser.Exception("Cannot recover from the syntax error");
		
		Simulator sim = new Simulator();
		// NOTE(review): mark(3) presumably remembers the current stream position and bounds how many
		// tokens the simulator may read ahead - confirm against TokenStream
		in.mark(3);
		if (sim.parse(in)) // the input parses without "token", so just delete it from the stream
		{
			in.reset();
			report.unexpectedTokenRemoved(token);
			return;
		}
		short current_state = states[top];
		if (!tables.compressed) // then try other simple recoveries
		{
			// NOTE(review): a negative result presumably means the state expects no terminal - confirm
			short first_term_id = tables.findFirstTerminal(current_state);
			if (first_term_id >= 0)
			{
				// synthetic terminal placed between the end of the top stack symbol and the start of "token"
				Symbol term = new Symbol(first_term_id, _symbols[top].end, token.start);
				in.insert(term, token); // insert expected terminal before unexpected one
				in.reset();
				if (sim.parse(in))
				{
					in.reset();
					report.missingTokenInserted(term);
					return;
				}
				
				int offset = tables.actn_offsets[current_state];
				
				// try the remaining terminals as the inserted one; a terminal is tried only when the
				// lookaheads table holds its own ID at (offset + ID)
				for (short term_id = (short) (first_term_id + 1); term_id < tables.n_term; term_id++)
				{
					int index = offset + term_id;
					if (index >= tables.lookaheads.length)
						break;
					if (tables.lookaheads[index] == term_id)
					{
						term.id = term_id; // the same Symbol instance stays in the stream, only its ID changes
						in.reset();
						if (sim.parse(in))
						{
							in.reset();
							report.missingTokenInserted(term);
							return;
						}
					}
				}
				in.remove(1); // drop the unexpected token, i.e. alter the stream as if the unexpected
				              // token had been replaced by an expected terminal
				// the replacement takes over the source range of the token it stands in for
				term.start = token.start;
				term.end = token.end;
				
				// same scan as above, this time starting with the first expected terminal
				for (short term_id = first_term_id; term_id < tables.n_term; term_id++)
				{
					int index = offset + term_id;
					if (index >= tables.lookaheads.length)
						break;
					if (tables.lookaheads[index] == term_id)
					{
						term.id = term_id;
						in.reset();
						if (sim.parse(in))
						{
							in.reset();
							report.misspelledTokenReplaced(term);
							return;
						}
					}
				}
				in.remove(0); // simple recoveries failed - remove all stream changes 
			}
		}
		// Simple recoveries failed or are not applicable. Next step is an error phrase recovery.
		/*
		 * Find a state where the parser can shift the "error" symbol. Discard already reduced (and
		 * shifted) productions that are part of the phrase where the unexpected terminal was found.
		 * (Note that if the "error" symbol is not used by the grammar, in the end the entire input
		 * becomes an error phrase, and the parser will not recover from it.)
		 */
		Symbol first_sym = token, last_sym = token;
		short goto_state;
		while ((goto_state = tables.findNextState(states[top], tables.error_symbol_id)) <= 0)
		{
			// parser cannot shift "error" in this state, so use the top symbol
			// as the leftmost symbol of an error phrase
			first_sym = _symbols[top];
			// and go to the previous state
			if (--top < 0)
				throw new Parser.Exception("Cannot recover from the syntax error");
		}
		Symbol error = new Symbol(tables.error_symbol_id, first_sym.start, last_sym.end); // the end is temporary
		shift(error, goto_state);

		in.reset();
		// keep discarding input until the simulated parse succeeds after the shifted "error" symbol
		while (!sim.parse(in))
		{
			// NOTE(review): remove(0) presumably removes and returns the first token at the marked
			// position - confirm against TokenStream
			last_sym = in.remove(0);
			if (last_sym.id == 0) // EOF
				throw new Parser.Exception("Cannot recover from the syntax error");
			in.reset();
		}
		error.end = last_sym.end; // the error phrase ends with the last discarded token
		in.reset();
		report.errorPhraseRemoved(error);
	}
}
parser.java - 源码说明

本页面展示了「JDK1.4编译器后端」中的 parser.java 源码文件，采用 Java 编程语言编写，共 673 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与JDK相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?