📄 codegenerator.java

📁 SRI international 发布的OAA框架软件
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package antlr_oaa;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/RIGHTS.html
 *
 * $Id: CodeGenerator.java,v 1.1 2002/11/08 17:38:13 agno Exp $
 */

import java.io.PrintWriter; // SAS: for proper text i/o
import java.io.IOException;
import java.io.FileWriter;  // SAS: for proper text i/o
import antlr_oaa.collections.impl.Vector;
import antlr_oaa.collections.impl.BitSet;

/** A generic ANTLR code generator.  All code generators
 * derive from this class.
 *
 * <p>
 * A CodeGenerator knows about a Grammar data structure and
 * a grammar analyzer.  The Grammar is walked to generate the
 * appropriate code for both a parser and lexer (if present).
 * This interface may change slightly so that the lexer is
 * itself living inside of a Grammar object (in which case,
 * this class generates only one recognizer).  The main method
 * to call is <tt>gen()</tt>, which initiates all code gen.
 *
 * <p>
 * The interaction of the code generator with the analyzer is
 * simple: each subrule block calls deterministic() before generating
 * code for the block.  Method deterministic() sets lookahead caches
 * in each Alternative object.  Technically, a code generator
 * doesn't need the grammar analyzer if all lookahead analysis
 * is done at runtime, but this would result in a slower parser.
 *
 * <p>
 * This class provides a set of support utilities to handle argument
 * list parsing and so on.
 *
 * @author  Terence Parr, John Lilley
 * @version 2.00a
 * @see     antlr_oaa.JavaCodeGenerator
 * @see     antlr_oaa.DiagnosticCodeGenerator
 * @see     antlr_oaa.LLkAnalyzer
 * @see     antlr_oaa.Grammar
 * @see     antlr_oaa.AlternativeElement
 * @see     antlr_oaa.Lookahead
 */
public abstract class CodeGenerator {
    /** Current tab indentation for code output */
    protected int tabs=0;
    /** Current output writer.  The _print, _println and _printAction
     * helpers write to it without a null check, so it must be set
     * before any of them is called.
     */
    transient protected PrintWriter currentOutput; // SAS: for proper text i/o
    /** The grammar for which we generate code */
    protected Grammar grammar = null;
    /** List of all bitsets that must be dumped.  These are Vectors of BitSet. */
    protected Vector bitsetsUsed;
    /** The antlr Tool; used here to report warnings */
    protected Tool tool;
    /** The grammar behavior */
    protected DefineGrammarSymbols behavior;
    /** The LLk analyzer */
    protected LLkGrammarAnalyzer analyzer;
    /** Object used to format characters in the target language.
     * The subclass must initialize this to the language-specific formatter.
     */
    protected CharFormatter charFormatter;

    /** Use option "codeGenDebug" to generate debugging output */
    protected boolean DEBUG_CODE_GENERATOR = false;

    /** Default value of {@link #makeSwitchThreshold} */
    protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
    /** Default value of {@link #bitsetTestThreshold} */
    protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;

    /** This is a hint for the language-specific code generator.
     * A switch() or language-specific equivalent will be generated instead
     * of a series of if/else statements for blocks with number of alternates
     * greater than or equal to this number of non-predicated LL(1) alternates.
     * This is modified by the grammar option "codeGenMakeSwitchThreshold"
     */
    protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;

    /** This is a hint for the language-specific code generator.
     * A bitset membership test will be generated instead of an
     * ORed series of LA(k) comparisons for lookahead sets with
     * degree greater than or equal to this value.
     * This is modified by the grammar option "codeGenBitsetTestThreshold"
     */
    protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
	
    // NOTE(review): not referenced in the visible part of this file; the name
    // suggests it selects a legacy action translator -- confirm before relying on it.
    private static boolean OLD_ACTION_TRANSLATOR = true;

    // NOTE(review): presumably the name suffix and file extension used when
    // building the name of a token-types file -- confirm against the usages.
    public static String TokenTypesFileSuffix = "TokenTypes";
    public static String TokenTypesFileExt = ".txt";

    /** Construct the code generator base class.
     * The body is empty: fields keep their declared initial values, and
     * anything else (for example <tt>charFormatter</tt>) is left for the
     * subclass to set up.
     */
    public CodeGenerator() {}

    /** Write a String to the currentOutput stream, with no newline appended.
     * A null string is silently ignored.
     * @param s The string to output
     */
    protected void _print(String s) {
	if (s == null) {
	    // Nothing to write
	    return;
	}
	currentOutput.print(s);
    }

    /** Print an action without leading tabs, attempting to
     * preserve the current indentation level for multi-line actions.
     * Leading and trailing whitespace (spaces, tabs and line terminators)
     * is trimmed.  Every line break inside the action ("\n", "\r" or "\r\n")
     * is written as a platform newline followed by printTabs(), and the
     * spaces and tabs that began the following line are discarded.
     * Blank lines inside the action are kept.  The output always ends
     * with a newline.
     * Ignored if string is null.
     * @param s The action string to output
     */
    protected void _printAction(String s) {
	if (s == null) {
	    return;
	}

	// Skip leading newlines, tabs and spaces.  Character.isSpaceChar()
	// alone is false for '\t', '\n' and '\r', so the helper also
	// consults Character.isWhitespace().
	int start = 0;
	while (start < s.length() && isActionWhitespace(s.charAt(start))) {
	    start++;
	}

	// Skip trailing newlines, tabs and spaces
	int end = s.length()-1;
	while (end > start && isActionWhitespace(s.charAt(end))) {
	    end--;
	}

	for (int i = start; i <= end;) {
	    char c = s.charAt(i);
	    i++;
	    if (c != '\n' && c != '\r') {
		currentOutput.print(c);
		continue;
	    }
	    // Treat "\r\n" as a single line break
	    if (c == '\r' && i <= end && s.charAt(i) == '\n') {
		i++;
	    }
	    currentOutput.println();
	    printTabs();
	    // Absorb the next line's own indentation, but stop at a line
	    // terminator so that blank lines in the action survive
	    while (i <= end && isActionIndent(s.charAt(i))) {
		i++;
	    }
	}
	currentOutput.println();
    }

    /** True for every character _printAction trims from the ends of an action. */
    private static boolean isActionWhitespace(char c) {
	return Character.isWhitespace(c) || Character.isSpaceChar(c);
    }

    /** True for a tab or any Unicode space character, i.e. line indentation. */
    private static boolean isActionIndent(char c) {
	return c == '\t' || Character.isSpaceChar(c);
    }

    /** Write a String and a terminating newline to the currentOutput stream.
     * A null string is silently ignored (not even the newline is written).
     * @param s The string to output
     */
    protected void _println(String s) {
	if (s == null) {
	    // Nothing to write
	    return;
	}
	currentOutput.println(s);
    }

    /** Test if a set element array represents a contiguous range.
     * Every element, including the last one, must be exactly one greater
     * than its predecessor.  Arrays with fewer than three elements are
     * never reported as a range.
     * @param elems The array of elements representing the set, usually from BitSet.toArray().
     * @return true if the elements are a contiguous ascending range of three or more values.
     */
    public static boolean elementsAreRange(int[] elems) {
	if ( elems.length<=2 ) {
	    // Not enough elements for a range expression
	    return false;
	}
	for (int i=1; i<elems.length; i++) {
	    // Widen to long so the difference cannot overflow
	    long step = (long)elems[i] - (long)elems[i-1];
	    if ( step != 1L ) {
		// The set does not represent a contiguous range
		return false;
	    }
	}
	return true;
    }

    /** Get the identifier portion of an argument-action token.
     * Delegates to the (String, int) overload using the token's text
     * and line number.
     * Specific code-generators may want to override this
     * if the language has unusual declaration syntax.
     * @param t The action token
     * @return A string containing the text of the identifier
     */
    protected String extractIdOfAction(Token t) {
	String text = t.getText();
	int line = t.getLine();
	return extractIdOfAction(text, line);
    }

    /** Get the identifier portion of an argument-action.
     * Any assignment is first stripped with removeAssignmentFromDeclaration().
     * The identifier is taken to be the trailing run of letters, digits and
     * underscores; the scan starts at the second-to-last character, so the
     * last character always belongs to the result.
     * Specific code-generators may want to override this
     * if the language has unusual declaration syntax.
     * @param s The action text
     * @param line Line used for error reporting.
     * @return A string containing the text of the identifier, or the empty
     * string (after an "Ill-formed action" warning) when nothing precedes it
     */
    protected String extractIdOfAction(String s, int line) {
	s = removeAssignmentFromDeclaration(s);
	// Walk backwards until a character that cannot be part of an
	// identifier is met; the identifier starts right after it.
	// TODO: make this work for language-independent identifiers?
	int pos = s.length()-2;
	while (pos >= 0) {
	    char ch = s.charAt(pos);
	    boolean identifierChar = Character.isLetterOrDigit(ch) || ch == '_';
	    if (!identifierChar) {
		// Found end of type part
		return s.substring(pos+1);
	    }
	    pos--;
	}
	// Something is bogus, but we cannot parse the language-specific
	// actions any better.  The compiler will have to catch the problem.
	tool.warning("Ill-formed action", grammar.getFilename(), line);
	return "";
    }

    /** Get the type string out of an argument-action token.
     * Delegates to the (String, int) overload using the token's text
     * and line number.
     * Specific code-generators may want to override this
     * if the language has unusual declaration syntax.
     * @param t The action token
     * @return A string containing the text of the type
     */
    protected String extractTypeOfAction(Token t) {
	String text = t.getText();
	int line = t.getLine();
	return extractTypeOfAction(text, line);
    }

    /** Get the type portion of an argument-action.
     * Any assignment is first stripped with removeAssignmentFromDeclaration().
     * The type is everything up to and including the last character that is
     * not a letter, digit or underscore; the scan starts at the
     * second-to-last character, so the last character is never part of the type.
     * Specific code-generators may want to override this
     * if the language has unusual declaration syntax.
     * @param s The action text
     * @param line Line used for error reporting.
     * @return A string containing the text of the type, or the empty
     * string (after an "Ill-formed action" warning) when no type part is found
     */
    protected String extractTypeOfAction(String s, int line) {
	s = removeAssignmentFromDeclaration(s);
	// Walk backwards until a character that cannot be part of an
	// identifier is met; the type ends with that character.
	// TODO: make this work for language-independent identifiers?
	int pos = s.length()-2;
	while (pos >= 0) {
	    char ch = s.charAt(pos);
	    boolean identifierChar = Character.isLetterOrDigit(ch) || ch == '_';
	    if (!identifierChar) {
		// Found end of type part
		return s.substring(0, pos+1);
	    }
	    pos--;
	}
	// Something is bogus, but we cannot parse the language-specific
	// actions any better.  The compiler will have to catch the problem.
	tool.warning("Ill-formed action", grammar.getFilename(), line);
	return "";
    }

    /** Generate the code for all grammars.
     * Abstract: each concrete code generator supplies the implementation.
     */
    public abstract void gen();

    /** Generate code for the given action element.
     * Abstract: each concrete code generator supplies the implementation.
     * @param action The {...} action to generate
     */
    public abstract void gen(ActionElement action);

    /** Generate code for the given block of alternatives.
     * Abstract: each concrete code generator supplies the implementation.
     * @param blk The "x|y|z|..." block to generate
     */
    public abstract void gen(AlternativeBlock blk);

    /** Generate code for the given block-end element.
     * Abstract: each concrete code generator supplies the implementation.
     * @param end The block-end element to generate.  Block-end
     * elements are synthesized by the grammar parser to represent
     * the end of a block.
     */
    public abstract void gen(BlockEndElement end);

    /** Generate code for the given character literal element.
     * Abstract: each concrete code generator supplies the implementation.
     * @param atom The character literal reference to generate
     */
    public abstract void gen(CharLiteralElement atom);

    /** Generate code for the given character-range element.
     * Abstract: each concrete code generator supplies the implementation.
     * @param r The character-range reference to generate
     */
    public abstract void gen(CharRangeElement r);

    /** Generate the code for a lexer.
     * @param g The lexer grammar to generate code for
     * @throws IOException declared so that implementations may propagate I/O failures
     */
    public abstract void gen(LexerGrammar g) throws IOException;

    /** Generate code for the given one-or-more block.
     * Abstract: each concrete code generator supplies the implementation.
     * @param blk The (...)+ block to generate
     */
    public abstract void gen(OneOrMoreBlock blk);

    /** Generate the code for a parser.
     * @param g The parser grammar to generate code for
     * @throws IOException declared so that implementations may propagate I/O failures
     */
    public abstract void gen(ParserGrammar g) throws IOException;

    /** Generate code for the given rule-reference element.
     * Abstract: each concrete code generator supplies the implementation.
     * @param rr The rule-reference to generate
     */
    public abstract void gen(RuleRefElement rr);

    /** Generate code for the given grammar element.
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -