📄 sathercodegenerator.java

📁 OAA framework software released by SRI International
💻 Java
📖 Page 1 of 5
package antlr_oaa;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/RIGHTS.html
 *
 * $Id: SatherCodeGenerator.java,v 1.1 2002/11/08 17:37:42 agno Exp $
 */

import java.util.Enumeration;
import java.util.Hashtable;
import antlr_oaa.collections.impl.BitSet;
import antlr_oaa.collections.impl.Vector;
import java.io.PrintWriter; //SAS: changed for proper text file io
import java.io.IOException;
import java.io.FileWriter;

/**Generate MY_PARSER.sa, MY_LEXER.sa and MY_PARSER_TOKENTYPES.sa */

public class SatherCodeGenerator extends CodeGenerator 
{
    // non-zero if inside syntactic predicate generation
    protected int syntacticPredLevel = 0;
	
	// Are we generating ASTs (for parsers and tree parsers) right now?
    protected boolean genAST = false;

    // Are we saving the text consumed (for lexers) right now?
    protected boolean saveText = false;

    // Grammar parameters set up to handle different grammar classes.
    // These are used to get instanceof tests out of code generation
    String labeledElementType;
    String labeledElementASTType;
    String labeledElementInit;
    String commonExtraArgs;
    String commonExtraParams;
    String commonLocalVars;
    String lt1Value;
    String exceptionThrown;
    String throwNoViable;

	// Tracks the rule being generated.  Used for mapTreeId
    RuleBlock currentRule;
    // Tracks the rule or labeled subrule being generated.  Used for AST generation.
    String currentASTResult;
    // Mapping between the ids used in the current alt, and the
    // names of variables used to represent their AST values.
    Hashtable treeVariableMap = new Hashtable();
    // Count of unnamed generated variables
    int astVarNumber = 1;
    // Special value used to mark duplicate in treeVariableMap
    protected static final String NONUNIQUE = new String();

    public static final int caseSizeThreshold = 127; // ascii is max

    private Vector semPreds;

    /** Create a Sather code-generator.
	 * The caller must still call setTool, setBehavior, and setAnalyzer
	 * before generating code.
	 */
    public SatherCodeGenerator() {
	super();
	charFormatter = new SatherCharFormatter();
    }
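    /* Usage sketch (not part of the original source; "tool", "behavior" and
     * "analyzer" stand for the driver objects the surrounding ANTLR tool
     * creates): per the javadoc above, a caller wires the generator up
     * roughly like this before generating code:
     *
     *   SatherCodeGenerator cg = new SatherCodeGenerator();
     *   cg.setTool(tool);          // error reporting and options
     *   cg.setBehavior(behavior);  // the parsed grammar definitions
     *   cg.setAnalyzer(analyzer);  // the LL(k) grammar analyzer
     *   cg.gen();                  // emits MY_PARSER.sa, MY_LEXER.sa, ...
     */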
    /** Adds a semantic predicate string to the sem pred vector
	    These strings will be used to build an array of sem pred names
	    when building a debugging parser.  This method should only be
	    called when the debug option is specified
	 */
    protected int addSemPred(String predicate) {
	semPreds.appendElement(predicate);
	return semPreds.size()-1;
    }
    public void exitIfError() {
	if (tool.hasError) {
	    System.out.println("Exiting due to errors.");
	    System.exit(1);
	}
    }
    /**Generate the parser, lexer, treeparser, and token types in Sather */
    public void gen() {
	// Do the code generation
	try {
	    // Loop over all grammars
	    Enumeration grammarIter = behavior.grammars.elements();
	    while (grammarIter.hasMoreElements()) {
		Grammar g = (Grammar)grammarIter.nextElement();
				// Connect all the components to each other
		g.setGrammarAnalyzer(analyzer);
		g.setCodeGenerator(this);
		analyzer.setGrammar(g);
				// To get the right overloading behavior across heterogeneous grammars
		setupGrammarParameters(g);
		g.generate();
		exitIfError();
	    }

	    // Loop over all token managers (some of which are lexers)
	    Enumeration tmIter = behavior.tokenManagers.elements();
	    while (tmIter.hasMoreElements()) {
		TokenManager tm = (TokenManager)tmIter.nextElement();
		if (!tm.isReadOnly()) {
		    // Write the token manager tokens as Sather
		    // this must appear before genTokenInterchange so that
		    // labels are set on string literals
		    genTokenTypes(tm);
		    // Write the token manager tokens as plain text
		    genTokenInterchange(tm);
		}
		exitIfError();
	    }
	}
	catch (IOException e) {
	    System.out.println(e.getMessage());
	}
    }
    /** Generate code for the given grammar element.
	 * @param action The {...} action to generate
	 */
    public void gen(ActionElement action) {
	if ( DEBUG_CODE_GENERATOR ) System.out.println("genAction("+action+")");
	if ( action.isSemPred ) {
	    genSemPred(action.actionText, action.line);
	}
	else {
	    if ( grammar.hasSyntacticPredicate ) {
		println("if ( input_state.guessing = 0 ) then");
		tabs++;
	    }

	    ActionTransInfo tInfo = new ActionTransInfo();
	    String actionStr = processActionForTreeSpecifiers(action.actionText, action.getLine(), currentRule, tInfo);
			
	    if ( tInfo.refRuleRoot!=null ) {
				// Somebody referenced "#rule", make sure translated var is valid
				// An assignment to #rule is left as a ref too, meaning that assignments
				// with no other refs, like "#rule = foo();", still force this code to be
				// generated (unnecessarily).
		println(tInfo.refRuleRoot + " := current_ast.root;");
	    }
			
	    // dump the translated action
	    printAction(actionStr);
			
	    if ( tInfo.assignToRoot ) {
				// Somebody did a "#rule=", reset internal currentAST.root
		println("current_ast.root := "+ tInfo.refRuleRoot + ";");
				// reset the child pointer too to be last sibling in sibling list
		println("if ( ~void( " + tInfo.refRuleRoot + " ) and ~void( "
			+ tInfo.refRuleRoot + ".first_child ) ) then" );
		tabs++;
		println("current_ast.child := " + tInfo.refRuleRoot + ".first_child");
		tabs--;
		println("else");
		tabs++;
		println("current_ast.child := " + tInfo.refRuleRoot + ";");
		tabs--;
		println("end; -- if");
		println("current_ast.advance_child_to_end;");
	    }
		
	    if ( grammar.hasSyntacticPredicate ) {
		tabs--;
		println("end; -- if");
	    }
	}
    }
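    /* Sketch of the Sather text this method emits for a plain action when the
     * grammar uses syntactic predicates (assembled from the println calls
     * above; "..." stands for the translated user action):
     *
     *   if ( input_state.guessing = 0 ) then
     *     ...
     *   end; -- if
     *
     * i.e. user actions are suppressed while the parser is guessing.
     */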
    /** Generate code for the given grammar element.
	 * @param blk The "x|y|z|..." block to generate
	 */
    public void gen(AlternativeBlock blk) {
	if ( DEBUG_CODE_GENERATOR ) System.out.println("gen("+blk+")");
	//		println("{");
	genBlockPreamble(blk);

	// Tell AST generation to build subrule result
	String saveCurrentASTResult = currentASTResult;
	if (blk.getLabel() != null) {
	    currentASTResult = blk.getLabel();
	}

	boolean ok = grammar.theLLkAnalyzer.deterministic(blk);
		
	JavaBlockFinishingInfo howToFinish = genCommonBlock(blk, true);
	genBlockFinish(howToFinish, throwNoViable);
	println("");

	//		println("}");

	// Restore previous AST generation
	currentASTResult = saveCurrentASTResult;
    }
    /** Generate code for the given grammar element.
	 * @param end The block-end element to generate.  Block-end
	 * elements are synthesized by the grammar parser to represent
	 * the end of a block.
	 */
    public void gen(BlockEndElement end) {
	if ( DEBUG_CODE_GENERATOR ) System.out.println("genRuleEnd("+end+")");
    }
    /** Generate code for the given grammar element.
	 * @param atom The character literal reference to generate
	 */
    public void gen(CharLiteralElement atom) {
	if ( DEBUG_CODE_GENERATOR ) System.out.println("genChar("+atom+")");
		
	if ( atom.getLabel()!=null ) {
	    println(atom.getLabel() + " := " + lt1Value + ";");
	}
		
	boolean oldsaveText = saveText;
	saveText = saveText && atom.getAutoGenType()==GrammarElement.AUTO_GEN_NONE;
	genMatch(atom);
	saveText = oldsaveText;
    }
    /** Generate code for the given grammar element.
	 * @param r The character-range reference to generate
	 */
    public void gen(CharRangeElement r) {
	if ( r.getLabel()!=null  && syntacticPredLevel == 0) {
	    println(r.getLabel() + " := " + lt1Value + ";");
	}
	println("match_range( " + r.beginText+ ", " + r.endText+ " );");
    }
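    /* Example of the emitted match call for a grammar range such as 'a'..'z',
     * assuming beginText/endText hold the quoted endpoints as written in the
     * grammar:
     *
     *   match_range( 'a', 'z' );
     */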
    /** Generate the lexer Sather file */
    public  void gen(LexerGrammar g) throws IOException {
	// If debugging, create a new sempred vector for this grammar
	if (g.debuggingOutput)
	    semPreds = new Vector();
			
	setGrammar(g);
	if (!(grammar instanceof LexerGrammar)) {
	    tool.panic("Internal error generating lexer");
	}

	// SAS: moved output creation to method so a subclass can change
	//      how the output is generated (for VAJ interface)
	setupOutput(grammar.getClassName());

	genAST = false;	// no way to gen trees.
	saveText = true;	// save consumed characters.

	tabs=0;

	// Generate header common to all Sather output files
	genHeader();
	// Do not use printAction because we assume tabs==0
	println(behavior.getHeaderAction(""));

	// Generate user-defined lexer file preamble
	println(grammar.preambleAction.getText());

	// Generate lexer class definition
	String sup=null;
	if ( grammar.superClass!=null ) {
	    sup = grammar.superClass;
	}
	else {
	    sup = "ANTLR_CHAR_SCANNER{TOKEN}";
	}	

	// print javadoc comment if any
	if ( grammar.comment!=null ) {
	    _println(grammar.comment);
	}
		
	println("class " + grammar.getClassName() + 
		"{TOKEN} < $ANTLR_TOKEN_STREAM{TOKEN} , $ANTLR_FILE_CURSOR is " );
	tabs++;
	println("include " + sup + " create -> private char_scanner_create;");
	println("include " + grammar.tokenManager.getName() + "_TOKENTYPES;");

	/*
	  Token tsuffix = (Token)grammar.options.get("classHeaderSuffix");
	  if ( tsuffix != null ) {
	  String suffix = Tool.stripFrontBack(tsuffix.getText(),"\"","\"");
	  if ( suffix != null ) {
	  print(", "+suffix);	// must be an interface name for Java
	  }
	  }
	*/

	println("");

	// Generate user-defined lexer class members
	print(
	      processActionForTreeSpecifiers(grammar.classMemberAction.getText(), 0, currentRule, null)
	      );

	//
	// Generate the constructor from ISTREAM, which in turn
	// calls the ByteBuffer constructor
	//
	println("create ( istr : $ISTREAM ) : SAME is");
	tabs++;
	println("inp : ANTLR_BYTE_BUFFER := #ANTLR_BYTE_BUFFER( istr );");
	println("res : SAME := #SAME( inp );");
	println("res.EOF_CHAR := istr.eof_char;");
	println("return res;");
	tabs--;
	println("end; -- create");
	println("");

	//
	// Generate the constructor from ANTLR_BYTE_BUFFER, which in turn
	// creates the lexer shared input state
	//
	// 		println("public " + grammar.getClassName() + "(Reader in) {");
	// 		tabs++;
	// 		println("this(new CharBuffer(in));");
	// 		tabs--;
	// 		println("}");

	println("create ( bb : ANTLR_BYTE_BUFFER ) : SAME is");
	tabs++;
	// if debugging, wrap the input buffer in a debugger
	//  		if (grammar.debuggingOutput)
	//  			println("this(new LexerSharedInputState(new antlr_oaa.debug.DebuggingInputBuffer(ib)));");
	//  		else
	println("state : ANTLR_LEXER_SHARED_INPUT_STATE := #ANTLR_LEXER_SHARED_INPUT_STATE( bb );");
	println("res: SAME := #SAME( state );");
	println("return res;");
	tabs--;
	println("end; -- create");
	println("");

	//
	// Generate the constructor from ANTLR_LEXER_SHARED_INPUT_STATE
	//
	println("create ( state : ANTLR_LEXER_SHARED_INPUT_STATE ) : SAME is ");
	tabs++;

	println("res : SAME := char_scanner_create( state );");
	// if debugging, set up array variables and call user-overridable
	//   debugging setup method
	//		if ( grammar.debuggingOutput ) {
	//			println("rule_names  := sa_rule_names;");
	//                      println("sem_pred_names := sa_sem_pred_names;");
	//                      println("setup_debugging;");
	//		}	

	// Generate the initialization of a hashtable
	// containing the string literals used in the lexer
	// The literals variable itself is in CharScanner
	println("res.literals := #MAP{STR,INT};");
	Enumeration keys = grammar.tokenManager.getTokenSymbolKeys();
	while ( keys.hasMoreElements() ) {
	    String key = (String)keys.nextElement();
	    if ( key.charAt(0) != '"' ) {
		continue;
	    }
	    TokenSymbol sym = grammar.tokenManager.getTokenSymbol(key);
	    if ( sym instanceof StringLiteralSymbol ) {
		StringLiteralSymbol s = (StringLiteralSymbol)sym;
		println("res.literals[ " + s.getId() + " ] := " + s.getTokenType() + ";");
	    }
	}
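	// Each iteration emits one literal-table entry, e.g. for a hypothetical
	// string literal "begin" with token type 4 (assuming getId() returns the
	// quoted literal text):
	//
	//   res.literals[ "begin" ] := 4;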
	Enumeration ids;
	// Generate the setting of various generated options.
	println("res.case_sensitive_literals := " + g.caseSensitiveLiterals + ";");
	println("res.case_sensitive := " + g.caseSensitive + ";");
	println("return res;");
	tabs--;
	println("end; -- create");
	println("");

	// generate the rule name array for debugging
	if (grammar.debuggingOutput) {
	    println("private const sa_rule_names : ARRAY{STR} := |");

	    ids = grammar.rules.elements();
	    int ruleNum=0;
	    while ( ids.hasMoreElements() ) {
		GrammarSymbol sym = (GrammarSymbol) ids.nextElement();
		if ( sym instanceof RuleSymbol)
		    println("  \""+((RuleSymbol)sym).getId()+"\",");
	    }
	    println("|;");
	}		
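	// In debugging mode the block above emits roughly this Sather array
	// literal of rule names, e.g. for hypothetical rules IDENT and NUMBER
	// (note the loop leaves a trailing comma after the last entry):
	//
	//   private const sa_rule_names : ARRAY{STR} := |
	//     "IDENT",
	//     "NUMBER",
	//   |;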

	// Generate nextToken() rule.
	// nextToken() is a synthetic lexer rule that is the implicit OR of all
	// user-defined lexer rules.
	genNextToken();

	// Generate code for each rule in the lexer
	ids = grammar.rules.elements();
	int ruleNum=0;
	while ( ids.hasMoreElements() ) {
	    RuleSymbol sym = (RuleSymbol) ids.nextElement();
	    // Don't generate the synthetic rules
	    if (!sym.getId().equals("mnextToken")) {
		genRule(sym, false, ruleNum++);
	    }
	    exitIfError();
	}

	// Generate the semantic predicate map for debugging
	if (grammar.debuggingOutput)
	    genSemPredMap();

	// Generate the bitsets used throughout the lexer
	genBitsets(bitsetsUsed, ((LexerGrammar)grammar).charVocabulary.size());

	println("");
	tabs--;
	println("end; -- class");
		
