📄 codegenerator.java
字号:
package antlr_oaa;
/* ANTLR Translator Generator
* Project led by Terence Parr at http://www.jGuru.com
* Software rights: http://www.antlr.org/RIGHTS.html
*
* $Id: CodeGenerator.java,v 1.1 2002/11/08 17:38:13 agno Exp $
*/
import java.io.PrintWriter; // SAS: for proper text i/o
import java.io.IOException;
import java.io.FileWriter; // SAS: for proper text i/o
import antlr_oaa.collections.impl.Vector;
import antlr_oaa.collections.impl.BitSet;
/** A generic ANTLR code generator. All code generators
* derive from this class.
*
* <p>
* A CodeGenerator knows about a Grammar data structure and
* a grammar analyzer. The Grammar is walked to generate the
* appropriate code for both a parser and lexer (if present).
* This interface may change slightly so that the lexer is
* itself living inside of a Grammar object (in which case,
* this class generates only one recognizer). The main method
* to call is <tt>gen()</tt>, which initiates all code gen.
*
* <p>
* The interaction of the code generator with the analyzer is
* simple: each subrule block calls deterministic() before generating
* code for the block. Method deterministic() sets lookahead caches
* in each Alternative object. Technically, a code generator
* doesn't need the grammar analyzer if all lookahead analysis
* is done at runtime, but this would result in a slower parser.
*
* <p>
* This class provides a set of support utilities to handle argument
* list parsing and so on.
*
* @author Terence Parr, John Lilley
* @version 2.00a
* @see antlr_oaa.JavaCodeGenerator
* @see antlr_oaa.DiagnosticCodeGenerator
* @see antlr_oaa.LLkAnalyzer
* @see antlr_oaa.Grammar
* @see antlr_oaa.AlternativeElement
* @see antlr_oaa.Lookahead
*/
public abstract class CodeGenerator {
/** Current tab indentation for code output */
protected int tabs=0;
/** Current output stream; the print helpers of this class write to it */
transient protected PrintWriter currentOutput; // SAS: for proper text i/o
/** The grammar for which we generate code */
protected Grammar grammar = null;
/** List of all bitsets that must be dumped. This is a Vector of BitSet. */
protected Vector bitsetsUsed;
/** The antlr Tool; used by this class to report warnings */
protected Tool tool;
/** The grammar behavior */
protected DefineGrammarSymbols behavior;
/** The LLk analyzer */
protected LLkGrammarAnalyzer analyzer;
/** Object used to format characters in the target language.
* Subclasses must initialize this to the language-specific formatter.
*/
protected CharFormatter charFormatter;
/** Use option "codeGenDebug" to generate debugging output */
protected boolean DEBUG_CODE_GENERATOR = false;
/** Default value for {@link #makeSwitchThreshold} */
protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
/** Default value for {@link #bitsetTestThreshold} */
protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;
/** This is a hint for the language-specific code generator.
* A switch() or language-specific equivalent will be generated instead
* of a series of if/else statements for blocks with number of alternates
* greater than or equal to this number of non-predicated LL(1) alternates.
* This is modified by the grammar option "codeGenMakeSwitchThreshold"
*/
protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
/** This is a hint for the language-specific code generator.
* A bitset membership test will be generated instead of an
* ORed series of LA(k) comparisons for lookahead sets with
* degree greater than or equal to this value.
* This is modified by the grammar option "codeGenBitsetTestThreshold"
*/
protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
// NOTE(review): not referenced in the visible part of this file; presumably
// selects the legacy action-translation path -- confirm against its usages.
private static boolean OLD_ACTION_TRANSLATOR = true;
// NOTE(review): presumably the name suffix and file extension of the generated
// token-types file -- confirm against usages. Both are public and non-final,
// so other code may reassign them.
public static String TokenTypesFileSuffix = "TokenTypes";
public static String TokenTypesFileExt = ".txt";
/** Construct code generator base class. Performs no initialization of its own. */
public CodeGenerator() {}
/** Write a string to the currentOutput stream without appending a newline.
 * A null string is silently ignored.
 * @param s The string to output; may be null
 */
protected void _print(String s) {
    if (s == null) {
        return;
    }
    currentOutput.print(s);
}
/** Print an action without leading tabs, attempting to
 * preserve the current indentation level for multi-line actions.
 * <p>
 * Leading and trailing whitespace (spaces, tabs and newlines) is trimmed.
 * Every line break inside the action (\n, \r or \r\n) is written as a
 * platform newline followed by the current tab indentation, and the
 * horizontal whitespace that starts the following line is dropped so that
 * the line is re-indented rather than doubly indented. Blank lines inside
 * the action are kept. The output always ends with a newline.
 * Ignored if string is null.
 * @param s The action string to output
 */
protected void _printAction(String s) {
    if (s == null) {
        return;
    }
    // Skip leading newlines, tabs and spaces
    int start = 0;
    while (start < s.length() && isActionWhitespace(s.charAt(start))) {
        start++;
    }
    // Skip trailing newlines, tabs and spaces
    int end = s.length() - 1;
    while (end > start && isActionWhitespace(s.charAt(end))) {
        end--;
    }
    int i = start;
    while (i <= end) {
        char c = s.charAt(i++);
        if (c != '\n' && c != '\r') {
            currentOutput.print(c);
            continue;
        }
        // Treat "\r\n" as a single line break
        if (c == '\r' && i <= end && s.charAt(i) == '\n') {
            i++;
        }
        currentOutput.println();
        printTabs();
        // Absorb the indentation of the next line, but stop at a line
        // terminator so that blank lines inside the action survive.
        while (i <= end) {
            char next = s.charAt(i);
            if (next == '\n' || next == '\r' || !isActionWhitespace(next)) {
                break;
            }
            i++;
        }
    }
    currentOutput.println();
}

/** Tell whether a character counts as blank space inside an action.
 * Character.isSpaceChar alone is not enough: it is false for tab,
 * newline and carriage return, which are control characters.
 * Character.isWhitespace covers those; isSpaceChar is kept as well so
 * that everything treated as blank before (e.g. non-breaking space) still is.
 */
private static boolean isActionWhitespace(char c) {
    return Character.isSpaceChar(c) || Character.isWhitespace(c);
}
/** Write a string and a line terminator to the currentOutput stream.
 * A null string is silently ignored; not even the newline is written.
 * @param s The string to output; may be null
 */
protected void _println(String s) {
    if (s == null) {
        return;
    }
    currentOutput.println(s);
}
/** Test if a set element array represents a contiguous range.
 * <p>
 * Every adjacent pair is checked, so the answer is also correct for
 * arrays that are not sorted or that contain duplicates (both yield false).
 * @param elems The array of elements representing the set, usually from BitSet.toArray().
 * @return true if the array holds three or more elements and each element
 * is exactly one greater than its predecessor; false otherwise, including
 * for arrays of length 0, 1 or 2.
 */
public static boolean elementsAreRange(int[] elems) {
    if (elems.length <= 2) {
        // Not enough elements for a range expression
        return false;
    }
    for (int i = 1; i < elems.length; i++) {
        int previous = elems[i - 1];
        // The MAX_VALUE guard keeps previous + 1 from wrapping around
        if (previous == Integer.MAX_VALUE || elems[i] != previous + 1) {
            // The set does not represent a contiguous range
            return false;
        }
    }
    return true;
}
/** Extract the identifier from an argument-action token.
 * Delegates to {@link #extractIdOfAction(String, int)} with the token's
 * text and line number.
 * Specific code-generators may want to override this
 * if the language has unusual declaration syntax.
 * @param t The action token
 * @return A string containing the text of the identifier
 */
protected String extractIdOfAction(Token t) {
    String actionText = t.getText();
    int actionLine = t.getLine();
    return extractIdOfAction(actionText, actionLine);
}
/** Extract the identifier from the text of an argument-action.
 * The identifier is assumed to be the trailing run of letters, digits and
 * underscores. Any initializer is first stripped with
 * removeAssignmentFromDeclaration. If no delimiter is found before the
 * identifier, a warning is reported and the empty string is returned.
 * Specific code-generators may want to override this
 * if the language has unusual declaration syntax.
 * @param s The action text
 * @param line Line used for error reporting.
 * @return A string containing the text of the identifier
 */
protected String extractIdOfAction(String s, int line) {
    String decl = removeAssignmentFromDeclaration(s);
    // Walk backwards looking for the first character that cannot belong to
    // an identifier. The scan starts at the second-to-last character, so
    // the final character is always taken as part of the identifier.
    int pos = decl.length() - 2;
    while (pos >= 0) {
        // TODO: make this work for language-independent identifiers?
        char ch = decl.charAt(pos);
        boolean identifierChar = Character.isLetterOrDigit(ch) || ch == '_';
        if (!identifierChar) {
            // Everything after the delimiter is the identifier
            return decl.substring(pos + 1);
        }
        pos--;
    }
    // Something is bogus, but we cannot parse the language-specific
    // actions any better. The compiler will have to catch the problem.
    tool.warning("Ill-formed action", grammar.getFilename(), line);
    return "";
}
/** Extract the type string from an argument-action token.
 * Delegates to {@link #extractTypeOfAction(String, int)} with the token's
 * text and line number.
 * Specific code-generators may want to override this
 * if the language has unusual declaration syntax.
 * @param t The action token
 * @return A string containing the text of the type
 */
protected String extractTypeOfAction(Token t) {
    String actionText = t.getText();
    int actionLine = t.getLine();
    return extractTypeOfAction(actionText, actionLine);
}
/** Extract the type portion from the text of an argument-action.
 * The type is assumed to be everything that precedes the trailing
 * identifier (a run of letters, digits and underscores). The delimiter
 * character found just before the identifier is included in the result.
 * Any initializer is first stripped with removeAssignmentFromDeclaration.
 * If no delimiter is found, a warning is reported and the empty string
 * is returned.
 * Specific code-generators may want to override this
 * if the language has unusual declaration syntax.
 * @param s The action text
 * @param line Line used for error reporting.
 * @return A string containing the text of the type
 */
protected String extractTypeOfAction(String s, int line) {
    String decl = removeAssignmentFromDeclaration(s);
    // Walk backwards looking for the first character that cannot belong to
    // an identifier. The scan starts at the second-to-last character, so
    // the final character is always taken as part of the identifier.
    int pos = decl.length() - 2;
    while (pos >= 0) {
        // TODO: make this work for language-independent identifiers?
        char ch = decl.charAt(pos);
        boolean identifierChar = Character.isLetterOrDigit(ch) || ch == '_';
        if (!identifierChar) {
            // Everything up to and including the delimiter is the type
            return decl.substring(0, pos + 1);
        }
        pos--;
    }
    // Something is bogus, but we cannot parse the language-specific
    // actions any better. The compiler will have to catch the problem.
    tool.warning("Ill-formed action", grammar.getFilename(), line);
    return "";
}
/** Generate the code for all grammars.
* Implemented by each language-specific code generator.
*/
public abstract void gen();
/** Generate code for the given grammar element.
* @param action The {...} action to generate
*/
public abstract void gen(ActionElement action);
/** Generate code for the given grammar element.
* @param blk The "x|y|z|..." block to generate
*/
public abstract void gen(AlternativeBlock blk);
/** Generate code for the given grammar element.
* @param end The block-end element to generate. Block-end
* elements are synthesized by the grammar parser to represent
* the end of a block.
*/
public abstract void gen(BlockEndElement end);
/** Generate code for the given grammar element.
* @param atom The character literal reference to generate
*/
public abstract void gen(CharLiteralElement atom);
/** Generate code for the given grammar element.
* @param r The character-range reference to generate
*/
public abstract void gen(CharRangeElement r);
/** Generate the code for a lexer.
* @param g The lexer grammar to generate code for
* @throws IOException declared by the signature; presumably raised when
* writing the generated output fails -- confirm in the implementations
*/
public abstract void gen(LexerGrammar g) throws IOException;
/** Generate code for the given grammar element.
* @param blk The (...)+ block to generate
*/
public abstract void gen(OneOrMoreBlock blk);
/** Generate the code for a parser.
* @param g The parser grammar to generate code for
* @throws IOException declared by the signature; presumably raised when
* writing the generated output fails -- confirm in the implementations
*/
public abstract void gen(ParserGrammar g) throws IOException;
/** Generate code for the given grammar element.
* @param rr The rule-reference to generate
*/
public abstract void gen(RuleRefElement rr);
/** Generate code for the given grammar element.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -