📄 antlr.g
字号:
// ============================================================================
// antlr.g -- ANTLR v2 grammar that parses ANTLR v3 grammar files and builds
// a GrammarAST tree (first phase of the ANTLR 3 tool pipeline).
//
// NOTE(review): this copy was recovered from a web scrape that collapsed the
// original line breaks.  That corruption made every '//' line comment swallow
// the code that followed it on the next original line.  Line structure has
// been restored below; all code tokens and action strings are unchanged.
// ============================================================================
header {
/*
 [The "BSD licence"]
 Copyright (c) 2005-2006 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;
import java.util.*;
import java.io.*;
import org.antlr.analysis.*;
import org.antlr.misc.*;
import antlr.*;
}

/** Read in an ANTLR grammar and build an AST.  Try not to do
 *  any actions, just build the tree.
 *
 *  The phases are:
 *
 *      antlr.g (this file)
 *      assign.types.g
 *      define.g
 *      buildnfa.g
 *      antlr.print.g (optional)
 *      codegen.g
 *
 *  Terence Parr
 *  University of San Francisco
 *  2005
 */
class ANTLRParser extends Parser;
options {
    buildAST = true;
    exportVocab=ANTLR;
    ASTLabelType="GrammarAST";
    k=2;
}

tokens {
    OPTIONS="options";
    TOKENS="tokens";
    PARSER="parser";
    LEXER;
    RULE;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE;
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    CHARSET;
    SET;
    ID;
    ARG;
    ARGLIST;
    RET;
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    INITACTION;
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE="scope";
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) => it's a synpred converted to sempred
    FRAGMENT="fragment";
}

{
    // Parser member state (Java action block).
    Grammar grammar = null;                       // grammar object being populated
    protected int gtype = 0;                      // LEXER_/PARSER_/TREE_/COMBINED_GRAMMAR
    protected String currentRuleName = null;      // rule currently being parsed
    protected GrammarAST currentBlockAST = null;  // innermost BLOCK node (for options lookup)

    /* this next stuff supports construction of the Tokens artificial rule.
       I hate having some partial functionality here, I like doing everything
       in future tree passes, but the Tokens rule is sensitive to filter mode.
       And if it adds syn preds, future tree passes will need to process the
       fragments defined in Tokens; a cyclic dependency.
       As of 1-17-06 then, Tokens is created for lexer grammars in the antlr
       grammar parser itself.

       This grammar is also sensitive to the backtrack grammar option that
       tells ANTLR to automatically backtrack when it can't compute a DFA.

       7-2-06 I moved all option processing to antlr.g from define.g as I
       need backtrack option etc... for blocks.  Got messy.
    */
    protected List lexerRuleNames = new ArrayList();
    public List getLexerRuleNames() { return lexerRuleNames; }

    // Wrap a set node in #(BLOCK #(ALT set EOA) EOB), adding a synpred if needed.
    protected GrammarAST setToBlockWithSet(GrammarAST b) {
        GrammarAST alt = #(#[ALT,"ALT"],#b,#[EOA,"<end-of-alt>"]);
        prefixWithSynPred(alt);
        return #(#[BLOCK,"BLOCK"], alt, #[EOB,"<end-of-block>"] );
    }

    /** Create a copy of the alt and make it into a BLOCK; all actions,
     *  labels, tree operators, rewrites are removed.
     */
    protected GrammarAST createBlockFromDupAlt(GrammarAST alt) {
        //GrammarAST nalt = (GrammarAST)astFactory.dupTree(alt);
        GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null);
        GrammarAST blk = #(#[BLOCK,"BLOCK"], nalt, #[EOB,"<end-of-block>"] );
        return blk;
    }

    /** Rewrite alt to have a synpred as first element;
     *  (xxx)=>xxx
     *  but only if they didn't specify one manually.
     */
    protected void prefixWithSynPred(GrammarAST alt) {
        // if they want backtracking and it's not a lexer rule in combined grammar
        String autoBacktrack = (String)currentBlockAST.getOption("backtrack");
        if ( autoBacktrack==null ) {
            autoBacktrack = (String)grammar.getOption("backtrack");
        }
        if ( autoBacktrack!=null&&autoBacktrack.equals("true") &&
             !(gtype==COMBINED_GRAMMAR &&
               Character.isUpperCase(currentRuleName.charAt(0))) &&
             alt.getFirstChild().getType()!=SYN_SEMPRED )
        {
            // duplicate alt and make a synpred block around that dup'd alt
            GrammarAST synpredBlockAST = createBlockFromDupAlt(alt);
            // Create a SYN_SEMPRED node as if user had typed this in
            // Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx
            GrammarAST synpredAST = createSynSemPredFromBlock(synpredBlockAST);
            // insert SYN_SEMPRED as first element of alt
            synpredAST.getLastSibling().setNextSibling(alt.getFirstChild());
            alt.setFirstChild(synpredAST);
        }
    }

    protected GrammarAST createSynSemPredFromBlock(GrammarAST synpredBlockAST) {
        // add grammar fragment to a list so we can make fake rules for them
        // later.
        String predName = grammar.defineSyntacticPredicate(synpredBlockAST,currentRuleName);
        // convert (alpha)=> into {synpredN}? where N is some pred count
        // during code gen we convert to function call with templates
        String synpredinvoke = predName;
        GrammarAST p = #[SYN_SEMPRED,synpredinvoke];
        p.setEnclosingRule(currentRuleName);
        // track how many decisions have synpreds
        grammar.blocksWithSynPreds.add(currentBlockAST);
        return p;
    }

    // Build a #(RULE ID modifier ARG RET SCOPE block EOR) tree for an
    // artificial rule (used e.g. for the Tokens rule / synpred fragments).
    public GrammarAST createSimpleRuleAST(String name, GrammarAST block, boolean fragment) {
        GrammarAST modifier = null;
        if ( fragment ) {
            modifier = #[FRAGMENT,"fragment"];
        }
        GrammarAST EORAST = #[EOR,"<end-of-rule>"];
        GrammarAST EOBAST = block.getLastChild();
        EORAST.setLine(EOBAST.getLine());
        EORAST.setColumn(EOBAST.getColumn());
        GrammarAST ruleAST =
            #([RULE,"rule"],
              [ID,name],modifier,[ARG,"ARG"],[RET,"RET"],
              [SCOPE,"scope"],block,EORAST);
        ruleAST.setLine(block.getLine());
        ruleAST.setColumn(block.getColumn());
        return ruleAST;
    }

    // Route syntax errors through ErrorManager rather than antlr's default.
    public void reportError(RecognitionException ex) {
        Token token = null;
        try {
            token = LT(1);
        }
        catch (TokenStreamException tse) {
            ErrorManager.internalError("can't get token???", tse);
        }
        ErrorManager.syntaxError(
            ErrorManager.MSG_SYNTAX_ERROR,
            grammar,
            token,
            "antlr: "+ex.toString(),
            ex);
    }

    // After parsing a lexer grammar, add the artificial Tokens rule
    // (filter-mode sensitive; see the long comment above).
    public void cleanup(GrammarAST root) {
        if ( gtype==LEXER_GRAMMAR ) {
            String filter = (String)grammar.getOption("filter");
            GrammarAST tokensRuleAST =
                grammar.addArtificialMatchTokensRule(
                    root, lexerRuleNames, filter!=null&&filter.equals("true"));
        }
    }
}

// Start rule: an entire grammar file -> #(grammarType ID DOC_COMMENT options
// tokens scopes actions rules).
grammar![Grammar g]
{
    this.grammar = g;
    GrammarAST opt=null;
    Token optionsStartToken = null;
    Map opts;
}
    :   //hdr:headerSpec
        ( ACTION )?
        ( cmt:DOC_COMMENT )?
        gr:grammarType gid:id SEMI
        (   {optionsStartToken=LT(1);}
            opts=optionsSpec
            {grammar.setOptions(opts, optionsStartToken);}
            {opt=(GrammarAST)returnAST;}
        )?
        (ts:tokensSpec!)?
        scopes:attrScopes
        (a:actions)?
        r:rules
        EOF
        {
        #grammar = #(null, #(#gr, #gid, #cmt, opt, #ts, #scopes, #a, #r));
        cleanup(#grammar);
        }
    ;

grammarType
    :   (   "lexer"!  {gtype=LEXER_GRAMMAR;}    // pure lexer
        |   "parser"! {gtype=PARSER_GRAMMAR;}   // pure parser
        |   "tree"!   {gtype=TREE_GRAMMAR;}     // a tree parser
        |             {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
        )
        gr:"grammar" {#gr.setType(gtype);}
    ;

actions
    :   (action)+
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   AMPERSAND^ (actionScopeName COLON! COLON!)? id ACTION
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l:"lexer" {#l.setType(ID);}
    |   p:"parser" {#p.setType(ID);}
    ;

/*
optionsSpec returns [Map opts=new HashMap()]
    :   #( OPTIONS (option[opts])+ )
    ;

option[Map opts]
{
    String key=null;
    Object value=null;
}
    :   #( ASSIGN id:ID {key=#id.getText();} value=optionValue )
        {opts.put(key,value);}
    ;
*/

optionsSpec returns [Map opts=new HashMap()]
    :   OPTIONS^ (option[opts] SEMI!)+ RCURLY!
    ;

option[Map opts]
{
    Object value=null;
}
    :   o:id ASSIGN^ value=optionValue
        {
        opts.put(#o.getText(), value);
        }
        /*
        {
        if ( #o.getText().equals("filter") && #v.getText().equals("true") ) {
            isFilterMode = true;
        }
        else if ( #o.getText().equals("backtrack") && #v.getText().equals("true") ) {
            if ( currentRuleName==null ) { // must grammar level
                isAutoBacktrackMode = true;
            }
            else {
                blockAutoBacktrackMode = true;
            }
        }
        }
        */
    ;

optionValue returns [Object value=null]
    :   x:id {value = #x.getText();}
    |   s:STRING_LITERAL {String vs = #s.getText(); value=vs.substring(1,vs.length()-1);}
    |   c:CHAR_LITERAL {String vs = #c.getText(); value=vs.substring(1,vs.length()-1);}
    |   i:INT {value = new Integer(#i.getText());}
    |   ss:STAR {#ss.setType(STRING_LITERAL); value = "*";} // used for k=*
//  |   cs:charSet {value = #cs;} // return set AST in this case
    ;

/*
optionValue
    :   id
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
//  |   cs:charSet {value = #cs;} // return set AST in this case
    ;
*/

/*
will probably need for char vocab spec later
charSet
    :   LPAREN^ {#LPAREN.setType(CHARSET);}
        charSetElement ( OR^ charSetElement )*
        RPAREN!
    ;

charSetElement
    :   c1:CHAR_LITERAL
    |   c2:CHAR_LITERAL RANGE^ c3:CHAR_LITERAL
    ;
*/

tokensSpec
    :   TOKENS^
        ( tokenSpec )+
        RCURLY!
    ;

tokenSpec
    :   TOKEN_REF ( ASSIGN^ (STRING_LITERAL|CHAR_LITERAL) )? SEMI!
    ;

attrScopes
    :   (attrScope)*
    ;

attrScope
    :   "scope"^ id ACTION
    ;

rules
    :   (
            options {
                // limitation of appox LL(k) says ambig upon
                // DOC_COMMENT TOKEN_REF, but that's an impossible sequence
                warnWhenFollowAmbig=false;
            }
            // NOTE(review): in the scraped source the next commented-out
            // alternative had merged with 'rule'; the trailing '|' is assumed
            // to belong to the comment -- confirm against upstream antlr.g.
        :   //{g.type==PARSER}? (aliasLexerRule)=>aliasLexerRule |
            rule
        )+
    ;

rule!
{
GrammarAST modifier=null, blk=null, blkRoot=null, eob=null;
int start = ((TokenWithIndex)LT(1)).getIndex();
int startLine = LT(1).getLine();
GrammarAST opt = null;
Map opts = null;
}
    :   ( d:DOC_COMMENT )?
        (   p1:"protected" {modifier=#p1;}
        |   p2:"public" {modifier=#p2;}
        |   p3:"private" {modifier=#p3;}
        |   p4:"fragment" {modifier=#p4;}
        )?
        ruleName:id
        {currentRuleName=#ruleName.getText();
         if ( gtype==LEXER_GRAMMAR && #p4==null ) {
             lexerRuleNames.add(currentRuleName);
         }
        }
        ( BANG )?
        ( aa:ARG_ACTION )?
        ( "returns" rt:ARG_ACTION )?
        ( throwsSpec )?
        ( opts=optionsSpec {opt=(GrammarAST)returnAST;} )?
        scopes:ruleScopeSpec
        (a:ruleActions)?
        colon:COLON
        {
        blkRoot = #[BLOCK,"BLOCK"];
        blkRoot.options = opts;
        blkRoot.setLine(colon.getLine());
        blkRoot.setColumn(colon.getColumn());
        eob = #[EOB,"<end-of-block>"];
        }
        (   (setNoParens SEMI) => s:setNoParens // try to collapse sets
            {
            blk = #(blkRoot,#(#[ALT,"ALT"],#s,#[EOA,"<end-of-alt>"]),eob);
            }
        |   b:altList[opts] {blk = #b;}
        )
        semi:SEMI
        ( ex:exceptionGroup )?
        {
        int stop = ((TokenWithIndex)LT(1)).getIndex()-1; // point at the semi or exception thingie
        eob.setLine(semi.getLine());
        eob.setColumn(semi.getColumn());
        GrammarAST eor = #[EOR,"<end-of-rule>"];
        eor.setEnclosingRule(#ruleName.getText());
        eor.setLine(semi.getLine());
        eor.setColumn(semi.getColumn());
        GrammarAST root = #[RULE,"rule"];
        root.ruleStartTokenIndex = start;
        root.ruleStopTokenIndex = stop;
        root.setLine(startLine);
        root.options = opts;
        #rule = #(root,
                  #ruleName,modifier,#(#[ARG,"ARG"],#aa),#(#[RET,"RET"],#rt),
                  opt,#scopes,#a,blk,ex,eor);
        currentRuleName=null;
        }
    ;

ruleActions
    :   (ruleAction)+
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   AMPERSAND^ id ACTION
    ;

throwsSpec
    :   "throws" id ( COMMA id )*
    ;

ruleScopeSpec
{
int line = LT(1).getLine();
int column = LT(1).getColumn();
}
    :!  ( options {warnWhenFollowAmbig=false;}
        : "scope" a:ACTION
        )?
        ( "scope" ids:idList SEMI! )*
        {
        GrammarAST scopeRoot = (GrammarAST)#[SCOPE,"scope"];
        scopeRoot.setLine(line);
        scopeRoot.setColumn(column);
        #ruleScopeSpec = #(scopeRoot, #a, #ids);
        }
    ;

/** Build #(BLOCK ( #(ALT ...) EOB )+ ) */
block
{
GrammarAST save = currentBlockAST;
Map opts=null;
}
    :   (set) => s:set // special block like ('a'|'b'|'0'..'9')

    |   lp:LPAREN^ {#lp.setType(BLOCK); #lp.setText("BLOCK");}
        (
            // 2nd alt and optional branch ambig due to
            // linear approx LL(2) issue.  COLON ACTION
            // matched correctly in 2nd alt.
            options {
                warnWhenFollowAmbig = false;
            }
        :   (opts=optionsSpec {#block.setOptions(grammar,opts);})?
            ( ruleActions )?
            COLON!
        |   ACTION COLON!
        )?
        {currentBlockAST = #lp;}
        a1:alternative rewrite
        {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a1);}
        (   OR! a2:alternative rewrite
            {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a2);}
        )*
        rp:RPAREN!
        {
        currentBlockAST = save;
        GrammarAST eob = #[EOB,"<end-of-block>"];
        eob.setLine(rp.getLine());
        eob.setColumn(rp.getColumn());
        #block.addChild(eob);
        }
    ;

altList[Map opts]
{
    GrammarAST blkRoot = #[BLOCK,"BLOCK"];
    blkRoot.options = opts;
    blkRoot.setLine(LT(1).getLine());
    blkRoot.setColumn(LT(1).getColumn());
    GrammarAST save = currentBlockAST;
    currentBlockAST = #blkRoot;
}
    :   a1:alternative rewrite
        {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a1);}
        (   OR! a2:alternative rewrite
            {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred(#a2);}
        )*
        {
        #altList = #(blkRoot,#altList,#[EOB,"<end-of-block>"]);
        currentBlockAST = save;
        }
    ;

alternative
{
    GrammarAST eoa = #[EOA, "<end-of-alt>"];
    GrammarAST altRoot = #[ALT,"ALT"];
    altRoot.setLine(LT(1).getLine());
    altRoot.setColumn(LT(1).getColumn());
}
    :   ( el:element )+ ( exceptionSpecNoLabel! )?
        {
        if ( #alternative==null ) {
            #alternative = #(altRoot,#[EPSILON,"epsilon"],eoa);
        }
        else {
            // we have a real list of stuff
            #alternative = #(altRoot, #alternative, eoa);
        }
        }
    |   {
        GrammarAST eps = #[EPSILON,"epsilon"];
        eps.setLine(LT(0).getLine()); // get line/col of '|' or ':' (prev token)
        eps.setColumn(LT(0).getColumn());
        #alternative = #(altRoot,eps,eoa);
        }
    ;

exceptionGroup
    :   ( exceptionSpec )+
    ;

exceptionSpec
    :   "exception"^ ( ARG_ACTION )? ( exceptionHandler )*
    ;

exceptionSpecNoLabel
    :   "exception" ( exceptionHandler )*
    ;

exceptionHandler
    :   "catch"^ ARG_ACTION ACTION
    ;

element
    :   elementNoOptionSpec //(elementOptionSpec!)?
    ;

elementOptionSpec
    :   OPEN_ELEMENT_OPTION id ASSIGN optionValue
        ( SEMI id ASSIGN optionValue )*
        CLOSE_ELEMENT_OPTION
    ;

elementNoOptionSpec
{
    IntSet elements=null;
}
    :   (id ASSIGN^)? ( range | terminal | notSet | ebnf )
    |   id PLUS_ASSIGN^ ( terminal | notSet | ebnf )
    |   a:ACTION
    |   p:SEMPRED ( IMPLIES! {#p.setType(GATED_SEMPRED);} )?
        {
        #p.setEnclosingRule(currentRuleName);
        grammar.blocksWithSemPreds.add(currentBlockAST);
        }
    |   t3:tree
    ;

notSet
{
    int line = LT(1).getLine();
    int col = LT(1).getColumn();
    GrammarAST subrule=null;
}
    :   n:NOT^
        (   notTerminal
        |   // special case: single element is not a set
            (LPAREN setElement RPAREN)=> LPAREN! setElement RPAREN!
        |   set
        )
        ( subrule=ebnfSuffix[#n,false] {#notSet = subrule;} )?
        {#notSet.setLine(line); #notSet.setColumn(col);}
    ;

/** Match two or more set elements */
set
    :   LPAREN! s:setNoParens RPAREN!
        ( ast:ast_suffix! {#s.addChild(#ast);} )?
    ;

setNoParens
// NOTE(review): the scraped capture is truncated here (page chrome follows);
// the body of setNoParens and the remainder of the grammar are missing from
// this copy and must be restored from the upstream antlr.g.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -