grammar.java
/** Track names of preds so we can avoid generating preds that aren't used.
 *  Computed during NFA to DFA conversion.  Just walk accept states
 *  and look for synpreds because that is the only state target whose
 *  incident edges can have synpreds.  Same is true for
 *  decisionsWhoseDFAsUsesSynPreds.
 */
public Set<String> synPredNamesUsedInDFA = new HashSet<String>();

/** Track decisions with sem preds specified for reporting.
 *  This is a set of BLOCK type AST nodes.
 */
public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>();

/** Track decisions that actually use the sem preds in the DFA. Set<DFA> */
public Set decisionsWhoseDFAsUsesSemPreds = new HashSet();

protected boolean allDecisionDFACreated = false;

/** We need a way to detect when a lexer grammar is autogenerated from
 *  another grammar or we are just sending in a string representing a
 *  grammar.  We don't want to generate a .tokens file, for example,
 *  in such cases.
 */
protected boolean builtFromString = false;

/** Factored out the sanity checking code; delegate to it. */
GrammarSanity sanity = new GrammarSanity(this);

public Grammar() {
    initTokenSymbolTables();
    builtFromString = true;
}

public Grammar(String grammarString)
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    builtFromString = true;
    initTokenSymbolTables();
    setFileName("<string>");
    setGrammarContent(new StringReader(grammarString));
}

public Grammar(String fileName, String grammarString)
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    this(null, fileName, new StringReader(grammarString));
}

/** Create a grammar from a Reader.  Parse the grammar, building a tree
 *  and loading a symbol table of sorts here in Grammar.  Then create
 *  an NFA and associated factory.  Walk the AST representing the grammar,
 *  building the state clusters of the NFA.
 */
public Grammar(Tool tool, String fileName, Reader r)
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    initTokenSymbolTables();
    setTool(tool);
    setFileName(fileName);
    setGrammarContent(r);
}

public void setFileName(String fileName) {
    this.fileName = fileName;
}

public String getFileName() {
    return fileName;
}

public void setName(String name) {
    if ( name==null ) {
        return;
    }
    // don't error check autogenerated files (those with '__' in them)
    String saneFile = fileName.replace('\\', '/');
    int lastSlash = saneFile.lastIndexOf('/');
    String onlyFileName = saneFile.substring(lastSlash+1, fileName.length());
    if ( !builtFromString ) {
        int lastDot = onlyFileName.lastIndexOf('.');
        String onlyFileNameNoSuffix = null;
        if ( lastDot < 0 ) {
            ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
            onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
        }
        else {
            onlyFileNameNoSuffix = onlyFileName.substring(0, lastDot);
        }
        if ( !name.equals(onlyFileNameNoSuffix) ) {
            ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
                               name, fileName);
        }
    }
    this.name = name;
}

public void setGrammarContent(String grammarString)
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    setGrammarContent(new StringReader(grammarString));
}

public void setGrammarContent(Reader r)
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    ErrorManager.resetErrorState(); // reset in case > 1 grammar in same thread

    // BUILD AST FROM GRAMMAR
    ANTLRLexer lexer = new ANTLRLexer(r);
    lexer.setFilename(this.getFileName());
    // use the rewrite engine because we want to buffer up all tokens
    // in case they have a merged lexer/parser; send lexer rules to
    // new grammar.
    lexer.setTokenObjectClass("antlr.TokenWithIndex");
    tokenBuffer = new TokenStreamRewriteEngine(lexer);
    tokenBuffer.discard(ANTLRParser.WS);
    tokenBuffer.discard(ANTLRParser.ML_COMMENT);
    tokenBuffer.discard(ANTLRParser.COMMENT);
    tokenBuffer.discard(ANTLRParser.SL_COMMENT);
    ANTLRParser parser = new ANTLRParser(tokenBuffer);
    parser.getASTFactory().setASTNodeClass(GrammarAST.class);
    parser.setFilename(this.getFileName());
    parser.setASTNodeClass("org.antlr.tool.GrammarAST");
    parser.grammar(this);
    grammarTree = (GrammarAST)parser.getAST();
    setFileName(lexer.getFilename()); // the lexer #src might change name
    if ( grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
        ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
        return;
    }

    // Get syn pred rules and add to existing tree
    List synpredRules =
        getArtificialRulesForSyntacticPredicates(parser, nameToSynpredASTMap);
    for (int i = 0; i < synpredRules.size(); i++) {
        GrammarAST rAST = (GrammarAST) synpredRules.get(i);
        grammarTree.addChild(rAST);
    }

    if ( Tool.internalOption_PrintGrammarTree ) {
        System.out.println(grammarTree.toStringList());
    }

    // ASSIGN TOKEN TYPES
    //System.out.println("### assign types");
    AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesWalker();
    ttypesWalker.setASTNodeClass("org.antlr.tool.GrammarAST");
    try {
        ttypesWalker.grammar(grammarTree, this);
    }
    catch (RecognitionException re) {
        ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
    }

    // DEFINE RULES
    //System.out.println("### define rules");
    DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker();
    defineItemsWalker.setASTNodeClass("org.antlr.tool.GrammarAST");
    try {
        defineItemsWalker.grammar(grammarTree, this);
    }
    catch (RecognitionException re) {
        ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
    }

    // ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS
    examineAllExecutableActions();
    checkAllRulesForUselessLabels();

    nameSpaceChecker.checkConflicts();
}
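// --- Usage sketch (added for illustration; not part of the original class).
// Shows how the pipeline in setGrammarContent() above is typically driven:
// the String constructor lexes, parses, assigns token types, and defines
// rules.  The grammar text and this method name are hypothetical.
public static void exampleParseFromString()
    throws antlr.RecognitionException, antlr.TokenStreamException
{
    String g =
        "grammar T;\n"+
        "a : ID ;\n"+
        "ID : ('a'..'z')+ ;\n";
    Grammar grammar = new Grammar(g);   // runs setGrammarContent() internally
    // builtFromString grammars are named "<string>" by the constructor
    System.out.println("parsed grammar from "+grammar.getFileName());
}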
/** If the grammar is a merged grammar, return the text of the implicit
 *  lexer grammar.
 */
public String getLexerGrammar() {
    if ( lexerGrammarST.getAttribute("literals")==null &&
         lexerGrammarST.getAttribute("rules")==null )
    {
        // if no rules, return nothing
        return null;
    }
    lexerGrammarST.setAttribute("name", name);
    // if there are any actions set for lexer, pass them in
    if ( actions.get("lexer")!=null ) {
        lexerGrammarST.setAttribute("actionNames",
                                    ((Map)actions.get("lexer")).keySet());
        lexerGrammarST.setAttribute("actions",
                                    ((Map)actions.get("lexer")).values());
    }
    // make sure generated grammar has the same options
    if ( options!=null ) {
        Iterator optionNames = options.keySet().iterator();
        while (optionNames.hasNext()) {
            String optionName = (String) optionNames.next();
            if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
                Object value = options.get(optionName);
                lexerGrammarST.setAttribute("options.{name,value}",
                                            optionName, value);
            }
        }
    }
    return lexerGrammarST.toString();
}

public String getImplicitlyGeneratedLexerFileName() {
    return name+
           IGNORE_STRING_IN_GRAMMAR_FILE_NAME+
           LEXER_GRAMMAR_FILE_EXTENSION;
}

public File getImportedVocabFileName(String vocabName) {
    return new File(tool.getLibraryDirectory(),
                    File.separator+
                    vocabName+
                    CodeGenerator.VOCAB_FILE_EXTENSION);
}
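// --- Sketch (added for illustration; not in the original source): how a
// caller such as the ANTLR Tool might write out the implicit lexer grammar
// of a combined grammar using the two accessors above.  The file handling
// and method name here are assumptions, not the Tool's actual code.
public static void exampleEmitImplicitLexer(Grammar combined)
    throws java.io.IOException
{
    String lexerText = combined.getLexerGrammar(); // null when no lexer rules
    if ( lexerText!=null ) {
        java.io.Writer w =
            new java.io.FileWriter(combined.getImplicitlyGeneratedLexerFileName());
        try {
            w.write(lexerText);
        }
        finally {
            w.close();
        }
    }
}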
/** Parse a rule we add artificially that is a list of the other lexer
 *  rules like this: "Tokens : ID | INT | SEMI ;"  nextToken() will invoke
 *  this to set the current token.  Add char literals before
 *  the rule references.
 *
 *  If in filter mode, we want every alt to backtrack and we need to
 *  do k=1 to force the "first token def wins" rule.  Otherwise, the
 *  longest-match rule comes into play with LL(*).
 *
 *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
 *  grammar, which I think is proper even though it peeks at the info
 *  that later phases will compute.  It gets a list of lexer rules
 *  and builds a string representing the rule; then it creates a parser
 *  and adds the resulting tree to the grammar's tree.
 */
public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
                                               List ruleNames,
                                               boolean filterMode)
{
    StringTemplate matchTokenRuleST = null;
    if ( filterMode ) {
        matchTokenRuleST = new StringTemplate(
            ARTIFICIAL_TOKENS_RULENAME+
            " options {k=1; backtrack=true;} : <rules; separator=\"|\">;",
            AngleBracketTemplateLexer.class);
    }
    else {
        matchTokenRuleST = new StringTemplate(
            ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;",
            AngleBracketTemplateLexer.class);
    }

    // Now add token rule references
    for (int i = 0; i < ruleNames.size(); i++) {
        String rname = (String) ruleNames.get(i);
        matchTokenRuleST.setAttribute("rules", rname);
    }
    //System.out.println("tokens rule: "+matchTokenRuleST.toString());

    ANTLRLexer lexer =
        new ANTLRLexer(new StringReader(matchTokenRuleST.toString()));
    lexer.setTokenObjectClass("antlr.TokenWithIndex");
    TokenStreamRewriteEngine tokbuf = new TokenStreamRewriteEngine(lexer);
    tokbuf.discard(ANTLRParser.WS);
    tokbuf.discard(ANTLRParser.ML_COMMENT);
    tokbuf.discard(ANTLRParser.COMMENT);
    tokbuf.discard(ANTLRParser.SL_COMMENT);
    ANTLRParser parser = new ANTLRParser(tokbuf);
    parser.grammar = this;
    parser.gtype = ANTLRParser.LEXER_GRAMMAR;
    parser.setASTNodeClass("org.antlr.tool.GrammarAST");
    try {
        parser.rule();
        if ( Tool.internalOption_PrintGrammarTree ) {
            System.out.println("Tokens rule: "+parser.getAST().toStringTree());
        }
        GrammarAST p = grammarAST;
        while ( p.getType()!=ANTLRParser.LEXER_GRAMMAR ) {
            p = (GrammarAST)p.getNextSibling();
        }
        p.addChild(parser.getAST());
    }
    catch (Exception e) {
        ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE, e);
    }
    return (GrammarAST)parser.getAST();
}

/** For any syntactic predicates, we need to define rules for them; they
 *  will get defined automatically like any other rule. :)
 */
protected List getArtificialRulesForSyntacticPredicates(ANTLRParser parser,
                                                        LinkedHashMap nameToSynpredASTMap)
{
    List rules = new ArrayList();
    if ( nameToSynpredASTMap==null ) {
        return rules;
    }
    Set predNames = nameToSynpredASTMap.keySet();
    boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
    for (Iterator it = predNames.iterator(); it.hasNext();) {
        String synpredName = (String)it.next();
        GrammarAST fragmentAST =
            (GrammarAST) nameToSynpredASTMap.get(synpredName);
        GrammarAST ruleAST =
            parser.createSimpleRuleAST(synpredName, fragmentAST, isLexer);
        rules.add(ruleAST);
    }
    return rules;
}
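// --- Sketch (added for illustration; this method name is hypothetical):
// what the filter-mode template in addArtificialMatchTokensRule() above
// expands to.  Uses the same StringTemplate 3.x API as that method;
// repeated setAttribute() calls with one name aggregate into a list that
// the <rules; separator="|"> expression joins.
public static void exampleShowTokensRuleText() {
    StringTemplate st = new StringTemplate(
        ARTIFICIAL_TOKENS_RULENAME+
        " options {k=1; backtrack=true;} : <rules; separator=\"|\">;",
        AngleBracketTemplateLexer.class);
    st.setAttribute("rules", "ID");
    st.setAttribute("rules", "INT");
    st.setAttribute("rules", "SEMI");
    // with ARTIFICIAL_TOKENS_RULENAME == "Tokens", prints:
    //   Tokens options {k=1; backtrack=true;} : ID|INT|SEMI;
    System.out.println(st.toString());
}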
protected void initTokenSymbolTables() {
    // the faux token types take first NUM_FAUX_LABELS positions
    // then we must have room for the predefined runtime token types
    // like DOWN/UP used for tree parsing.
    typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Label.MIN_TOKEN_TYPE-1);
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.DOWN-1, "DOWN");
    typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.UP-1, "UP");
    tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID));
    tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT));
    tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED));
    tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET));
    tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON));
    tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF));
    tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE));
    tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN));
    tokenIDToTypeMap.put("UP", Utils.integer(Label.UP));
}

/** Walk the list of options, altering this Grammar object according
 *  to any I recognize.
protected void processOptions() {
    Iterator optionNames = options.keySet().iterator();
    while (optionNames.hasNext()) {
        String optionName = (String) optionNames.next();
        Object value = options.get(optionName);
        if ( optionName.equals("tokenVocab") ) {

        }
    }
}
*/

public void createNFAs() {
    //System.out.println("### create NFAs");
    if ( nfa!=null ) {
        // don't let it create more than once; has side-effects
        return;
    }
    if ( getRules().size()==0 ) {
        return;
    }

    nfa = new NFA(this); // create NFA that TreeToNFAConverter'll fill in
    NFAFactory factory = new NFAFactory(nfa);
    TreeToNFAConverter nfaBuilder =
        new TreeToNFAConverter(this, nfa, factory);
    try {
        nfaBuilder.grammar(grammarTree);
    }
    catch (RecognitionException re) {
        ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, name, re);
    }
    //System.out.println("NFA has "+factory.getNumberOfStates()+" states");
}

/** For each decision in this grammar, compute a single DFA using the
 *  NFA states associated with the decision.  The DFA construction
 *  determines whether or not the alternatives in the decision are
 *  separable using a regular lookahead language.
 *
 *  Store the lookahead DFAs in the AST created from the user's grammar
 *  so the code generator or whoever can easily access it.
 *
 *  This is a separate method because you might want to create a
 *  Grammar without doing the expensive analysis.
 */
public void createLookaheadDFAs() {
    if ( nfa==null ) {
        createNFAs();
    }
    long start = System.currentTimeMillis();
    //System.out.println("### create DFAs");
    int numDecisions = getNumberOfDecisions();
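    // --- Usage sketch (added for illustration; the grammar text is
    // hypothetical): driving the two analysis phases above by hand, as a
    // caller like the Tool would.
    //
    //   Grammar g = new Grammar("grammar T;\na : 'x' | 'x' 'y' ;\n");
    //   g.createNFAs();          // guarded: builds the NFA only once
    //   g.createLookaheadDFAs(); // one lookahead DFA per decision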