📄 grammar.java
字号:
// NOTE(review): this chunk begins mid-signature; the leading parameters and
// method name lie outside this view (presumably a define*Label method taking
// (String ruleName, antlr.Token label, GrammarAST tokenRef)) — confirm
// against the full file.
antlr.Token label, GrammarAST tokenRef) {
    Rule r = getRule(ruleName);
    if ( r!=null ) {
        // In a lexer, a label on a char-level construct (literal, block,
        // ~x, range, wildcard) is classified CHAR_LABEL rather than
        // TOKEN_LABEL; everything else is a token-object label.
        if ( type==LEXER &&
             (tokenRef.getType()==ANTLRParser.CHAR_LITERAL||
              tokenRef.getType()==ANTLRParser.BLOCK||
              tokenRef.getType()==ANTLRParser.NOT||
              tokenRef.getType()==ANTLRParser.CHAR_RANGE||
              tokenRef.getType()==ANTLRParser.WILDCARD))
        {
            defineLabel(r, label, tokenRef, CHAR_LABEL);
        }
        else {
            defineLabel(r, label, tokenRef, TOKEN_LABEL);
        }
    }
}

/** Record a label attached to a rule reference (x=r) inside rule ruleName.
 *  Silently ignored when ruleName is not a known rule.
 */
public void defineRuleRefLabel(String ruleName,
                               antlr.Token label,
                               GrammarAST ruleRef)
{
    Rule r = getRule(ruleName);
    if ( r!=null ) {
        defineLabel(r, label, ruleRef, RULE_LABEL);
    }
}

/** Record a list label attached to a token reference (x+=ID) inside
 *  rule ruleName.  Silently ignored when ruleName is not a known rule.
 */
public void defineTokenListLabel(String ruleName,
                                 antlr.Token label,
                                 GrammarAST element)
{
    Rule r = getRule(ruleName);
    if ( r!=null ) {
        defineLabel(r, label, element, TOKEN_LIST_LABEL);
    }
}

/** Record a list label attached to a rule reference (x+=r) inside rule
 *  ruleName.  Emits a grammar error when the rule lacks the multi-value
 *  return struct required for list labels, but still defines the label
 *  so downstream processing can continue.
 */
public void defineRuleListLabel(String ruleName,
                                antlr.Token label,
                                GrammarAST element)
{
    Rule r = getRule(ruleName);
    if ( r!=null ) {
        if ( !r.getHasMultipleReturnValues() ) {
            ErrorManager.grammarError(
                ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this,
                label,label.getText());
        }
        defineLabel(r, label, element, RULE_LIST_LABEL);
    }
}

/** Given a set of all rewrite elements on right of ->, filter for
 *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
 *  Return a displayable token type name computed from the GrammarAST.
 */
public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
    Set<String> labels = new HashSet<String>();
    for (Iterator it = rewriteElements.iterator(); it.hasNext();) {
        GrammarAST el = (GrammarAST) it.next();
        if ( el.getType()==ANTLRParser.LABEL ) {
            Rule r = getRule(el.enclosingRule);
            String labelName = el.getText();
            LabelElementPair pair = r.getLabel(labelName);
            // if valid label and type is what we're looking for
            // and not ref to old value val $rule, add to list
            if ( pair!=null && pair.type==labelType &&
                 !labelName.equals(el.enclosingRule) )
            {
                labels.add(labelName);
            }
        }
    }
    return labels;
}

/** Before generating code, we examine all actions that can have
 *  $x.y and $y stuff in them because some code generation depends on
 *  Rule.referencedPredefinedRuleAttributes.  I need to remove unused
 *  rule labels for example.
 */
protected void examineAllExecutableActions() {
    Collection rules = getRules();
    for (Iterator it = rules.iterator(); it.hasNext();) {
        Rule r = (Rule) it.next();
        // walk all actions within the rule elements, args, and exceptions
        List<GrammarAST> actions = r.getInlineActions();
        for (int i = 0; i < actions.size(); i++) {
            GrammarAST actionAST = (GrammarAST) actions.get(i);
            // the "sniffer" scans the action text for attribute references
            ActionAnalysisLexer sniffer =
                new ActionAnalysisLexer(this, r.name, actionAST);
            sniffer.analyze();
        }
        // walk any named actions like @init, @after
        Collection<GrammarAST> namedActions = r.getActions().values();
        for (Iterator it2 = namedActions.iterator(); it2.hasNext();) {
            GrammarAST actionAST = (GrammarAST) it2.next();
            ActionAnalysisLexer sniffer =
                new ActionAnalysisLexer(this, r.name, actionAST);
            sniffer.analyze();
        }
    }
}

/** Remove all labels on rule refs whose target rules have no return value.
 *  Do this for all rules in grammar.  No-op for lexer grammars.
 */
public void checkAllRulesForUselessLabels() {
    if ( type==LEXER ) {
        return;
    }
    Set rules = nameToRuleMap.keySet();
    for (Iterator it = rules.iterator(); it.hasNext();) {
        String ruleName = (String) it.next();
        Rule r = getRule(ruleName);
        removeUselessLabels(r.getRuleLabels());
        removeUselessLabels(r.getRuleListLabels());
    }
}

/** A label on a rule is useless if the rule has no return value, no
 *  tree or template output, and it is not referenced in an action.
 *  Collects the doomed label names first, then removes them, to avoid
 *  mutating the map while iterating its values.
 */
protected void removeUselessLabels(Map ruleToElementLabelPairMap) {
    if ( ruleToElementLabelPairMap==null ) {
        return;
    }
    Collection labels = ruleToElementLabelPairMap.values();
    List kill = new ArrayList();
    for (Iterator labelit = labels.iterator(); labelit.hasNext();) {
        LabelElementPair pair = (LabelElementPair) labelit.next();
        Rule refdRule = getRule(pair.elementRef.getText());
        if ( refdRule!=null &&
             !refdRule.getHasReturnValue() &&
             !pair.actionReferencesLabel )
        {
            //System.out.println(pair.label.getText()+" is useless");
            kill.add(pair.label.getText());
        }
    }
    for (int i = 0; i < kill.size(); i++) {
        String labelToKill = (String) kill.get(i);
        // System.out.println("kill "+labelToKill);
        ruleToElementLabelPairMap.remove(labelToKill);
    }
}

/** Track a rule reference within an outermost alt of a rule.  Used
 *  at the moment to decide if $ruleref refers to a unique rule ref in
 *  the alt.  Rewrite rules force tracking of all rule AST results.
 *
 *  This data is also used to verify that all rules have been defined.
 */
public void altReferencesRule(String ruleName,
                              GrammarAST refAST,
                              int outerAltNum)
{
    Rule r = getRule(ruleName);
    if ( r==null ) {
        return;
    }
    r.trackRuleReferenceInAlt(refAST, outerAltNum);
    antlr.Token refToken = refAST.getToken();
    // NOTE(review): relies on Token equality semantics for de-duping;
    // presumably identity or value equality as defined by antlr.Token —
    // confirm before changing ruleRefs' collection type.
    if ( !ruleRefs.contains(refToken) ) {
        ruleRefs.add(refToken);
    }
}

/** Track a token reference within an outermost alt of a rule.  Used
 *  to decide if $tokenref refers to a unique token ref in
 *  the alt.  Does not track literals!
 *
 *  Rewrite rules force tracking of all tokens.
 */
public void altReferencesTokenID(String ruleName,
                                 GrammarAST refAST,
                                 int outerAltNum)
{
    Rule r = getRule(ruleName);
    if ( r==null ) {
        return;
    }
    r.trackTokenReferenceInAlt(refAST, outerAltNum);
    if ( !tokenIDRefs.contains(refAST.getToken()) ) {
        tokenIDRefs.add(refAST.getToken());
    }
}

/** To yield smaller, more readable code, track which rules have their
 *  predefined attributes accessed.  If the rule has no user-defined
 *  return values, then don't generate the return value scope classes
 *  etc...  Make the rule have void return value.  Don't track for lexer
 *  rules.
 */
public void referenceRuleLabelPredefinedAttribute(String ruleName) {
    Rule r = getRule(ruleName);
    if ( r!=null && type!=LEXER ) {
        // indicate that an action ref'd an attr unless it's in a lexer
        // so that $ID.text refs don't force lexer rules to define
        // return values...Token objects are created by the caller instead.
        r.referencedPredefinedRuleAttributes = true;
    }
}

/** Delegate left-recursion checking to the sanity checker. */
public List checkAllRulesForLeftRecursion() {
    return sanity.checkAllRulesForLeftRecursion();
}

/** Return a list of left-recursive rules; no analysis can be done
 *  successfully on these.  Useful to skip these rules then and also
 *  for ANTLRWorks to highlight them.
 */
public Set getLeftRecursiveRules() {
    // the left-recursion check needs NFAs; build them lazily
    if ( nfa==null ) {
        createNFAs();
    }
    // cached from a previous check
    if ( leftRecursiveRules!=null ) {
        return leftRecursiveRules;
    }
    // side effect of the check populates leftRecursiveRules
    sanity.checkAllRulesForLeftRecursion();
    return leftRecursiveRules;
}

/** Delegate rule-reference validation (existence, args) to the sanity
 *  checker.
 */
public void checkRuleReference(GrammarAST refAST,
                               GrammarAST argsAST,
                               String currentRuleName)
{
    sanity.checkRuleReference(refAST, argsAST, currentRuleName);
}

/** Rules like "a : ;" and "a : {...} ;" should not generate
 *  try/catch blocks for RecognitionException.  To detect this
 *  it's probably ok to just look for any reference to an atom
 *  that can match some input.  W/o that, the rule is unlikey to have
 *  any else.
 */
public boolean isEmptyRule(GrammarAST block) {
    GrammarAST aTokenRefNode =
        block.findFirstType(ANTLRParser.TOKEN_REF);
    GrammarAST aStringLiteralRefNode =
        block.findFirstType(ANTLRParser.STRING_LITERAL);
    GrammarAST aCharLiteralRefNode =
        block.findFirstType(ANTLRParser.CHAR_LITERAL);
    GrammarAST aWildcardRefNode =
        block.findFirstType(ANTLRParser.WILDCARD);
    GrammarAST aRuleRefNode =
        block.findFirstType(ANTLRParser.RULE_REF);
    // empty iff the block references no atom of any kind
    if ( aTokenRefNode==null&&
         aStringLiteralRefNode==null&&
         aCharLiteralRefNode==null&&
         aWildcardRefNode==null&&
         aRuleRefNode==null )
    {
        return true;
    }
    return false;
}

/** Map a token name (ID or quoted literal) to its token type; returns
 *  Label.INVALID when the name is unknown.
 */
public int getTokenType(String tokenName) {
    Integer I = null;
    // literals start with a single quote; look them up in the literal map
    if ( tokenName.charAt(0)=='\'') {
        I = (Integer)stringLiteralToTypeMap.get(tokenName);
    }
    else { // must be a label like ID
        I = (Integer)tokenIDToTypeMap.get(tokenName);
    }
    int i = (I!=null)?I.intValue():Label.INVALID;
    //System.out.println("grammar type "+type+" "+tokenName+"->"+i);
    return i;
}

/** Get the list of tokens that are IDs like BLOCK and LPAREN */
public Set getTokenIDs() {
    return tokenIDToTypeMap.keySet();
}

/** Return an ordered integer list of token types that have no
 *  corresponding token ID like INT or KEYWORD_BEGIN; for stuff
 *  like 'begin'.
 */
public Collection getTokenTypesWithoutID() {
    List types = new ArrayList();
    for (int t =Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
        String name = getTokenDisplayName(t);
        // display names that start with a quote are literals with no ID
        if ( name.charAt(0)=='\'' ) {
            types.add(Utils.integer(t));
        }
    }
    return types;
}

/** Get a list of all token IDs and literals that have an associated
 *  token type.
 */
public Set getTokenDisplayNames() {
    Set names = new HashSet();
    for (int t =Label.MIN_TOKEN_TYPE; t <=getMaxTokenType(); t++) {
        names.add(getTokenDisplayName(t));
    }
    return names;
}

/** Given a literal like (the 3 char sequence with single quotes) 'a',
 *  return the int value of 'a'.  Convert escape sequences here also.
 *  ANTLR's antlr.g parser does not convert escape sequences.
 *
 *  11/26/2005: I changed literals to always be '...' even for strings.
 *  This routine still works though.
 */
public static int getCharValueFromGrammarCharLiteral(String literal) {
    if ( literal.length()==3 ) {
        // 'x'
        return literal.charAt(1); // no escape char
    }
    else if ( literal.length() == 4 ) {
        // '\x'  (antlr lexer will catch invalid char)
        int escChar = literal.charAt(2);
        int charVal = ANTLRLiteralEscapedCharValue[escChar];
        if ( charVal==0 ) {
            // Unnecessary escapes like '\{' should just yield {
            return escChar;
        }
        return charVal;
    }
    else if( literal.length() == 8 ) {
        // '\u1234'  -- strip quotes and "\u", parse the 4 hex digits
        String unicodeChars = literal.substring(3,literal.length()-1);
        return Integer.parseInt(unicodeChars, 16);
    }
    ErrorManager.assertTrue(false, "invalid char literal: "+literal);
    return -1;
}

/** ANTLR does not convert escape sequences during the parse phase because
 *  it could not know how to print String/char literals back out when
 *  printing grammars etc...  Someone in China might use the real unicode
 *  char in a literal as it will display on their screen; when printing
 *  back out, I could not know whether to display or use a unicode escape.
 *
 *  This routine converts a string literal with possible escape sequences
 *  into a pure string of 16-bit char values.  Escapes and unicode \u0000
 *  specs are converted to pure chars.  return in a buffer; people may
 *  want to walk/manipulate further.
 *
 *  The NFA construction routine must know the actual char values.
 */
public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) {
    //System.out.println("escape: ["+literal+"]");
    StringBuffer buf = new StringBuffer();
    int last = literal.length()-1; // skip quotes on outside
    for (int i=1; i<last; i++) {
        char c = literal.charAt(i);
        if ( c=='\\' ) {
            i++;
            c = literal.charAt(i);
            if ( Character.toUpperCase(c)=='U' ) {
                // \u0000 -- consume exactly 4 hex digits
                i++;
                String unicodeChars = literal.substring(i,i+4);
                // parse the unicode 16 bit hex value
                int val = Integer.parseInt(unicodeChars, 16);
                i+=4-1; // loop will inc by 1; only jump 3 then
                buf.append((char)val);
            }
            else {
                buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape
            }
        }
        else {
            buf.append(c); // simple char x
        }
    }
    //System.out.println("string: ["+buf.toString()+"]");
    return buf;
}

/** Pull your token definitions from an existing grammar in memory.
 *  You must use Grammar() ctor then this method then setGrammarContent()
 *  to make this work.  This is useful primarily for testing and
 *  interpreting grammars.  Return the max token type found.
 */
public int importTokenVocabulary(Grammar importFromGr) {
    Set importedTokenIDs = importFromGr.getTokenIDs();
    for (Iterator it = importedTokenIDs.iterator(); it.hasNext();) {
        String tokenID = (String) it.next();
        int tokenType = importFromGr.getTokenType(tokenID);
        maxTokenType = Math.max(maxTokenType,tokenType);
        if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
            //System.out.println("import token from grammar "+tokenID+"="+tokenType);
            defineToken(tokenID, tokenType);
        }
    }
    return maxTokenType; // return max found
}

/** Load a vocab file <vocabName>.tokens and return max token type found. */
public int importTokenVocabulary(String vocabName) {
    File fullFile = getImportedVocabFileName(vocabName);
    try {
        // NOTE(review): FileReader uses the platform default charset;
        // confirm .tokens files are always ASCII before relying on this.
        // (method truncated here -- the remainder lies outside this view)
        FileReader fr = new FileReader(fullFile);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -