📄 antlr3.py
字号:
def get(self, i): """ Return absolute token i; ignore which channel the tokens are on; that is, count all tokens not just on-channel tokens. """ return self.tokens[i] def LA(self, i): return self.LT(i).type def mark(self): self.lastMarker = self.index() return self.lastMarker def release(self, marker=None): # no resources to release pass def size(self): return len(self.tokens) def index(self): return self.p def rewind(self, marker=None): if marker is None: marker = self.lastMarker self.seek(marker) def seek(self, index): self.p = index def getTokenSource(self): return self.tokenSource def toString(self, start=None, stop=None): if self.p == -1: self.fillBuffer() if start is None: start = 0 elif not isinstance(start, int): start = start.index if stop is None: stop = len(self.tokens) - 1 elif not isinstance(stop, int): stop = stop.index if stop >= len(self.tokens): stop = len(self.tokens) - 1 return ''.join([t.text for t in self.tokens[start:stop+1]])class RewriteOperation(object): """@brief Helper class used by TokenRewriteStream""" def __init__(self, index, text): self.index = index self.text = text def execute(self, buf): """Execute the rewrite operation by possibly adding to the buffer. Return the index of the next token to operate on. """ return self.index def toString(self): opName = self.__class__.__name__ return opName+"@"+self.index+'"'+self.text+'"' __str__ = toStringclass InsertBeforeOp(RewriteOperation): """@brief Helper class used by TokenRewriteStream""" def execute(self, buf): buf.write(self.text) return self.indexclass ReplaceOp(RewriteOperation): """ @brief Helper class used by TokenRewriteStream I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp instructions. """ def __init__(self, first, last, text): RewriteOperation.__init__(self, first, text) self.lastIndex = last def execute(self, buf): if self.text is not None: buf.write(self.text) return self.lastIndex + 1class TokenRewriteStream(CommonTokenStream): """Useful for dumping out the input stream after doing some augmentation or other manipulations. You can insert stuff, replace, and delete chunks. Note that the operations are done lazily--only if you convert the buffer to a String. This is very efficient because you are not moving data around all the time. As the buffer of tokens is converted to strings, the toString() method(s) check to see if there is an operation at the current index. If so, the operation is done and then normal String rendering continues on the buffer. This is like having multiple Turing machine instruction streams (programs) operating on a single input tape. :) Since the operations are done lazily at toString-time, operations do not screw up the token index values. That is, an insert operation at token index i does not change the index values for tokens i+1..n-1. Because operations never actually alter the buffer, you may always get the original token stream back without undoing anything. Since the instructions are queued up, you can easily simulate transactions and roll back any changes if there is an error just by removing instructions. For example, CharStream input = new ANTLRFileStream("input"); TLexer lex = new TLexer(input); TokenRewriteStream tokens = new TokenRewriteStream(lex); T parser = new T(tokens); parser.startRule(); Then in the rules, you can execute Token t,u; ... input.insertAfter(t, "text to put after t");} input.insertAfter(u, "text after u");} System.out.println(tokens.toString()); Actually, you have to cast the 'input' to a TokenRewriteStream. :( You can also have multiple "instruction streams" and get multiple rewrites from a single pass over the input. Just name the instruction streams and use that name again when printing the buffer. This could be useful for generating a C file and also its header file--all from the same buffer: tokens.insertAfter("pass1", t, "text to put after t");} tokens.insertAfter("pass2", u, "text after u");} System.out.println(tokens.toString("pass1")); System.out.println(tokens.toString("pass2")); If you don't use named rewrite streams, a "default" stream is used as the first example shows. """ DEFAULT_PROGRAM_NAME = "default" MIN_TOKEN_INDEX = 0 def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): CommonTokenStream.__init__(self, tokenSource, channel) # You may have multiple, named streams of rewrite operations. # I'm calling these things "programs." # Maps String (name) -> rewrite (List) self.programs = {} self.programs[self.DEFAULT_PROGRAM_NAME] = [] # Map String (program name) -> Integer index self.lastRewriteTokenIndexes = {} def rollback(self, *args): """ Rollback the instruction stream for a program so that the indicated instruction (via instructionIndex) is no longer in the stream. UNTESTED! """ if len(args) == 2: programName = args[0] instructionIndex = args[1] elif len(args) == 1: programName = self.DEFAULT_PROGRAM_NAME instructionIndex = args[0] else: raise TypeError("Invalid arguments") p = self.programs.get(programName, None) if p is not None: self.programs[programName] = p[self.MIN_TOKEN_INDEX:instructionIndex] def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): """Reset the program so that no instructions exist""" self.rollback(programName, self.MIN_TOKEN_INDEX) def addToSortedRewriteList(self, *args): """ Add an instruction to the rewrite instruction list ordered by the instruction number (do not use a binary search for bad efficiency). The list is ordered so that toString() can be done efficiently. When there are multiple instructions at the same index, the instructions must be ordered to ensure proper behavior. For example, a delete at index i must kill any replace operation at i. Insert-before operations must come before any replace / delete instructions. If there are multiple insert instructions for a single index, they are done in reverse insertion order so that "insert foo" then "insert bar" yields "foobar" in front rather than "barfoo". This is convenient because I can insert new InsertOp instructions at the index returned by the binary search. A ReplaceOp kills any previous replace op. Since delete is the same as replace with null text, i can check for ReplaceOp and cover DeleteOp at same time. :) """ if len(args) == 2: programName = args[0] op = args[1] elif len(args) == 1: programName = self.DEFAULT_PROGRAM_NAME op = args[0] else: raise TypeError("Invalid arguments") rewrites = self.getProgram(programName) #System.out.println("### add "+op+"; rewrites="+rewrites) # first insert position for operation for pos, searchOp in enumerate(rewrites): if searchOp.index == op.index: # now pos is the index in rewrites of first op with op.index #System.out.println("first op with op.index: pos="+pos) # an instruction operating already on that index was found; # make this one happen after all the others #System.out.println("found instr for index="+op.index) if isinstance(op, ReplaceOp): replaced = False i = pos # look for an existing replace while i < len(rewrites): prevOp = rewrites[pos] if prevOp.index != op.index: break if isinstance(prevOp, ReplaceOp): rewrites[pos] = op # replace old with new replaced = True break # keep going; must be an insert i += 1 if not replaced: # add replace op to the end of all the inserts rewrites.insert(i, op) else: # inserts are added in front of existing inserts rewrites.insert(pos, op) break elif searchOp.index > op.index: #System.out.println("no instruction at pos=="+pos) rewrites.insert(pos, op) break else: # new op is past any existing op, append to end rewrites.append(op) #System.out.println("after, rewrites="+rewrites) def insertAfter(self, *args): if len(args) == 2: programName = self.DEFAULT_PROGRAM_NAME index = args[0] text = args[1] elif len(args) == 3: programName = args[0] index = args[1] text = args[2] else: raise TypeError("Invalid arguments") if isinstance(index, Token): # index is a Token, grap the stream index from it index = index.index # to insert after, just insert before next index (even if past end) self.insertBefore(programName, index+1, text) def insertBefore(self, *args): if len(args) == 2: programName = self.DEFAULT_PROGRAM_NAME index = args[0] text = args[1] elif len(args) == 3: programName = args[0] index = args[1] text = args[2] else: raise TypeError("Invalid arguments") if isinstance(index, Token): # index is a Token, grap the stream index from it index = index.index self.addToSortedRewriteList( programName, InsertBeforeOp(index, text) ) def replace(self, *args): if len(args) == 2: programName = self.DEFAULT_PROGRAM_NAME first = args[0] last = args[0] text = args[1] elif len(args) == 3: programName = self.DEFAULT_PROGRAM_NAME first = args[0] last = args[1] text = args[2] elif len(args) == 4: programName = args[0] first = args[1] last = args[2] text = args[3] else: raise TypeError("Invalid arguments") if isinstance(first, Token): # first is a Token, grap the stream index from it first = first.index if isinstance(last, Token): # last is a Token, grap the stream index from it last = last.index if first > last or first < 0 or last < 0: return self.addToSortedRewriteList( programName, ReplaceOp(first, last, text) ) def delete(self, *args): self.replace(*(list(args) + [None])) def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): return self.lastRewriteTokenIndexes.get(programName, -1) def setLastRewriteTokenIndex(self, programName, i): self.lastRewriteTokenIndexes[programName] = i def getProgram(self, name): p = self.programs.get(name, None) if p is None: p = self.initializeProgram(name) return p def initializeProgram(self, name): p = [] self.programs[name] = p return p def toOriginalString(self, start=None, end=None): if start is None: start = self.MIN_TOKEN_INDEX if end is None: end = self.size() - 1 buf = StringIO() i = start while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): buf.write(self.get(i).text
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -