📄 t012lexerxml.py
字号:
import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


def _readUtf8File(path):
    """Return the contents of ``path`` decoded from UTF-8.

    The file is closed before returning, even if reading or decoding fails.
    """
    with open(path) as f:
        return unicode(f.read(), 'utf-8')


def _drainLexer(lexer, eofType):
    """Pull tokens from ``lexer`` until one of type ``eofType`` is returned.

    Any exception raised by ``lexer.nextToken()`` propagates to the caller.
    """
    while True:
        if lexer.nextToken().type == eofType:
            break


class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests: one golden-file comparison and three malformed inputs."""

    def setUp(self):
        """Compile the grammar under test before each test method."""
        self.compileGrammar()

    def lexerClass(self, base):
        """Return a subclass of ``base`` that neither reports nor recovers.

        Error messages are discarded and ``recover`` re-raises the error it
        is given, so the tests below can catch the original exception.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def recover(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _checkMalformed(self, text, excClass, unexpectedType,
                        charPositionInLine, line):
        """Lex ``text`` and require that it fails with ``excClass``.

        The caught exception must carry the given ``unexpectedType``,
        ``charPositionInLine`` and ``line``. The test fails if the lexer
        reaches EOF without raising.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            _drainLexer(lexer, antlr3.EOF)
        except excClass as exc:
            self.assertEqual(exc.unexpectedType, unexpectedType)
            self.assertEqual(exc.charPositionInLine, charPositionInLine)
            self.assertEqual(exc.line, line)
        else:
            self.fail("lexer reached EOF without rejecting malformed input")

    def testValid(self):
        # Lex the '.input' file that sits next to this module and compare
        # what the lexer wrote to its outbuf against the '.output' file.
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(_readUtf8File(basePath + '.input'))
        lexer = self.getLexer(stream)
        _drainLexer(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = _readUtf8File(basePath + '.output')

        if output != testOutput:
            diff = difflib.Differ().compare(
                output.splitlines(1), testOutput.splitlines(1))
            # Escape non-ASCII characters so the failure message can always
            # be printed.
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in diff])
                )

    def testMalformedInput1(self):
        # Expected failure: '>' at line 2, column 11.
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._checkMalformed(text, antlr3.NoViableAltException, '>', 11, 2)

    def testMalformedInput2(self):
        # Expected failure: 't' at line 1, column 2.
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._checkMalformed(text, antlr3.MismatchedSetException, 't', 2, 1)

    def testMalformedInput3(self):
        # Expected failure: 'a' at line 2, column 11.
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._checkMalformed(text, antlr3.NoViableAltException, 'a', 11, 2)


if __name__ == '__main__':
    unittest.main()


## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"
##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>
##
## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &
## <
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """
##
##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))
##
##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string
##
##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)
##
##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break
##
##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'
##
##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print
##
##         raise
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -