📄 t012lexerxml.py

📁 antlr最新版本V3源代码
💻 PY
字号:
import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


def _readUtf8(path):
    """Return the contents of the file at *path* decoded from UTF-8.

    The file is closed before returning, also when reading or decoding
    raises.
    """
    with open(path) as handle:
        return unicode(handle.read(), 'utf-8')


def _consumeTokens(lexer, eofType):
    """Call ``lexer.nextToken()`` until a token of type *eofType* is returned.

    Any exception raised by the lexer propagates to the caller.
    """
    while lexer.nextToken().type != eofType:
        pass


class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests driven by one valid document and three malformed ones."""

    def setUp(self):
        """Run ``compileGrammar()`` (defined outside this file) for each test."""
        self.compileGrammar()

    def lexerClass(self, base):
        """Return a subclass of *base* that is silent and fails fast.

        The subclass discards error messages and re-raises the exception
        handed to ``recover`` so that the tests below can catch it.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def recover(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _assertLexError(self, text, excClass, unexpectedType, line,
                        charPositionInLine):
        """Lex *text* and require that it fails with *excClass*.

        The raised exception must carry the given ``unexpectedType``,
        ``charPositionInLine`` and ``line`` values.  The test fails if the
        lexer reaches EOF without raising; any other exception type
        propagates as an error.
        """
        stream = antlr3.StringStream(text)
        lexer = self.getLexer(stream)

        try:
            _consumeTokens(lexer, antlr3.EOF)
        except excClass as exc:
            # assertEqual (rather than a bare assert) keeps these checks
            # active when the interpreter runs with -O.
            self.assertEqual(exc.unexpectedType, unexpectedType)
            self.assertEqual(exc.charPositionInLine, charPositionInLine)
            self.assertEqual(exc.line, line)
        else:
            self.fail(
                'lexer reached EOF without raising %s' % excClass.__name__
                )

    # NOTE: the test methods are documented with comments instead of
    # docstrings so that unittest keeps reporting them by method name.

    def testValid(self):
        # Lex the '.input' file that sits next to this module and compare
        # what the lexer wrote to ``outbuf`` with the '.output' file.
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(_readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)
        _consumeTokens(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = _readUtf8(basePath + '.output')

        if output != testOutput:
            # Report a line-by-line diff, escaped to ASCII so that it can
            # be printed on any terminal.
            differ = difflib.Differ()
            delta = differ.compare(
                output.splitlines(1), testOutput.splitlines(1)
                )
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in delta])
                )

    def testMalformedInput1(self):
        # '<document d>': expect NoViableAltException at the '>' on line 2.
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexError(
            text, antlr3.NoViableAltException,
            unexpectedType='>', line=2, charPositionInLine=11
            )

    def testMalformedInput2(self):
        # '<?tml ...': expect MismatchedSetException at the 't' on line 1.
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexError(
            text, antlr3.MismatchedSetException,
            unexpectedType='t', line=1, charPositionInLine=2
            )

    def testMalformedInput3(self):
        # '<docu ment attr="foo">': expect NoViableAltException at the 'a'
        # of 'attr' on line 2.
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexError(
            text, antlr3.NoViableAltException,
            unexpectedType='a', line=2, charPositionInLine=11
            )


if __name__ == '__main__':
    unittest.main()


# The block below is a disabled ad-hoc fuzzing loop, kept verbatim for
# reference.

## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"
##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>
## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &amp;
## &lt;
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """
##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))
##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string
##
##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)
##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break
##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'
##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print
##
##         raise
💿 文件大小 3471 K
👤 上传用户 LotusAwhaT
📂 所属分类 Java编程
🏷️ 相关标签

#antlr #版本 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -