⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_urlparser.py

📁 Harvestman-最新版本
💻 PY
📖 第 1 页 / 共 2 页
字号:
        assert(self.l[0].get_full_url()=='http://www.yahoo.com/photos/my%20photo.gif')        assert(self.l[1].get_full_url()=='http://www.rediff.com/r/r/tn2/2003/jun/25usfed.htm')        assert(self.l[2].get_full_url()=='http://cwc2003.rediffblogs.com/')        assert(self.l[3].get_full_url()=='http://www.rediff.com/sports/2003/jun/25beck1.htm')        assert(self.l[4].get_full_url()=='http://ftp.gnu.org/pub/lpf.README')        assert(self.l[5].get_full_url()=='http://www.python.org/doc/2.3b2')        assert(self.l[6].get_full_url()=='http://images.sourceforge.net/div.png')        assert(self.l[7].get_full_url()=='http://pyro.sourceforge.net/manual/LICENSE')        assert(self.l[8].get_full_url()=='http://www.foo.com/bar/python/test.htm')        assert(self.l[9].get_full_url()=='http://www.foo.com/python/test.css')        assert(self.l[10].get_full_url()=='http://www.garshol.priv.no/visuals/standard.css')        assert(self.l[11].get_full_url()=='http://www.fnorb.org/index.html')        assert(self.l[12].get_full_url()=='http://profigure.sourceforge.net/index.html')        assert(self.l[13].get_full_url()=='http://www.foo.com/bar/index.html')        assert(self.l[14].get_full_url()=='http://nltk.sourceforge.net/lite/doc/api/nltk_lite.contrib.fst.draw_graph.GraphEdgeWidget-class.html')        assert(self.l[15].get_full_url()=='http://www.python.org/doc/current/icons/up.png')        assert(self.l[16].get_full_url()=='http://www.eidsvoll.kommune.no/eway/eway/library/getmessage.asp?objectid=27015&moduleid=160')        assert(self.l[17].get_full_url()=='http://www.dz-rs.si/index.php')        assert(self.l[18].get_full_url()=='http://www.evvs.dk/index.php?cPath=26&osCsid=90207c4908a98db6503c0381b6b7aa70')        assert(self.l[19].get_full_url()=='http://arstechnica.com/reviews/os/macosx-10.4.ars/')        assert(self.l[20].get_full_url()=='http://www.fylkesmannen.no/fmt_hoved.asp')        assert(self.l[21].get_full_url()=='http://www.example.com/display%3C%5D%2F?weight=1.0&article=fred&lang=en&size=100&country=in&q=&id=')        assert(self.l[22].get_full_url()=='file:extension.css')        assert(self.l[23].get_full_url()=='file://home/anand/style.css')        assert(self.l[24].get_full_url()=='file://style.css')        assert(self.l[25].get_full_url()=='file:/home/anand/style.css')        assert(self.l[26].get_full_url()=='file:/home/anand')        assert(self.l[27].get_full_url()=='file://home/anand')        assert(self.l[28].get_full_url()=='http://www.foo.com/bar/')                # Second set        assert(self.l2[0].get_full_url()=='http://razor.occams.info/code/repo/coderef.c')        assert(self.l2[1].get_full_url()=='http://razor.occams.info/code/repo/?/govtrack/sec/coderef2.c')        assert(self.l2[2].get_full_url()=='http://razor.occams.info/code/repo/?/sec/coderef3.c')        assert(self.l2[3].get_full_url()=='http://razor.occams.info/code/repo/?sec/coderef4.c')        assert(self.l2[4].get_full_url()=='http://razor.occams.info/code/repo/sec/coderef5.c')        assert(self.l2[5].get_full_url()=='http://razor.occams.info/sec/coderef6.c')        assert(self.l2[6].get_full_url()=='http://razor.occams.info/code/repo/govtrack/sec/coderef7.c')        assert(self.l2[7].get_full_url()=='http://razor.occams.info/code/repo/govtrack/?/sec/../coderef8.c')        assert(self.l2[8].get_full_url()=='http://www.foo.com/govtrack/sec/?/id/../coderef9.c')        assert(self.l2[9].get_full_url()=='http://razor.occams.info/code/repo2/govtrack/sec/?/id/../coderef10.c')        assert(self.l2[10].get_full_url()=='http://razor.occams.info/code/coderef11.c')        assert(self.l2[11].get_full_url()=='http://razor.occams.info/code/repo/govtrack/?/sec/coderef12.c')        assert(self.l2[12].get_full_url()=='http://razor.occams.info/code/govtrack2/?/../sec/.././sec/coderef13.c')        assert(self.l2[13].get_full_url()=='http://razor.occams.info/code/repo/?/govtrack/?/sec/coderef14.c')        assert(self.l2[14].get_full_url()=='http://razor.occams.info/code/sec/?/../?/./sec/coderef15.c')                                                   def test_is_file_like(self):        assert(self.l[0].filelike==True)        assert(self.l[1].filelike==True)        assert(self.l[2].filelike==False)        assert(self.l[3].filelike==True)        assert(self.l[4].filelike==True)        assert(self.l[5].filelike==True)        assert(self.l[6].filelike==True)        assert(self.l[7].filelike==True)        assert(self.l[8].filelike==True)        assert(self.l[9].filelike==True)        assert(self.l[10].filelike==True)        assert(self.l[11].filelike==True)        assert(self.l[12].filelike==True)        assert(self.l[13].filelike==True)        assert(self.l[14].filelike==True)        assert(self.l[15].filelike==True)        assert(self.l[16].filelike==True)        assert(self.l[17].filelike==True)        assert(self.l[18].filelike==True)        assert(self.l[19].filelike==False)        assert(self.l[20].filelike==True)        assert(self.l[21].filelike==True)                                    def test_anchor_tag(self):        assert(self.l[0].get_anchor()=='')        assert(self.l[1].get_anchor()=='')        assert(self.l[2].get_anchor()=='')        assert(self.l[3].get_anchor()=='')        assert(self.l[4].get_anchor()=='')        assert(self.l[5].get_anchor()=='')        assert(self.l[6].get_anchor()=='')        assert(self.l[7].get_anchor()=='')        assert(self.l[8].get_anchor()=='')        assert(self.l[9].get_anchor()=='')        assert(self.l[10].get_anchor()=='')        assert(self.l[11].get_anchor()=='')        assert(self.l[12].get_anchor()=='')        assert(self.l[13].get_anchor()=='#anchor')        assert(self.l[14].get_anchor()=='#__init__#index-after')        assert(self.l[15].get_anchor()=='')        assert(self.l[16].get_anchor()=='')        assert(self.l[17].get_anchor()=='')        assert(self.l[18].get_anchor()=='')        assert(self.l[19].get_anchor()=='')        assert(self.l[20].get_anchor()=='')        assert(self.l[21].get_anchor()=='')                            def test_canonical_url(self):        assert(self.l[21].get_canonical_url()=='http://example.com/display%3C%5D%2F?article=fred&country=in&lang=en&size=100&weight=1.0')    def test_invalid_urls(self):        # Make sure invalid URLs do raise an error        try:            HarvestManUrl('')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Zero Length Url')        try:            HarvestManUrl('',baseurl='http://www.foo.com')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Zero Length Url')        try:            HarvestManUrl('http://',baseurl='http://www.foo.com')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Invalid URL containing only protocol')        try:            HarvestManUrl('https://',baseurl='http://www.foo.com')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Invalid URL containing only protocol')        try:            HarvestManUrl('ftp://',baseurl='http://www.foo.com')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Invalid URL containing only protocol')        try:            HarvestManUrl('file://',baseurl='http://www.foo.com')            # If it comes here, it is an error            assert(0==1)        except HarvestManUrlError, e:            # This should produce an error            assert(str(e)=='Error: Invalid URL containing only protocol')            def run(result):    return test_base.run_test(TestHarvestManUrl, result)if __name__=="__main__":    s = unittest.makeSuite(TestHarvestManUrl)    unittest.TextTestRunner(verbosity=2).run(s)    test_base.clean_up()        

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -