📄 test_urlparser.py
字号:
assert(self.l[0].get_full_url()=='http://www.yahoo.com/photos/my%20photo.gif') assert(self.l[1].get_full_url()=='http://www.rediff.com/r/r/tn2/2003/jun/25usfed.htm') assert(self.l[2].get_full_url()=='http://cwc2003.rediffblogs.com/') assert(self.l[3].get_full_url()=='http://www.rediff.com/sports/2003/jun/25beck1.htm') assert(self.l[4].get_full_url()=='http://ftp.gnu.org/pub/lpf.README') assert(self.l[5].get_full_url()=='http://www.python.org/doc/2.3b2') assert(self.l[6].get_full_url()=='http://images.sourceforge.net/div.png') assert(self.l[7].get_full_url()=='http://pyro.sourceforge.net/manual/LICENSE') assert(self.l[8].get_full_url()=='http://www.foo.com/bar/python/test.htm') assert(self.l[9].get_full_url()=='http://www.foo.com/python/test.css') assert(self.l[10].get_full_url()=='http://www.garshol.priv.no/visuals/standard.css') assert(self.l[11].get_full_url()=='http://www.fnorb.org/index.html') assert(self.l[12].get_full_url()=='http://profigure.sourceforge.net/index.html') assert(self.l[13].get_full_url()=='http://www.foo.com/bar/index.html') assert(self.l[14].get_full_url()=='http://nltk.sourceforge.net/lite/doc/api/nltk_lite.contrib.fst.draw_graph.GraphEdgeWidget-class.html') assert(self.l[15].get_full_url()=='http://www.python.org/doc/current/icons/up.png') assert(self.l[16].get_full_url()=='http://www.eidsvoll.kommune.no/eway/eway/library/getmessage.asp?objectid=27015&moduleid=160') assert(self.l[17].get_full_url()=='http://www.dz-rs.si/index.php') assert(self.l[18].get_full_url()=='http://www.evvs.dk/index.php?cPath=26&osCsid=90207c4908a98db6503c0381b6b7aa70') assert(self.l[19].get_full_url()=='http://arstechnica.com/reviews/os/macosx-10.4.ars/') assert(self.l[20].get_full_url()=='http://www.fylkesmannen.no/fmt_hoved.asp') assert(self.l[21].get_full_url()=='http://www.example.com/display%3C%5D%2F?weight=1.0&article=fred&lang=en&size=100&country=in&q=&id=') assert(self.l[22].get_full_url()=='file:extension.css') assert(self.l[23].get_full_url()=='file://home/anand/style.css') assert(self.l[24].get_full_url()=='file://style.css') assert(self.l[25].get_full_url()=='file:/home/anand/style.css') assert(self.l[26].get_full_url()=='file:/home/anand') assert(self.l[27].get_full_url()=='file://home/anand') assert(self.l[28].get_full_url()=='http://www.foo.com/bar/') # Second set assert(self.l2[0].get_full_url()=='http://razor.occams.info/code/repo/coderef.c') assert(self.l2[1].get_full_url()=='http://razor.occams.info/code/repo/?/govtrack/sec/coderef2.c') assert(self.l2[2].get_full_url()=='http://razor.occams.info/code/repo/?/sec/coderef3.c') assert(self.l2[3].get_full_url()=='http://razor.occams.info/code/repo/?sec/coderef4.c') assert(self.l2[4].get_full_url()=='http://razor.occams.info/code/repo/sec/coderef5.c') assert(self.l2[5].get_full_url()=='http://razor.occams.info/sec/coderef6.c') assert(self.l2[6].get_full_url()=='http://razor.occams.info/code/repo/govtrack/sec/coderef7.c') assert(self.l2[7].get_full_url()=='http://razor.occams.info/code/repo/govtrack/?/sec/../coderef8.c') assert(self.l2[8].get_full_url()=='http://www.foo.com/govtrack/sec/?/id/../coderef9.c') assert(self.l2[9].get_full_url()=='http://razor.occams.info/code/repo2/govtrack/sec/?/id/../coderef10.c') assert(self.l2[10].get_full_url()=='http://razor.occams.info/code/coderef11.c') assert(self.l2[11].get_full_url()=='http://razor.occams.info/code/repo/govtrack/?/sec/coderef12.c') assert(self.l2[12].get_full_url()=='http://razor.occams.info/code/govtrack2/?/../sec/.././sec/coderef13.c') assert(self.l2[13].get_full_url()=='http://razor.occams.info/code/repo/?/govtrack/?/sec/coderef14.c') assert(self.l2[14].get_full_url()=='http://razor.occams.info/code/sec/?/../?/./sec/coderef15.c') def test_is_file_like(self): assert(self.l[0].filelike==True) assert(self.l[1].filelike==True) assert(self.l[2].filelike==False) assert(self.l[3].filelike==True) assert(self.l[4].filelike==True) assert(self.l[5].filelike==True) assert(self.l[6].filelike==True) assert(self.l[7].filelike==True) assert(self.l[8].filelike==True) assert(self.l[9].filelike==True) assert(self.l[10].filelike==True) assert(self.l[11].filelike==True) assert(self.l[12].filelike==True) assert(self.l[13].filelike==True) assert(self.l[14].filelike==True) assert(self.l[15].filelike==True) assert(self.l[16].filelike==True) assert(self.l[17].filelike==True) assert(self.l[18].filelike==True) assert(self.l[19].filelike==False) assert(self.l[20].filelike==True) assert(self.l[21].filelike==True) def test_anchor_tag(self): assert(self.l[0].get_anchor()=='') assert(self.l[1].get_anchor()=='') assert(self.l[2].get_anchor()=='') assert(self.l[3].get_anchor()=='') assert(self.l[4].get_anchor()=='') assert(self.l[5].get_anchor()=='') assert(self.l[6].get_anchor()=='') assert(self.l[7].get_anchor()=='') assert(self.l[8].get_anchor()=='') assert(self.l[9].get_anchor()=='') assert(self.l[10].get_anchor()=='') assert(self.l[11].get_anchor()=='') assert(self.l[12].get_anchor()=='') assert(self.l[13].get_anchor()=='#anchor') assert(self.l[14].get_anchor()=='#__init__#index-after') assert(self.l[15].get_anchor()=='') assert(self.l[16].get_anchor()=='') assert(self.l[17].get_anchor()=='') assert(self.l[18].get_anchor()=='') assert(self.l[19].get_anchor()=='') assert(self.l[20].get_anchor()=='') assert(self.l[21].get_anchor()=='') def test_canonical_url(self): assert(self.l[21].get_canonical_url()=='http://example.com/display%3C%5D%2F?article=fred&country=in&lang=en&size=100&weight=1.0') def test_invalid_urls(self): # Make sure invalid URLs do raise an error try: HarvestManUrl('') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Zero Length Url') try: HarvestManUrl('',baseurl='http://www.foo.com') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Zero Length Url') try: HarvestManUrl('http://',baseurl='http://www.foo.com') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Invalid URL containing only protocol') try: HarvestManUrl('https://',baseurl='http://www.foo.com') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Invalid URL containing only protocol') try: HarvestManUrl('ftp://',baseurl='http://www.foo.com') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Invalid URL containing only protocol') try: HarvestManUrl('file://',baseurl='http://www.foo.com') # If it comes here, it is an error assert(0==1) except HarvestManUrlError, e: # This should produce an error assert(str(e)=='Error: Invalid URL containing only protocol') def run(result): return test_base.run_test(TestHarvestManUrl, result)if __name__=="__main__": s = unittest.makeSuite(TestHarvestManUrl) unittest.TextTestRunner(verbosity=2).run(s) test_base.clean_up()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -