📄 test_812.py
字号:
# Demoing fix for EIAO bug #812.
# 812: Crawler does not identify links with arguments containing "#".
# Bug: http://trac.eiao.net/cgi-bin/trac.cgi/ticket/812

import sys

# The project's ``lib`` package is imported relative to the parent directory,
# so the search path has to be extended before the imports below run.
sys.path.append('..')

from lib import pageparser
from lib import config
from lib import logger
from lib.common.common import *
from lib import urltypes


class Url(str):
    """Wrap one parser link so it can be compared against a plain URL string.

    ``link`` is indexed as a pair: ``link[0]`` is stored as ``typ`` and
    ``link[1]`` as ``url``.  Equality is delegated to ``url``, which lets
    ``'some-url' in list_of_Url`` and ``url_obj in list_of_str`` both work.
    """

    def __init__(self, link):
        self.url = link[1]
        self.typ = link[0]

    def __eq__(self, item):
        return item == self.url

    def __ne__(self, item):
        # Python 2 does not derive ``!=`` from ``__eq__``; keep them in sync.
        return not self.__eq__(item)

    def __hash__(self):
        # Defining ``__eq__`` drops the inherited hash on Python 3; hash on
        # the same value that equality uses.
        return hash(self.url)


# Register the global config and logger objects before creating the parser.
SetAlias(config.HarvestManStateObject())
SetAlias(logger.HarvestManLogger())

# ``objects`` comes from the star import above.
cfg = objects.config
# NOTE(review): presumably makes the parser keep links that carry query
# arguments -- confirm against the pageparser implementation.
cfg.getquerylinks = True

p = pageparser.HarvestManSGMLOpParser()
# Close the fixture file deterministically instead of leaking the handle.
with open('soskut_hu_index.html') as fixture:
    p.feed(fixture.read())

urls = [Url(link) for link in p.links]
print(urls)

test_urls = ['?module=municip#MIDDLE',
             '?module=institutes#MIDDLE',
             '?module=regulations#MIDDLE',
             '?module=events#MIDDLE']

# Every query link containing "#" must have been picked up by the parser.
for turl in test_urls:
    print('Asserting %s' % turl)
    assert turl in urls

# ... and each of them must be typed as a generic URL, not as an anchor.
for url in urls:
    if url in test_urls:
        # Report the link actually being checked (the original printed the
        # stale ``turl`` left over from the loop above).
        print('Asserting type of %s' % url.url)
        assert (url.typ == urltypes.URL_TYPE_ANY and
                url.typ != urltypes.URL_TYPE_ANCHOR)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -