⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test_812.py

📁 Harvestman-最新版本
💻 PY
字号:
# Demoing fix for EIAO bug #812.
# 812: Crawler does not identify links with arguments containing "#".
# Bug: http://trac.eiao.net/cgi-bin/trac.cgi/ticket/812
#
# The script feeds a saved HTML page to the SGML page parser, collects the
# links it reports, and asserts that four query links ending in "#MIDDLE"
# are present and typed as URL_TYPE_ANY rather than URL_TYPE_ANCHOR.

import sys
sys.path.append('..')  # make the sibling ``lib`` package importable

from lib import pageparser
from lib import config
from lib import logger
# NOTE(review): SetAlias and objects are presumably provided by this star
# import -- confirm against lib/common/common.py.
from lib.common.common import *
from lib import urltypes


class Url(str):
    """A parser link that compares equal to its URL text.

    ``link`` is a two-item sequence: ``link[0]`` is stored as ``typ`` and
    ``link[1]`` as ``url``.  The underlying ``str`` value is ``str(link)``;
    comparisons deliberately ignore it and use ``url`` instead, so that
    ``'some-url' in list_of_urls`` works with plain strings.
    """

    def __init__(self, link):
        self.url = link[1]
        self.typ = link[0]

    def __eq__(self, item):
        return item == self.url

    def __ne__(self, item):
        # Python 2 does not derive != from ==; keep the two consistent.
        return not self.__eq__(item)

    def __hash__(self):
        # Hash on the same text that equality uses.
        return hash(self.url)


SetAlias(config.HarvestManStateObject())
SetAlias(logger.HarvestManLogger())

cfg = objects.config
cfg.getquerylinks = True

p = pageparser.HarvestManSGMLOpParser()

# Close the fixture file explicitly instead of leaking the handle.
page = open('soskut_hu_index.html')
try:
    p.feed(page.read())
finally:
    page.close()

urls = [Url(link) for link in p.links]
# Single-argument print(...) produces the same output on Python 2 and 3.
print(urls)

test_urls = ['?module=municip#MIDDLE',
             '?module=institutes#MIDDLE',
             '?module=regulations#MIDDLE',
             '?module=events#MIDDLE']

# Every expected link must have been reported by the parser.
for turl in test_urls:
    print('Asserting %s' % turl)
    assert turl in urls, 'link not found by parser: %s' % turl

# Every matching link must be typed URL_TYPE_ANY and not URL_TYPE_ANCHOR.
for url in urls:
    if url in test_urls:
        # Report the link actually being checked (the original printed the
        # leftover ``turl`` from the loop above).
        print('Asserting type of %s' % url.url)
        assert (url.typ == urltypes.URL_TYPE_ANY and
                url.typ != urltypes.URL_TYPE_ANCHOR), \
            'unexpected link type %r for %s' % (url.typ, url.url)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -