📄 test_808.py
字号:
# Demoing fix for #808.
# 808: Crawler should try and parse links in "select" options in HTML
# forms.
# Bug: http://trac.eiao.net/cgi-bin/trac.cgi/ticket/808
#
# The script feeds one sample page to each parser implementation, first
# with <option> tag parsing off and then with it on, and asserts the
# number of links collected in each case.

import sys
# The 'lib' package is imported relative to the parent directory, so the
# path must be extended before the imports below.
sys.path.append('..')

from lib import pageparser
from lib import config
from lib import logger
from lib.common.common import *

# Presumably registers the global state/logger objects the parsers look
# up at runtime -- confirm against SetAlias in lib.common.common.
SetAlias(config.HarvestManStateObject())
SetAlias(logger.HarvestManLogger())

# Sample page and the link counts expected from it.
_SAMPLE_PAGE = 's_municipaux.htm'
_LINKS_WITHOUT_OPTION = 18
_LINKS_WITH_OPTION = 31


def _read_sample():
    """Return the contents of the sample page, closing the file afterwards."""
    # try/finally rather than 'with' so the script keeps running on the
    # older Python 2 interpreters this code base was written for.
    handle = open(_SAMPLE_PAGE)
    try:
        return handle.read()
    finally:
        handle.close()


def _assert_link_count(parser, expected):
    """Fail with a descriptive message unless parser.links has `expected` items."""
    found = len(parser.links)
    assert found == expected, \
        'expected %d links, found %d' % (expected, found)


# Read the page once; every parser run below is fed the same data.
_page = _read_sample()

# First parse with sgmlop parser with option parsing disabled...
print('Testing with sgmlop parser...')
p = pageparser.HarvestManSGMLOpParser()
# NOTE(review): unlike the pure Python section below, 'option' is not
# explicitly disabled here; this relies on the parser's default state.
p.feed(_page)
print('Asserting link count with option tag disabled...')
_assert_link_count(p, _LINKS_WITHOUT_OPTION)

# Now turn on option tag parsing
p.enable_feature('option')
p.feed(_page)
print('Asserting link count with option tag enabled...')
_assert_link_count(p, _LINKS_WITH_OPTION)

print('Testing with pure Python parser...')
p = pageparser.HarvestManSimpleParser()
p.disable_feature('option')
p.feed(_page)
print('Asserting link count with option tag disabled...')
_assert_link_count(p, _LINKS_WITHOUT_OPTION)

# Now turn on option tag parsing
p.enable_feature('option')
p.feed(_page)
print('Asserting link count with option tag enabled...')
_assert_link_count(p, _LINKS_WITH_OPTION)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -