📄 imagecrawler.py
字号:
#!/usr/bin/env python

"""
imagecrawler.py - Demonstrating custom crawler writing by
subscribing to events. This is a crawler which downloads
only images from the web.

Created by Anand B Pillai <abpillai at gmail dot com>

Copyright (C) 2008 Anand B Pillai
"""

import sys
import __init__
from harvestman.apps.spider import HarvestMan
from harvestman.lib.common.macros import *


class ImageCrawler(HarvestMan):
    """ A crawler which saves only images to disk """

    def write_this_url(self, event, *args, **kwargs):
        """Handler bound to the 'writeurl' event.

        Reads ``event.url`` and returns True when that URL reports
        itself as an image or has a truthy ``starturl`` attribute;
        returns False otherwise. Extra positional and keyword
        arguments are accepted and ignored.
        """
        target = event.url
        return bool(target.is_image() or target.starturl)

    def include_links(self, event, *args, **kwargs):
        """Handler bound to the 'includelinks' event.

        Returns True when ``event.url`` reports itself as an image.
        For any other URL it returns None rather than False.
        NOTE(review): presumably None leaves the decision to the
        framework's default rules instead of vetoing the link --
        confirm against HarvestMan's event semantics.
        """
        if event.url.is_image():
            return True
        # Deliberately not False: keep the original "no opinion" result.
        return None


if __name__ == "__main__":
    spider = ImageCrawler()
    spider.initialize()

    config = spider.get_config()
    # You might want to re-enable this!
    config.robots = 0
    config.verbosity = 3
    # Need in-mem data mode to obtain data for
    # web-page URLs to parse them!
    config.datamode = CONNECTOR_DATA_MODE_INMEM

    # Subscribe the two filters above to their crawler events,
    # in the same order as before.
    for event_name, handler in (
        ('writeurl', spider.write_this_url),
        ('includelinks', spider.include_links),
    ):
        spider.bind_event(event_name, handler)

    spider.main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -