📄 urllister.py
字号:
"""Extract list of URLs in a web pageThis program is part of "Dive Into Python", a free Python book forexperienced programmers. Visit http://diveintopython.org/ for thelatest version."""__author__ = "Mark Pilgrim (mark@diveintopython.org)"__version__ = "$Revision: 1.2 $"__date__ = "$Date: 2004/05/05 21:57:19 $"__copyright__ = "Copyright (c) 2001 Mark Pilgrim"__license__ = "Python"from sgmllib import SGMLParserclass URLLister(SGMLParser): def reset(self): SGMLParser.reset(self) self.urls = [] def start_a(self, attrs): href = [v for k, v in attrs if k=='href'] if href: self.urls.extend(href)if __name__ == "__main__": import urllib usock = urllib.urlopen("http://diveintopython.org/") parser = URLLister() parser.feed(usock.read()) parser.close() usock.close() for url in parser.urls: print url
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -