urllib.py
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,466 行 · 第 1/4 页
PY
1,466 行
except ftplib.error_perm, reason: raise IOError, ('ftp error', reason), sys.exc_info()[2] # Restore the transfer mode! self.ftp.voidcmd(cmd) # Try to retrieve as a file try: cmd = 'RETR ' + file conn = self.ftp.ntransfercmd(cmd) except ftplib.error_perm, reason: if str(reason)[:3] != '550': raise IOError, ('ftp error', reason), sys.exc_info()[2] if not conn: # Set transfer mode to ASCII! self.ftp.voidcmd('TYPE A') # Try a directory listing if file: cmd = 'LIST ' + file else: cmd = 'LIST' conn = self.ftp.ntransfercmd(cmd) self.busy = 1 # Pass back both a suitably decorated object and a retrieval length return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1]) def endtransfer(self): if not self.busy: return self.busy = 0 try: self.ftp.voidresp() except ftperrors(): pass def close(self): self.endtransfer() try: self.ftp.close() except ftperrors(): passclass addbase: """Base class for addinfo and addclosehook.""" def __init__(self, fp): self.fp = fp self.read = self.fp.read self.readline = self.fp.readline if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno def __repr__(self): return '<%s at %s whose fp = %s>' % (self.__class__.__name__, `id(self)`, `self.fp`) def close(self): self.read = None self.readline = None self.readlines = None self.fileno = None if self.fp: self.fp.close() self.fp = Noneclass addclosehook(addbase): """Class to add a close hook to an open file.""" def __init__(self, fp, closehook, *hookargs): addbase.__init__(self, fp) self.closehook = closehook self.hookargs = hookargs def close(self): addbase.close(self) if self.closehook: apply(self.closehook, self.hookargs) self.closehook = None self.hookargs = Noneclass addinfo(addbase): """class to add an info() method to an open file.""" def __init__(self, fp, headers): addbase.__init__(self, fp) self.headers = headers def info(self): return self.headersclass addinfourl(addbase): """class to add info() and geturl() methods to an open file.""" def __init__(self, fp, headers, url): addbase.__init__(self, fp) self.headers = headers self.url = url def info(self): return self.headers def geturl(self): return self.urldef basejoin(base, url): """Utility to combine a URL with a base URL to form a new URL.""" type, path = splittype(url) if type: # if url is complete (i.e., it contains a type), return it return url host, path = splithost(path) type, basepath = splittype(base) # inherit type from base if host: # if url contains host, just inherit type if type: return type + '://' + host + path else: # no type inherited, so url must have started with // # just return it return url host, basepath = splithost(basepath) # inherit host basepath, basetag = splittag(basepath) # remove extraneous cruft basepath, basequery = splitquery(basepath) # idem if path[:1] != '/': # non-absolute path name if path[:1] in ('#', '?'): # path is just a tag or query, attach to basepath i = len(basepath) else: # else replace last component i = basepath.rfind('/') if i < 0: # basepath not absolute if host: # host present, make absolute basepath = '/' else: # else keep non-absolute basepath = '' else: # remove last file component basepath = basepath[:i+1] # Interpret ../ (important because of symlinks) while basepath and path[:3] == '../': path = path[3:] i = basepath[:-1].rfind('/') if i > 0: basepath = basepath[:i+1] elif i == 0: basepath = '/' break else: basepath = '' path = basepath + path if host and path and path[0] != '/': path = '/' + path if type and host: return type + '://' + host + path elif type: return type + ':' + path elif host: return '//' + host + path # don't know what this means else: return path# Utilities to parse URLs (most of these return None for missing parts):# unwrap('<URL:type://host/path>') --> 'type://host/path'# splittype('type:opaquestring') --> 'type', 'opaquestring'# splithost('//host[:port]/path') --> 'host[:port]', '/path'# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'# splitpasswd('user:passwd') -> 'user', 'passwd'# splitport('host:port') --> 'host', 'port'# splitquery('/path?query') --> '/path', 'query'# splittag('/path#tag') --> '/path', 'tag'# splitattr('/path;attr1=value1;attr2=value2;...') -># '/path', ['attr1=value1', 'attr2=value2', ...]# splitvalue('attr=value') --> 'attr', 'value'# splitgophertype('/Xselector') --> 'X', 'selector'# unquote('abc%20def') -> 'abc def'# quote('abc def') -> 'abc%20def')def toBytes(url): """toBytes(u"URL") --> 'URL'.""" # Most URL schemes require ASCII. If that changes, the conversion # can be relaxed if type(url) is types.UnicodeType: try: url = url.encode("ASCII") except UnicodeError: raise UnicodeError("URL " + repr(url) + " contains non-ASCII characters") return urldef unwrap(url): """unwrap('<URL:type://host/path>') --> 'type://host/path'.""" url = url.strip() if url[:1] == '<' and url[-1:] == '>': url = url[1:-1].strip() if url[:4] == 'URL:': url = url[4:].strip() return url_typeprog = Nonedef splittype(url): """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" global _typeprog if _typeprog is None: import re _typeprog = re.compile('^([^/:]+):') match = _typeprog.match(url) if match: scheme = match.group(1) return scheme.lower(), url[len(scheme) + 1:] return None, url_hostprog = Nonedef splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: import re _hostprog = re.compile('^//([^/]*)(.*)$') match = _hostprog.match(url) if match: return match.group(1, 2) return None, url_userprog = Nonedef splituser(host): """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" global _userprog if _userprog is None: import re _userprog = re.compile('^(.*)@(.*)$') match = _userprog.match(host) if match: return map(unquote, match.group(1, 2)) return None, host_passwdprog = Nonedef splitpasswd(user): """splitpasswd('user:passwd') -> 'user', 'passwd'.""" global _passwdprog if _passwdprog is None: import re _passwdprog = re.compile('^([^:]*):(.*)$') match = _passwdprog.match(user) if match: return match.group(1, 2) return user, None# splittag('/path#tag') --> '/path', 'tag'_portprog = Nonedef splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: import re _portprog = re.compile('^(.*):([0-9]+)$') match = _portprog.match(host) if match: return match.group(1, 2) return host, None_nportprog = Nonedef splitnport(host, defport=-1): """Split host and port, returning numeric port. Return given default port if no ':' found; defaults to -1. Return numerical port if a valid number are found after ':'. Return None if ':' but not a valid number.""" global _nportprog if _nportprog is None: import re _nportprog = re.compile('^(.*):(.*)$') match = _nportprog.match(host) if match: host, port = match.group(1, 2) try: if not port: raise ValueError, "no digits" nport = int(port) except ValueError: nport = None return host, nport return host, defport_queryprog = Nonedef splitquery(url): """splitquery('/path?query') --> '/path', 'query'.""" global _queryprog if _queryprog is None: import re _queryprog = re.compile('^(.*)\?([^?]*)$') match = _queryprog.match(url) if match: return match.group(1, 2) return url, None_tagprog = Nonedef splittag(url): """splittag('/path#tag') --> '/path', 'tag'.""" global _tagprog if _tagprog is None: import re _tagprog = re.compile('^(.*)#([^#]*)$') match = _tagprog.match(url) if match: return match.group(1, 2) return url, Nonedef splitattr(url): """splitattr('/path;attr1=value1;attr2=value2;...') -> '/path', ['attr1=value1', 'attr2=value2', ...].""" words = url.split(';') return words[0], words[1:]_valueprog = Nonedef splitvalue(attr): """splitvalue('attr=value') --> 'attr', 'value'.""" global _valueprog if _valueprog is None: import re _valueprog = re.compile('^([^=]*)=(.*)$') match = _valueprog.match(attr) if match: return match.group(1, 2) return attr, Nonedef splitgophertype(selector): """splitgophertype('/Xselector') --> 'X', 'selector'.""" if selector[:1] == '/' and selector[1:2]: return selector[1], selector[2:] return None, selectordef unquote(s): """unquote('abc%20def') -> 'abc def'.""" mychr = chr myatoi = int list = s.split('%') res = [list[0]] myappend = res.append del list[0] for item in list: if item[1:2]: try: myappend(mychr(myatoi(item[:2], 16)) + item[2:]) except ValueError: myappend('%' + item) else: myappend('%' + item) return "".join(res)def unquote_plus(s): """unquote('%7e/abc+def') -> '~/abc def'""" if '+' in s: # replace '+' with ' ' s = ' '.join(s.split('+')) return unquote(s)always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '0123456789' '_.-')_fast_safe_test = always_safe + '/'_fast_safe = Nonedef _fast_quote(s): global _fast_safe if _fast_safe is None: _fast_safe = {} for c in _fast_safe_test: _fast_safe[c] = c res = list(s)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?