urllib.py

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,466 行 · 第 1/4 页

PY
1,466
字号
            except ftplib.error_perm, reason:                raise IOError, ('ftp error', reason), sys.exc_info()[2]            # Restore the transfer mode!            self.ftp.voidcmd(cmd)            # Try to retrieve as a file            try:                cmd = 'RETR ' + file                conn = self.ftp.ntransfercmd(cmd)            except ftplib.error_perm, reason:                if str(reason)[:3] != '550':                    raise IOError, ('ftp error', reason), sys.exc_info()[2]        if not conn:            # Set transfer mode to ASCII!            self.ftp.voidcmd('TYPE A')            # Try a directory listing            if file: cmd = 'LIST ' + file            else: cmd = 'LIST'            conn = self.ftp.ntransfercmd(cmd)        self.busy = 1        # Pass back both a suitably decorated object and a retrieval length        return (addclosehook(conn[0].makefile('rb'),                             self.endtransfer), conn[1])    def endtransfer(self):        if not self.busy:            return        self.busy = 0        try:            self.ftp.voidresp()        except ftperrors():            pass    def close(self):        self.endtransfer()        try:            self.ftp.close()        except ftperrors():            passclass addbase:    """Base class for addinfo and addclosehook."""    def __init__(self, fp):        self.fp = fp        self.read = self.fp.read        self.readline = self.fp.readline        if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines        if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno    def __repr__(self):        return '<%s at %s whose fp = %s>' % (self.__class__.__name__,                                             `id(self)`, `self.fp`)    def close(self):        self.read = None        self.readline = None        self.readlines = None        self.fileno = None        if self.fp: self.fp.close()        self.fp = Noneclass addclosehook(addbase):    """Class to add a close hook to an open file."""    def __init__(self, fp, closehook, *hookargs):        addbase.__init__(self, fp)        self.closehook = closehook        self.hookargs = hookargs    def close(self):        addbase.close(self)        if self.closehook:            apply(self.closehook, self.hookargs)            self.closehook = None            self.hookargs = Noneclass addinfo(addbase):    """class to add an info() method to an open file."""    def __init__(self, fp, headers):        addbase.__init__(self, fp)        self.headers = headers    def info(self):        return self.headersclass addinfourl(addbase):    """class to add info() and geturl() methods to an open file."""    def __init__(self, fp, headers, url):        addbase.__init__(self, fp)        self.headers = headers        self.url = url    def info(self):        return self.headers    def geturl(self):        return self.urldef basejoin(base, url):    """Utility to combine a URL with a base URL to form a new URL."""    type, path = splittype(url)    if type:        # if url is complete (i.e., it contains a type), return it        return url    host, path = splithost(path)    type, basepath = splittype(base) # inherit type from base    if host:        # if url contains host, just inherit type        if type: return type + '://' + host + path        else:            # no type inherited, so url must have started with //            # just return it            return url    host, basepath = splithost(basepath) # inherit host    basepath, basetag = splittag(basepath) # remove extraneous cruft    basepath, basequery = splitquery(basepath) # idem    if path[:1] != '/':        # non-absolute path name        if path[:1] in ('#', '?'):            # path is just a tag or query, attach to basepath            i = len(basepath)        else:            # else replace last component            i = basepath.rfind('/')        if i < 0:            # basepath not absolute            if host:                # host present, make absolute                basepath = '/'            else:                # else keep non-absolute                basepath = ''        else:            # remove last file component            basepath = basepath[:i+1]        # Interpret ../ (important because of symlinks)        while basepath and path[:3] == '../':            path = path[3:]            i = basepath[:-1].rfind('/')            if i > 0:                basepath = basepath[:i+1]            elif i == 0:                basepath = '/'                break            else:                basepath = ''        path = basepath + path    if host and path and path[0] != '/':        path = '/' + path    if type and host: return type + '://' + host + path    elif type: return type + ':' + path    elif host: return '//' + host + path # don't know what this means    else: return path# Utilities to parse URLs (most of these return None for missing parts):# unwrap('<URL:type://host/path>') --> 'type://host/path'# splittype('type:opaquestring') --> 'type', 'opaquestring'# splithost('//host[:port]/path') --> 'host[:port]', '/path'# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'# splitpasswd('user:passwd') -> 'user', 'passwd'# splitport('host:port') --> 'host', 'port'# splitquery('/path?query') --> '/path', 'query'# splittag('/path#tag') --> '/path', 'tag'# splitattr('/path;attr1=value1;attr2=value2;...') ->#   '/path', ['attr1=value1', 'attr2=value2', ...]# splitvalue('attr=value') --> 'attr', 'value'# splitgophertype('/Xselector') --> 'X', 'selector'# unquote('abc%20def') -> 'abc def'# quote('abc def') -> 'abc%20def')def toBytes(url):    """toBytes(u"URL") --> 'URL'."""    # Most URL schemes require ASCII. If that changes, the conversion    # can be relaxed    if type(url) is types.UnicodeType:        try:            url = url.encode("ASCII")        except UnicodeError:            raise UnicodeError("URL " + repr(url) +                               " contains non-ASCII characters")    return urldef unwrap(url):    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""    url = url.strip()    if url[:1] == '<' and url[-1:] == '>':        url = url[1:-1].strip()    if url[:4] == 'URL:': url = url[4:].strip()    return url_typeprog = Nonedef splittype(url):    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""    global _typeprog    if _typeprog is None:        import re        _typeprog = re.compile('^([^/:]+):')    match = _typeprog.match(url)    if match:        scheme = match.group(1)        return scheme.lower(), url[len(scheme) + 1:]    return None, url_hostprog = Nonedef splithost(url):    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""    global _hostprog    if _hostprog is None:        import re        _hostprog = re.compile('^//([^/]*)(.*)$')    match = _hostprog.match(url)    if match: return match.group(1, 2)    return None, url_userprog = Nonedef splituser(host):    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""    global _userprog    if _userprog is None:        import re        _userprog = re.compile('^(.*)@(.*)$')    match = _userprog.match(host)    if match: return map(unquote, match.group(1, 2))    return None, host_passwdprog = Nonedef splitpasswd(user):    """splitpasswd('user:passwd') -> 'user', 'passwd'."""    global _passwdprog    if _passwdprog is None:        import re        _passwdprog = re.compile('^([^:]*):(.*)$')    match = _passwdprog.match(user)    if match: return match.group(1, 2)    return user, None# splittag('/path#tag') --> '/path', 'tag'_portprog = Nonedef splitport(host):    """splitport('host:port') --> 'host', 'port'."""    global _portprog    if _portprog is None:        import re        _portprog = re.compile('^(.*):([0-9]+)$')    match = _portprog.match(host)    if match: return match.group(1, 2)    return host, None_nportprog = Nonedef splitnport(host, defport=-1):    """Split host and port, returning numeric port.    Return given default port if no ':' found; defaults to -1.    Return numerical port if a valid number are found after ':'.    Return None if ':' but not a valid number."""    global _nportprog    if _nportprog is None:        import re        _nportprog = re.compile('^(.*):(.*)$')    match = _nportprog.match(host)    if match:        host, port = match.group(1, 2)        try:            if not port: raise ValueError, "no digits"            nport = int(port)        except ValueError:            nport = None        return host, nport    return host, defport_queryprog = Nonedef splitquery(url):    """splitquery('/path?query') --> '/path', 'query'."""    global _queryprog    if _queryprog is None:        import re        _queryprog = re.compile('^(.*)\?([^?]*)$')    match = _queryprog.match(url)    if match: return match.group(1, 2)    return url, None_tagprog = Nonedef splittag(url):    """splittag('/path#tag') --> '/path', 'tag'."""    global _tagprog    if _tagprog is None:        import re        _tagprog = re.compile('^(.*)#([^#]*)$')    match = _tagprog.match(url)    if match: return match.group(1, 2)    return url, Nonedef splitattr(url):    """splitattr('/path;attr1=value1;attr2=value2;...') ->        '/path', ['attr1=value1', 'attr2=value2', ...]."""    words = url.split(';')    return words[0], words[1:]_valueprog = Nonedef splitvalue(attr):    """splitvalue('attr=value') --> 'attr', 'value'."""    global _valueprog    if _valueprog is None:        import re        _valueprog = re.compile('^([^=]*)=(.*)$')    match = _valueprog.match(attr)    if match: return match.group(1, 2)    return attr, Nonedef splitgophertype(selector):    """splitgophertype('/Xselector') --> 'X', 'selector'."""    if selector[:1] == '/' and selector[1:2]:        return selector[1], selector[2:]    return None, selectordef unquote(s):    """unquote('abc%20def') -> 'abc def'."""    mychr = chr    myatoi = int    list = s.split('%')    res = [list[0]]    myappend = res.append    del list[0]    for item in list:        if item[1:2]:            try:                myappend(mychr(myatoi(item[:2], 16))                     + item[2:])            except ValueError:                myappend('%' + item)        else:            myappend('%' + item)    return "".join(res)def unquote_plus(s):    """unquote('%7e/abc+def') -> '~/abc def'"""    if '+' in s:        # replace '+' with ' '        s = ' '.join(s.split('+'))    return unquote(s)always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'               'abcdefghijklmnopqrstuvwxyz'               '0123456789' '_.-')_fast_safe_test = always_safe + '/'_fast_safe = Nonedef _fast_quote(s):    global _fast_safe    if _fast_safe is None:        _fast_safe = {}        for c in _fast_safe_test:            _fast_safe[c] = c    res = list(s)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?