📄 urllib.py

📁 python s60 1.4.5版本的源代码
💻 PY
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
    if path[:1] != '/':
        if path[:1] in ('#', '?'):
            i = len(basepath)
        else:
            i = basepath.rfind('/')
        if i < 0:
            if host:
                basepath = '/'
            else:
                basepath = ''
        else:
            basepath = basepath[:i+1]
        while basepath and path[:3] == '../':
            path = path[3:]
            i = basepath[:-1].rfind('/')
            if i > 0:
                basepath = basepath[:i+1]
            elif i == 0:
                basepath = '/'
                break
            else:
                basepath = ''

        path = basepath + path
    if host and path and path[0] != '/':
        path = '/' + path
    if type and host: return type + '://' + host + path
    elif type: return type + ':' + path
    elif host: return '//' + host + path
    else: return path

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Unicode objects are encoded to ASCII; any other object is returned
    unchanged.  Raises UnicodeError when the URL cannot be encoded.
    """
    # Only exact unicode objects need converting.
    if type(url) is not types.UnicodeType:
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, an enclosing '<...>' pair and a leading 'URL:'
    marker are each removed when present, in that order.
    """
    text = url.strip()
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text

_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is one or more characters other than '/' and ':' followed
    by ':'; it is returned lower-cased.  Without such a prefix the result
    is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its ':'; everything after is the rest.
    rest = url[found.end():]
    return found.group(1).lower(), rest

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#' (or at the end of the
    string), so a query or fragment that directly follows the host is not
    mistaken for part of the host name.  A non-empty remainder that does
    not already start with '/' gets one prepended, e.g.
    '//host?q=1' --> 'host', '/?q=1'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _hostprog = re.compile('^//([^/?#]*)(.*)$')

    match = _hostprog.match(url)
    if match is None:
        return None, url
    host_port, path = match.group(1, 2)
    if path and path[:1] != '/':
        # The remainder began with '?' or '#': anchor it at the root path.
        path = '/' + path
    return host_port, path

_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  Both parts are passed through
    unquote() and returned as a two-item list; without an '@' the result
    is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]

_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Without a ':' the result is
    (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.groups()

_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is only split off when the text after the last ':' consists
    of one or more decimal digits; it is returned as a string.  Otherwise
    the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.groups()

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    splitnport('host:80') --> 'host', 80

    Returns (host, defport) when host contains no ':' at all, and
    (host, None) when the text after the last ':' is empty or is not
    accepted by int().
    """
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match is None:
        return host, defport
    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError too, so an empty port needs no
        # separate check.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module global.
        import re
        # Raw string so the backslash reaches the regex engine untouched
        # instead of relying on '\?' being an unrecognised string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match is None:
        return url, None
    return match.group(1, 2)

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.groups()

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever remains is the attribute list.
    path = pieces.pop(0)
    return path, pieces

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without an '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.groups()

def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless selector starts with '/' and has at
    least one more character after it.
    """
    has_type = selector[:1] == '/' and len(selector) > 1
    if not has_type:
        return None, selector
    return selector[1], selector[2:]

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' sequence, where XX is exactly two hexadecimal digits, is
    replaced by the character with that code.  A '%' that is not followed
    by two hex digits is left in place unchanged.
    """
    # Checked explicitly because int(text, 16) also accepts a sign or
    # surrounding whitespace, which would wrongly decode e.g. '%+1'.
    hexdigits = '0123456789ABCDEFabcdef'
    pieces = s.split('%')
    # Text before the first '%' can never contain an escape.
    res = [pieces[0]]
    for item in pieces[1:]:
        if item[1:2] and item[0] in hexdigits and item[1] in hexdigits:
            res.append(chr(int(item[:2], 16)) + item[2:])
        else:
            # Not a valid escape: put back the '%' that split() removed.
            res.append('%' + item)
    return "".join(res)

def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'.

    Like unquote(), but every '+' is turned into a space first.
    """
    if '+' in s:
        s = s.replace('+', ' ')
    return unquote(s)

always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')

_fast_safe_test = always_safe + '/'
_fast_safe = None

def _fast_quote(s):
    global _fast_safe
    if _fast_safe is None:
        _fast_safe = {}
        for c in _fast_safe_test:
            _fast_safe[c] = c
    res = list(s)
    for i in range(len(res)):
        c = res[i]
        if not _fast_safe.has_key(c):
            res[i] = '%%%02X' % ord(c)
    return ''.join(res)

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Every character of s that is in neither always_safe nor safe is
    replaced by '%XX', the upper-case hexadecimal form of its ord().
    By default '/' is left unescaped as well.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        # Default safe set: use the table-driven implementation.
        return _fast_quote(s)
    pieces = []
    for ch in s:
        if ch in allowed:
            pieces.append(ch)
        else:
            pieces.append('%%%02X' % ord(ch))
    return ''.join(pieces)

def quote_plus(s, safe = ''):
    """Quote s like quote(), additionally turning each ' ' into '+'.

    Unlike quote(), '/' is not safe by default here.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Quote the space-separated words individually, then rejoin with '+'.
    words = [quote(word, safe) for word in s.split(' ')]
    return '+'.join(words)

def urlencode(query,doseq=0):

    if hasattr(query,"items"):
        query = query.items()
    else:
        try:
            x = len(query)
            if len(query) and type(query[0]) != types.TupleType:
                raise TypeError
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if type(v) == types.StringType:
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif type(v) == types.UnicodeType:
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    x = len(v)
                except TypeError:
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)

def getproxies_environment():
    """Return a dictionary of scheme -> proxy setting taken from os.environ.

    Every environment variable whose lower-cased name ends in '_proxy' and
    whose value is non-empty contributes one entry; the key is the
    lower-cased name without the '_proxy' suffix.
    """
    found = {}
    for key, setting in os.environ.items():
        if not setting:
            continue
        lowered = key.lower()
        if lowered.endswith('_proxy'):
            found[lowered[:-6]] = setting
    return found

# On this platform the environment is the only source of proxy settings.
getproxies = getproxies_environment

def proxy_bypass(host):
    """Always return 0, whatever host is.

    NOTE(review): presumably 0 means "do not bypass the proxy for this
    host" -- confirm against the callers.
    """
    return 0

# Test and time quote() and unquote()
def test1():
    import time
    s = ''
    for i in range(256): s = s + chr(i)
    s = s*4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print 'Wrong!'
    print `s`
    print `qs`
    print `uqs`
    print round(t1 - t0, 3), 'sec'


def reporthook(blocknum, blocksize, totalsize):
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)

# Test program
def test(args=[]):
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.python.org/pub/python/README',
##          'gopher://gopher.micro.umn.edu/1/',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print '-'*10, url, '-'*10
            fn, h = urlretrieve(url, None, reporthook)
            print fn
            if h:
                print '======'
                for k in h.keys(): print k + ':', h[k]
                print '======'
            fp = open(fn, 'rb')
            data = fp.read()
            del fp
            if '\r' in data:
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print data
            fn, h = None, None
        print '-'*40
    finally:
        urlcleanup()

def main():
    """Command-line entry point.

    Options: -h prints usage and returns; -t runs test(args), preceded by
    test1() when -t is given more than once.  Without -t the content of
    every URL argument is printed.
    """
    import getopt
    import sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    selftest = 0
    for flag, _value in opts:
        if flag == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;", "otherwise, contents of urls are printed"
            return
        if flag == '-t':
            selftest += 1
    if selftest:
        if selftest > 1:
            test1()
        test(args)
        return
    if not args:
        print "Use -h for help"
    for url in args:
        print urlopen(url).read(),

# Run test program when run as a script
if __name__ == '__main__':
    main()
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -