📄 urllib.py
字号:
if path[:1] != '/':
if path[:1] in ('#', '?'):
i = len(basepath)
else:
i = basepath.rfind('/')
if i < 0:
if host:
basepath = '/'
else:
basepath = ''
else:
basepath = basepath[:i+1]
while basepath and path[:3] == '../':
path = path[3:]
i = basepath[:-1].rfind('/')
if i > 0:
basepath = basepath[:i+1]
elif i == 0:
basepath = '/'
break
else:
basepath = ''
path = basepath + path
if host and path and path[0] != '/':
path = '/' + path
if type and host: return type + '://' + host + path
elif type: return type + ':' + path
elif host: return '//' + host + path
else: return path
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    A unicode URL is encoded to ASCII; any other object is returned
    unchanged.  Raises UnicodeError (with the offending URL in the message)
    when the URL contains characters that cannot be encoded as ASCII.
    """
    # type(u"") is the unicode type, i.e. the same object as
    # types.UnicodeType; only exact unicode instances are converted.
    if type(url) is not type(u""):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace is stripped first, then one pair of enclosing
    angle brackets, then a leading 'URL:' prefix.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased.  When the URL has no leading
    'scheme:' part (no ':' before the first '/'), returns (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _typeprog = re.compile(r'^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # Skip the scheme and the ':' that follows it.
    remainder = url[len(scheme) + 1:]
    return scheme.lower(), remainder
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when the URL does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _hostprog = re.compile(r'^//([^/]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    return found.group(1, 2)
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  On a match both halves are passed
    through unquote() and returned as a two-item list; without an '@' the
    result is the tuple (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _userprog = re.compile(r'^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Returns (user, None) when there
    is no ':' at all.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _passwdprog = re.compile(r'^([^:]*):(.*)$')

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  Returns (host, None) when
    the host does not end in ':' followed by one or more digits.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _portprog = re.compile(r'^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        return host, None
    return found.group(1, 2)
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    Splits at the last ':'.  Returns (host, defport) when there is no ':'.
    When a ':' is present the port is converted with int(); an empty or
    non-numeric port yields None in its place.
    """
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _nportprog = re.compile(r'^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    if port:
        try:
            nport = int(port)
        except ValueError:
            nport = None
    else:
        # A trailing ':' with nothing after it counts as "no usable port".
        nport = None
    return host, nport
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Returns (url, None) when the URL
    contains no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        # Raw string: the pattern needs a literal backslash before '?', and
        # a non-raw literal only works because '\?' is not a recognised
        # string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Returns (url, None) when the URL
    contains no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _tagprog = re.compile(r'^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'.
    """
    pieces = url.split(';')
    # split() always yields at least one item, so pop(0) cannot fail.
    path = pieces.pop(0)
    return path, pieces
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Returns (attr, None) when there
    is no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module-level variable.
        import re
        _valueprog = re.compile(r'^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    Returns (None, selector) unless the selector starts with '/' and has
    at least one more character after it.
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    gtype = selector[1]
    rest = selector[2:]
    return gtype, rest
def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%' followed by exactly two hexadecimal digits is replaced by
    the character with that code.  Any other '%' (at the end of the
    string, followed by a single character, or followed by non-hex
    characters) is left in the output unchanged.
    """
    hexdigits = '0123456789abcdefABCDEF'
    pieces = s.split('%')
    # Text before the first '%' can never contain an escape.
    decoded = [pieces[0]]
    for item in pieces[1:]:
        # Check both characters explicitly: int(x, 16) also accepts a
        # leading sign and surrounding whitespace, so relying on it alone
        # would decode sequences such as '%+f' or '% a' as if they were
        # valid escapes.
        if item[1:2] and item[0] in hexdigits and item[1] in hexdigits:
            decoded.append(chr(int(item[:2], 16)) + item[2:])
        else:
            decoded.append('%' + item)
    return "".join(decoded)
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'.

    Like unquote(), but every '+' is first turned into a space.
    """
    # replace() returns an equal string when there is no '+' to convert.
    spaced = s.replace('+', ' ')
    return unquote(spaced)
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
_fast_safe_test = always_safe + '/'
_fast_safe = None
def _fast_quote(s):
global _fast_safe
if _fast_safe is None:
_fast_safe = {}
for c in _fast_safe_test:
_fast_safe[c] = c
res = list(s)
for i in range(len(res)):
c = res[i]
if not _fast_safe.has_key(c):
res[i] = '%%%02X' % ord(c)
return ''.join(res)
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each character of s is kept when it appears in always_safe or in the
    `safe` argument; every other character is replaced by '%XX', where XX
    is the upper-case hexadecimal value of ord(character).  By default
    '/' is kept in addition to the always-safe characters.
    """
    allowed = always_safe + safe
    if allowed == _fast_safe_test:
        # Default safe set: use the dictionary-based implementation.
        return _fast_quote(s)
    quoted = []
    for ch in s:
        if ch in allowed:
            quoted.append(ch)
        else:
            quoted.append('%%%02X' % ord(ch))
    return ''.join(quoted)
def quote_plus(s, safe=''):
    """Quote s like quote(), but turn each space into a '+'.

    The string is split on spaces, each piece is quoted with
    quote(piece, safe), and the pieces are joined back with '+'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return '+'.join([quote(piece, safe) for piece in s.split(' ')])
def urlencode(query,doseq=0):
if hasattr(query,"items"):
query = query.items()
else:
try:
x = len(query)
if len(query) and type(query[0]) != types.TupleType:
raise TypeError
except TypeError:
ty,va,tb = sys.exc_info()
raise TypeError, "not a valid non-string sequence or mapping object", tb
l = []
if not doseq:
for k, v in query:
k = quote_plus(str(k))
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
for k, v in query:
k = quote_plus(str(k))
if type(v) == types.StringType:
v = quote_plus(v)
l.append(k + '=' + v)
elif type(v) == types.UnicodeType:
v = quote_plus(v.encode("ASCII","replace"))
l.append(k + '=' + v)
else:
try:
x = len(v)
except TypeError:
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
for elt in v:
l.append(k + '=' + quote_plus(str(elt)))
return '&'.join(l)
def getproxies_environment():
    """Return a dictionary of scheme -> proxy URL taken from the environment.

    Every environment variable whose lower-cased name ends in '_proxy' and
    whose value is non-empty contributes one entry; the key is the
    lower-cased name with the '_proxy' suffix removed.
    """
    suffix = '_proxy'
    found = {}
    for key, setting in os.environ.items():
        key = key.lower()
        if key.endswith(suffix) and setting:
            found[key[:-len(suffix)]] = setting
    return found

# Proxy settings are looked up in the environment.
getproxies = getproxies_environment
def proxy_bypass(host):
    """Tell whether the proxy should be bypassed for `host`.

    This implementation ignores `host` and always returns 0 (false).
    """
    return 0
# Test and time quote() and unquote()
def test1():
import time
s = ''
for i in range(256): s = s + chr(i)
s = s*4
t0 = time.time()
qs = quote(s)
uqs = unquote(qs)
t1 = time.time()
if uqs != s:
print 'Wrong!'
print `s`
print `qs`
print `uqs`
print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
print "Block number: %d, Block size: %d, Total size: %d" % (
blocknum, blocksize, totalsize)
# Test program
def test(args=[]):
if not args:
args = [
'/etc/passwd',
'file:/etc/passwd',
'file://localhost/etc/passwd',
'ftp://ftp.python.org/pub/python/README',
## 'gopher://gopher.micro.umn.edu/1/',
'http://www.python.org/index.html',
]
if hasattr(URLopener, "open_https"):
args.append('https://synergy.as.cmu.edu/~geek/')
try:
for url in args:
print '-'*10, url, '-'*10
fn, h = urlretrieve(url, None, reporthook)
print fn
if h:
print '======'
for k in h.keys(): print k + ':', h[k]
print '======'
fp = open(fn, 'rb')
data = fp.read()
del fp
if '\r' in data:
table = string.maketrans("", "")
data = data.translate(table, "\r")
print data
fn, h = None, None
print '-'*40
finally:
urlcleanup()
def main():
import getopt, sys
try:
opts, args = getopt.getopt(sys.argv[1:], "th")
except getopt.error, msg:
print msg
print "Use -h for help"
return
t = 0
for o, a in opts:
if o == '-t':
t = t + 1
if o == '-h':
print "Usage: python urllib.py [-t] [url ...]"
print "-t runs self-test;",
print "otherwise, contents of urls are printed"
return
if t:
if t > 1:
test1()
test(args)
else:
if not args:
print "Use -h for help"
for url in args:
print urlopen(url).read(),
# Run test program when run as a script
if __name__ == '__main__':
main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -