📄 urllib2.py
字号:
skip.append(klass) elif isinstance(check, klass): skip.append(klass) for klass in skip: default_classes.remove(klass) for klass in default_classes: opener.add_handler(klass()) for h in handlers: if inspect.isclass(h): h = h() opener.add_handler(h) return openerclass BaseHandler: def add_parent(self, parent): self.parent = parent def close(self): self.parent = Noneclass HTTPDefaultErrorHandler(BaseHandler): def http_error_default(self, req, fp, code, msg, hdrs): raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)class HTTPRedirectHandler(BaseHandler): def redirect_request(self, req, fp, code, msg, headers, newurl): """Return a Request or None in response to a redirect. This is called by the http_error_30x methods when a redirection response is received. If a redirection should take place, return a new Request to allow http_error_30x to perform the redirect. Otherwise, raise HTTPError if no-one else should try to handle this url. Return None if you can't but another Handler might. """ m = req.get_method() if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") or code in (302, 303) and m == "POST"): # Strictly (according to RFC 2616), 302 in response to a # POST MUST NOT cause a redirection without confirmation # from the user (of urllib2, in this case). In practice, # essentially all clients do redirect in this case, so we # do the same. return Request(newurl, headers=req.headers) else: raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) # Implementation note: To avoid the server sending us into an # infinite loop, the request object needs to track what URLs we # have already seen. Do this by adding a handler-specific # attribute to the Request object. def http_error_302(self, req, fp, code, msg, headers): if headers.has_key('location'): newurl = headers['location'] elif headers.has_key('uri'): newurl = headers['uri'] else: return newurl = urlparse.urljoin(req.get_full_url(), newurl) # XXX Probably want to forget about the state of the current # request, although that might interact poorly with other # handlers that also use handler-specific request attributes new = self.redirect_request(req, fp, code, msg, headers, newurl) if new is None: return # loop detection new.error_302_dict = {} if hasattr(req, 'error_302_dict'): if len(req.error_302_dict)>10 or \ req.error_302_dict.has_key(newurl): raise HTTPError(req.get_full_url(), code, self.inf_msg + msg, headers, fp) new.error_302_dict.update(req.error_302_dict) new.error_302_dict[newurl] = newurl # Don't close the fp until we are sure that we won't use it # with HTTPError. fp.read() fp.close() return self.parent.open(new) http_error_301 = http_error_303 = http_error_307 = http_error_302 inf_msg = "The HTTP server returned a redirect error that would" \ "lead to an infinite loop.\n" \ "The last 302 error message was:\n"class ProxyHandler(BaseHandler): def __init__(self, proxies=None): if proxies is None: proxies = getproxies() assert hasattr(proxies, 'has_key'), "proxies must be a mapping" self.proxies = proxies for type, url in proxies.items(): setattr(self, '%s_open' % type, lambda r, proxy=url, type=type, meth=self.proxy_open: \ meth(r, proxy, type)) def proxy_open(self, req, proxy, type): orig_type = req.get_type() type, r_type = splittype(proxy) host, XXX = splithost(r_type) if '@' in host: user_pass, host = host.split('@', 1) if ':' in user_pass: user, password = user_pass.split(':', 1) user_pass = base64.encodestring('%s:%s' % (unquote(user), unquote(password))) req.add_header('Proxy-Authorization', 'Basic ' + user_pass) host = unquote(host) req.set_proxy(host, type) if orig_type == type: # let other handlers take care of it # XXX this only makes sense if the proxy is before the # other handlers return None else: # need to start over, because the other handlers don't # grok the proxy's URL type return self.parent.open(req)# feature suggested by Duncan Booth# XXX custom is not a good nameclass CustomProxy: # either pass a function to the constructor or override handle def __init__(self, proto, func=None, proxy_addr=None): self.proto = proto self.func = func self.addr = proxy_addr def handle(self, req): if self.func and self.func(req): return 1 def get_proxy(self): return self.addrclass CustomProxyHandler(BaseHandler): def __init__(self, *proxies): self.proxies = {} def proxy_open(self, req): proto = req.get_type() try: proxies = self.proxies[proto] except KeyError: return None for p in proxies: if p.handle(req): req.set_proxy(p.get_proxy()) return self.parent.open(req) return None def do_proxy(self, p, req): return self.parent.open(req) def add_proxy(self, cpo): if self.proxies.has_key(cpo.proto): self.proxies[cpo.proto].append(cpo) else: self.proxies[cpo.proto] = [cpo]class HTTPPasswordMgr: def __init__(self): self.passwd = {} def add_password(self, realm, uri, user, passwd): # uri could be a single URI or a sequence if isinstance(uri, (types.StringType, types.UnicodeType)): uri = [uri] uri = tuple(map(self.reduce_uri, uri)) if not self.passwd.has_key(realm): self.passwd[realm] = {} self.passwd[realm][uri] = (user, passwd) def find_user_password(self, realm, authuri): domains = self.passwd.get(realm, {}) authuri = self.reduce_uri(authuri) for uris, authinfo in domains.items(): for uri in uris: if self.is_suburi(uri, authuri): return authinfo return None, None def reduce_uri(self, uri): """Accept netloc or URI and extract only the netloc and path""" parts = urlparse.urlparse(uri) if parts[1]: return parts[1], parts[2] or '/' else: return parts[2], '/' def is_suburi(self, base, test): """Check if test is below base in a URI tree Both args must be URIs in reduced form. """ if base == test: return 1 if base[0] != test[0]: return 0 common = posixpath.commonprefix((base[1], test[1])) if len(common) == len(base[1]): return 1 return 0class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): def find_user_password(self, realm, authuri): user, password = HTTPPasswordMgr.find_user_password(self,realm,authuri) if user is not None: return user, password return HTTPPasswordMgr.find_user_password(self, None, authuri)class AbstractBasicAuthHandler: rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"') # XXX there can actually be multiple auth-schemes in a # www-authenticate header. should probably be a lot more careful # in parsing them to extract multiple alternatives def __init__(self, password_mgr=None): if password_mgr is None: password_mgr = HTTPPasswordMgr() self.passwd = password_mgr self.add_password = self.passwd.add_password def http_error_auth_reqed(self, authreq, host, req, headers): # XXX could be multiple headers authreq = headers.get(authreq, None) if authreq: mo = AbstractBasicAuthHandler.rx.match(authreq) if mo: scheme, realm = mo.groups() if scheme.lower() == 'basic': return self.retry_http_basic_auth(host, req, realm) def retry_http_basic_auth(self, host, req, realm): user,pw = self.passwd.find_user_password(realm, host) if pw: raw = "%s:%s" % (user, pw) auth = 'Basic %s' % base64.encodestring(raw).strip() if req.headers.get(self.auth_header, None) == auth: return None req.add_header(self.auth_header, auth) return self.parent.open(req) else: return Noneclass HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): auth_header = 'Authorization' def http_error_401(self, req, fp, code, msg, headers): host = urlparse.urlparse(req.get_full_url())[1] return self.http_error_auth_reqed('www-authenticate', host, req, headers)class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): auth_header = 'Proxy-Authorization' def http_error_407(self, req, fp, code, msg, headers): host = req.get_host() return self.http_error_auth_reqed('proxy-authenticate', host, req, headers)class AbstractDigestAuthHandler: def __init__(self, passwd=None): if passwd is None: passwd = HTTPPasswordMgr() self.passwd = passwd self.add_password = self.passwd.add_password def http_error_auth_reqed(self, authreq, host, req, headers): authreq = headers.get(self.auth_header, None) if authreq: kind = authreq.split()[0] if kind == 'Digest': return self.retry_http_digest_auth(req, authreq) def retry_http_digest_auth(self, req, auth): token, challenge = auth.split(' ', 1) chal = parse_keqv_list(parse_http_list(challenge)) auth = self.get_authorization(req, chal) if auth: auth_val = 'Digest %s' % auth if req.headers.get(self.auth_header, None) == auth_val: return None req.add_header(self.auth_header, auth_val) resp = self.parent.open(req) return resp def get_authorization(self, req, chal): try: realm = chal['realm'] nonce = chal['nonce'] algorithm = chal.get('algorithm', 'MD5') # mod_digest doesn't send an opaque, even though it isn't # supposed to be optional opaque = chal.get('opaque', None) except KeyError: return None H, KD = self.get_algorithm_impls(algorithm) if H is None: return None user, pw = self.passwd.find_user_password(realm, req.get_full_url()) if user is None: return None # XXX not implemented yet if req.has_data(): entdig = self.get_entity_digest(req.get_data(), chal) else: entdig = None A1 = "%s:%s:%s" % (user, realm, pw) A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET', # XXX selector: what about proxies and full urls req.get_selector()) respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) # XXX should the partial digests be encoded too? base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ 'response="%s"' % (user, realm, nonce, req.get_selector(), respdig) if opaque: base = base + ', opaque="%s"' % opaque if entdig: base = base + ', digest="%s"' % entdig if algorithm != 'MD5': base = base + ', algorithm="%s"' % algorithm return base def get_algorithm_impls(self, algorithm): # lambdas assume digest modules are imported at the top level if algorithm == 'MD5': H = lambda x, e=encode_digest:e(md5.new(x).digest()) elif algorithm == 'SHA': H = lambda x, e=encode_digest:e(sha.new(x).digest()) # XXX MD5-sess KD = lambda s, d, H=H: H("%s:%s" % (s, d)) return H, KD def get_entity_digest(self, data, chal): # XXX not implemented yet return Noneclass HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): """An authentication protocol defined by RFC 2069 Digest authentication improves on basic authentication because it does not transmit passwords in the clear. """ header = 'Authorization' def http_error_401(self, req, fp, code, msg, headers): host = urlparse.urlparse(req.get_full_url())[1] self.http_error_auth_reqed('www-authenticate', host, req, headers)class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): header = 'Proxy-Authorization' def http_error_407(self, req, fp, code, msg, headers): host = req.get_host()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -