⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 urllib2.py

📁 mallet是自然语言处理、机器学习领域的一个开源项目。
💻 PY
📖 第 1 页 / 共 3 页
字号:
                    skip.append(klass)            elif isinstance(check, klass):                skip.append(klass)    for klass in skip:        default_classes.remove(klass)    for klass in default_classes:        opener.add_handler(klass())    for h in handlers:        if inspect.isclass(h):            h = h()        opener.add_handler(h)    return openerclass BaseHandler:    def add_parent(self, parent):        self.parent = parent    def close(self):        self.parent = Noneclass HTTPDefaultErrorHandler(BaseHandler):    def http_error_default(self, req, fp, code, msg, hdrs):        raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)class HTTPRedirectHandler(BaseHandler):    def redirect_request(self, req, fp, code, msg, headers, newurl):        """Return a Request or None in response to a redirect.        This is called by the http_error_30x methods when a redirection        response is received.  If a redirection should take place, return a new        Request to allow http_error_30x to perform the redirect.  Otherwise,        raise HTTPError if no-one else should try to handle this url.  Return        None if you can't but another Handler might.        """        m = req.get_method()        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")            or code in (302, 303) and m == "POST"):            # Strictly (according to RFC 2616), 302 in response to a            # POST MUST NOT cause a redirection without confirmation            # from the user (of urllib2, in this case).  In practice,            # essentially all clients do redirect in this case, so we            # do the same.            return Request(newurl, headers=req.headers)        else:            raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)    # Implementation note: To avoid the server sending us into an    # infinite loop, the request object needs to track what URLs we    # have already seen.  Do this by adding a handler-specific    # attribute to the Request object.    def http_error_302(self, req, fp, code, msg, headers):        if headers.has_key('location'):            newurl = headers['location']        elif headers.has_key('uri'):            newurl = headers['uri']        else:            return        newurl = urlparse.urljoin(req.get_full_url(), newurl)        # XXX Probably want to forget about the state of the current        # request, although that might interact poorly with other        # handlers that also use handler-specific request attributes        new = self.redirect_request(req, fp, code, msg, headers, newurl)        if new is None:            return        # loop detection        new.error_302_dict = {}        if hasattr(req, 'error_302_dict'):            if len(req.error_302_dict)>10 or \               req.error_302_dict.has_key(newurl):                raise HTTPError(req.get_full_url(), code,                                self.inf_msg + msg, headers, fp)            new.error_302_dict.update(req.error_302_dict)        new.error_302_dict[newurl] = newurl        # Don't close the fp until we are sure that we won't use it        # with HTTPError.        fp.read()        fp.close()        return self.parent.open(new)    http_error_301 = http_error_303 = http_error_307 = http_error_302    inf_msg = "The HTTP server returned a redirect error that would" \              "lead to an infinite loop.\n" \              "The last 302 error message was:\n"class ProxyHandler(BaseHandler):    def __init__(self, proxies=None):        if proxies is None:            proxies = getproxies()        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"        self.proxies = proxies        for type, url in proxies.items():            setattr(self, '%s_open' % type,                    lambda r, proxy=url, type=type, meth=self.proxy_open: \                    meth(r, proxy, type))    def proxy_open(self, req, proxy, type):        orig_type = req.get_type()        type, r_type = splittype(proxy)        host, XXX = splithost(r_type)        if '@' in host:            user_pass, host = host.split('@', 1)            if ':' in user_pass:                user, password = user_pass.split(':', 1)                user_pass = base64.encodestring('%s:%s' % (unquote(user),                                                           unquote(password)))                req.add_header('Proxy-Authorization', 'Basic ' + user_pass)        host = unquote(host)        req.set_proxy(host, type)        if orig_type == type:            # let other handlers take care of it            # XXX this only makes sense if the proxy is before the            # other handlers            return None        else:            # need to start over, because the other handlers don't            # grok the proxy's URL type            return self.parent.open(req)# feature suggested by Duncan Booth# XXX custom is not a good nameclass CustomProxy:    # either pass a function to the constructor or override handle    def __init__(self, proto, func=None, proxy_addr=None):        self.proto = proto        self.func = func        self.addr = proxy_addr    def handle(self, req):        if self.func and self.func(req):            return 1    def get_proxy(self):        return self.addrclass CustomProxyHandler(BaseHandler):    def __init__(self, *proxies):        self.proxies = {}    def proxy_open(self, req):        proto = req.get_type()        try:            proxies = self.proxies[proto]        except KeyError:            return None        for p in proxies:            if p.handle(req):                req.set_proxy(p.get_proxy())                return self.parent.open(req)        return None    def do_proxy(self, p, req):        return self.parent.open(req)    def add_proxy(self, cpo):        if self.proxies.has_key(cpo.proto):            self.proxies[cpo.proto].append(cpo)        else:            self.proxies[cpo.proto] = [cpo]class HTTPPasswordMgr:    def __init__(self):        self.passwd = {}    def add_password(self, realm, uri, user, passwd):        # uri could be a single URI or a sequence        if isinstance(uri, (types.StringType, types.UnicodeType)):            uri = [uri]        uri = tuple(map(self.reduce_uri, uri))        if not self.passwd.has_key(realm):            self.passwd[realm] = {}        self.passwd[realm][uri] = (user, passwd)    def find_user_password(self, realm, authuri):        domains = self.passwd.get(realm, {})        authuri = self.reduce_uri(authuri)        for uris, authinfo in domains.items():            for uri in uris:                if self.is_suburi(uri, authuri):                    return authinfo        return None, None    def reduce_uri(self, uri):        """Accept netloc or URI and extract only the netloc and path"""        parts = urlparse.urlparse(uri)        if parts[1]:            return parts[1], parts[2] or '/'        else:            return parts[2], '/'    def is_suburi(self, base, test):        """Check if test is below base in a URI tree        Both args must be URIs in reduced form.        """        if base == test:            return 1        if base[0] != test[0]:            return 0        common = posixpath.commonprefix((base[1], test[1]))        if len(common) == len(base[1]):            return 1        return 0class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):    def find_user_password(self, realm, authuri):        user, password = HTTPPasswordMgr.find_user_password(self,realm,authuri)        if user is not None:            return user, password        return HTTPPasswordMgr.find_user_password(self, None, authuri)class AbstractBasicAuthHandler:    rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"')    # XXX there can actually be multiple auth-schemes in a    # www-authenticate header.  should probably be a lot more careful    # in parsing them to extract multiple alternatives    def __init__(self, password_mgr=None):        if password_mgr is None:            password_mgr = HTTPPasswordMgr()        self.passwd = password_mgr        self.add_password = self.passwd.add_password    def http_error_auth_reqed(self, authreq, host, req, headers):        # XXX could be multiple headers        authreq = headers.get(authreq, None)        if authreq:            mo = AbstractBasicAuthHandler.rx.match(authreq)            if mo:                scheme, realm = mo.groups()                if scheme.lower() == 'basic':                    return self.retry_http_basic_auth(host, req, realm)    def retry_http_basic_auth(self, host, req, realm):        user,pw = self.passwd.find_user_password(realm, host)        if pw:            raw = "%s:%s" % (user, pw)            auth = 'Basic %s' % base64.encodestring(raw).strip()            if req.headers.get(self.auth_header, None) == auth:                return None            req.add_header(self.auth_header, auth)            return self.parent.open(req)        else:            return Noneclass HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):    auth_header = 'Authorization'    def http_error_401(self, req, fp, code, msg, headers):        host = urlparse.urlparse(req.get_full_url())[1]        return self.http_error_auth_reqed('www-authenticate',                                          host, req, headers)class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):    auth_header = 'Proxy-Authorization'    def http_error_407(self, req, fp, code, msg, headers):        host = req.get_host()        return self.http_error_auth_reqed('proxy-authenticate',                                          host, req, headers)class AbstractDigestAuthHandler:    def __init__(self, passwd=None):        if passwd is None:            passwd = HTTPPasswordMgr()        self.passwd = passwd        self.add_password = self.passwd.add_password    def http_error_auth_reqed(self, authreq, host, req, headers):        authreq = headers.get(self.auth_header, None)        if authreq:            kind = authreq.split()[0]            if kind == 'Digest':                return self.retry_http_digest_auth(req, authreq)    def retry_http_digest_auth(self, req, auth):        token, challenge = auth.split(' ', 1)        chal = parse_keqv_list(parse_http_list(challenge))        auth = self.get_authorization(req, chal)        if auth:            auth_val = 'Digest %s' % auth            if req.headers.get(self.auth_header, None) == auth_val:                return None            req.add_header(self.auth_header, auth_val)            resp = self.parent.open(req)            return resp    def get_authorization(self, req, chal):        try:            realm = chal['realm']            nonce = chal['nonce']            algorithm = chal.get('algorithm', 'MD5')            # mod_digest doesn't send an opaque, even though it isn't            # supposed to be optional            opaque = chal.get('opaque', None)        except KeyError:            return None        H, KD = self.get_algorithm_impls(algorithm)        if H is None:            return None        user, pw = self.passwd.find_user_password(realm,                                                  req.get_full_url())        if user is None:            return None        # XXX not implemented yet        if req.has_data():            entdig = self.get_entity_digest(req.get_data(), chal)        else:            entdig = None        A1 = "%s:%s:%s" % (user, realm, pw)        A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET',                        # XXX selector: what about proxies and full urls                        req.get_selector())        respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))        # XXX should the partial digests be encoded too?        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \               'response="%s"' % (user, realm, nonce, req.get_selector(),                                  respdig)        if opaque:            base = base + ', opaque="%s"' % opaque        if entdig:            base = base + ', digest="%s"' % entdig        if algorithm != 'MD5':            base = base + ', algorithm="%s"' % algorithm        return base    def get_algorithm_impls(self, algorithm):        # lambdas assume digest modules are imported at the top level        if algorithm == 'MD5':            H = lambda x, e=encode_digest:e(md5.new(x).digest())        elif algorithm == 'SHA':            H = lambda x, e=encode_digest:e(sha.new(x).digest())        # XXX MD5-sess        KD = lambda s, d, H=H: H("%s:%s" % (s, d))        return H, KD    def get_entity_digest(self, data, chal):        # XXX not implemented yet        return Noneclass HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):    """An authentication protocol defined by RFC 2069    Digest authentication improves on basic authentication because it    does not transmit passwords in the clear.    """    header = 'Authorization'    def http_error_401(self, req, fp, code, msg, headers):        host = urlparse.urlparse(req.get_full_url())[1]        self.http_error_auth_reqed('www-authenticate', host, req, headers)class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):    header = 'Proxy-Authorization'    def http_error_407(self, req, fp, code, msg, headers):        host = req.get_host()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -