
📄 keepalive.py

📁 Harvestman (latest version)
💻 PY
📖 Page 1 of 2
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if not req.headers.has_key('Content-length'):
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector())
        except (socket.error, httplib.HTTPException), err:
            raise urllib2.URLError(err)

        for args in self.parent.addheaders:
            h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)

    def _get_connection(self, host):
        # subclasses must override this to return the proper connection type
        raise NotImplementedError


class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    def __init__(self):
        KeepAliveHandler.__init__(self)

    def http_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        return HTTPConnection(host)


class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        #if not ssl_factory:
        #    ssl_factory = sslfactory.get_factory()
        #self._ssl_factory = ssl_factory

    def https_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        # return self._ssl_factory.create_https_connection(host)
        return HTTPSConnection(host)


class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add a close_connection() method
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method: # the httplib in python 2.3 uses the method arg
            httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None    # inserted by the handler later
        self._host = None       # (same)
        self._url = None        # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    def readline(self, limit=-1):
        data = ""
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find('\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i + 1
        if 0 <= limit < len(self._rbuf): i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint=0):
        total = 0
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines


class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse


class HTTPSConnection(httplib.HTTPSConnection):
    response_class = HTTPResponse

    def connect(self):
        import _socket

        # For fixing #503
        sock = _socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((self.host, self.port))
        # Change this to certificate paths where you have your SSL client
        # certificates, to be able to download URLs producing SSL errors.
        ssl = socket.ssl(sock, None, None)

        self.sock = httplib.FakeSocket(sock, ssl)


#########################################################################
#   TEST FUNCTIONS
#########################################################################

def error_handler(url):
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    for i in (0, 1):
        print "  fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
        HANDLE_ERRORS = i
        try:
            fo = urllib2.urlopen(url)
            foo = fo.read()
            fo.close()
            try: status, reason = fo.status, fo.reason
            except AttributeError: status, reason = None, None
        except IOError, e:
            print "  EXCEPTION: %s" % e
            raise
        else:
            print "  status = %s, reason = %s" % (status, reason)
    HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print "open connections:", hosts
    keepalive_handler.close_all()


def continuity(url):
    import md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5.new(foo)
    print format % ('normal urllib', m.hexdigest())

    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)

    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5.new(foo)
    print format % ('keepalive read', m.hexdigest())

    fo = urllib2.urlopen(url)
    foo = ''
    while 1:
        f = fo.readline()
        if f: foo = foo + f
        else: break
    fo.close()
    m = md5.new(foo)
    print format % ('keepalive readline', m.hexdigest())


def comp(N, url):
    print '  making %i connections to:\n  %s' % (N, url)

    sys.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    t1 = fetch(N, url)
    print '  TIME: %.3f s' % t1

    sys.stdout.write('  now using the keepalive handler       ')
    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)
    t2 = fetch(N, url)
    print '  TIME: %.3f s' % t2

    print '  improvement factor: %.2f' % (t1 / t2, )


def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0: time.sleep(delay)
        fo = urllib2.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print "WARNING: inconsistent length on read %i: %i" % (j, i)

    return diff


def test_timeout(url):
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger:
        def debug(self, msg, *args): print msg % args
        info = warning = error = debug
    DEBUG = FakeLogger()
    print "  fetching the file to establish a connection"
    fo = urllib2.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print "  waiting %i seconds for the server to close the connection" % i
    while i > 0:
        sys.stdout.write('\r  %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print "  fetching the file a second time"
    fo = urllib2.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print '  data are identical'
    else:
        print '  ERROR: DATA DIFFER'

    DEBUG = dbbackup


def test(url, N=10):
    print "checking error handler (do this on a non-200)"
    try: error_handler(url)
    except IOError, e:
        print "exiting - exception will prevent further tests"
        sys.exit()
    print
    print "performing continuity test (making sure stuff isn't corrupted)"
    continuity(url)
    print
    print "performing speed comparison"
    comp(N, url)
    print
    print "performing dropped-connection check"
    test_timeout(url)


if __name__ == '__main__':
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print "usage: %s <integer> <url>" % sys.argv[0]
    else:
        test(url, N)
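
The test functions above already show the intended usage pattern: build a urllib2 opener around the keepalive HTTPHandler, install it globally, and let repeated fetches to the same host reuse the pooled connection. For reference, here is a minimal standalone sketch of that pattern. It assumes this file is importable as a module named keepalive and that the module header (not shown in this part of the listing) performs the usual socket/httplib/urllib2 setup; the URL is a placeholder, not part of the original code.

# Minimal usage sketch (Python 2, matching the module above).
# Assumption: the file above is importable as `keepalive`.
import urllib2
import keepalive

opener = urllib2.build_opener(keepalive.HTTPHandler())
urllib2.install_opener(opener)

# Repeated requests to the same host now reuse one pooled connection
# instead of opening a new socket for every fetch.
for _ in range(3):
    fo = urllib2.urlopen('http://example.com/')  # placeholder URL
    data = fo.read()
    fo.close()
    print len(data)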
