📄 keepalive.py
字号:
h.putheader('Content-type', 'application/x-www-form-urlencoded') if not req.headers.has_key('Content-length'): h.putheader('Content-length', '%d' % len(data)) else: h.putrequest('GET', req.get_selector()) except (socket.error, httplib.HTTPException), err: raise urllib2.URLError(err) for args in self.parent.addheaders: h.putheader(*args) for k, v in req.headers.items(): h.putheader(k, v) h.endheaders() if req.has_data(): h.send(data) def _get_connection(self, host): return NotImplementedErrorclass HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler): def __init__(self): KeepAliveHandler.__init__(self) def http_open(self, req): return self.do_open(req) def _get_connection(self, host): return HTTPConnection(host)class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): def __init__(self, ssl_factory=None): KeepAliveHandler.__init__(self) #if not ssl_factory: # ssl_factory = sslfactory.get_factory() #self._ssl_factory = ssl_factory def https_open(self, req): return self.do_open(req) def _get_connection(self, host): # return self._ssl_factory.create_https_connection(host) return HTTPSConnection(host)class HTTPResponse(httplib.HTTPResponse): # we need to subclass HTTPResponse in order to # 1) add readline() and readlines() methods # 2) add close_connection() methods # 3) add info() and geturl() methods # in order to add readline(), read must be modified to deal with a # buffer. example: readline must read a buffer and then spit back # one line at a time. The only real alternative is to read one # BYTE at a time (ick). Once something has been read, it can't be # put back (ok, maybe it can, but that's even uglier than this), # so if you THEN do a normal read, you must first take stuff from # the buffer. # the read method wraps the original to accomodate buffering, # although read() never adds to the buffer. # Both readline and readlines have been stolen with almost no # modification from socket.py def __init__(self, sock, debuglevel=0, strict=0, method=None): if method: # the httplib in python 2.3 uses the method arg httplib.HTTPResponse.__init__(self, sock, debuglevel, method) else: # 2.2 doesn't httplib.HTTPResponse.__init__(self, sock, debuglevel) self.fileno = sock.fileno self.code = None self._rbuf = '' self._rbufsize = 8096 self._handler = None # inserted by the handler later self._host = None # (same) self._url = None # (same) self._connection = None # (same) _raw_read = httplib.HTTPResponse.read def close(self): if self.fp: self.fp.close() self.fp = None if self._handler: self._handler._request_closed(self, self._host, self._connection) def close_connection(self): self._handler._remove_connection(self._host, self._connection, close=1) self.close() def info(self): return self.headers def geturl(self): return self._url def read(self, amt=None): # the _rbuf test is only in this first if for speed. It's not # logically necessary if self._rbuf and not amt is None: L = len(self._rbuf) if amt > L: amt -= L else: s = self._rbuf[:amt] self._rbuf = self._rbuf[amt:] return s s = self._rbuf + self._raw_read(amt) self._rbuf = '' return s def readline(self, limit=-1): data = "" i = self._rbuf.find('\n') while i < 0 and not (0 < limit <= len(self._rbuf)): new = self._raw_read(self._rbufsize) if not new: break i = new.find('\n') if i >= 0: i = i + len(self._rbuf) self._rbuf = self._rbuf + new if i < 0: i = len(self._rbuf) else: i = i+1 if 0 <= limit < len(self._rbuf): i = limit data, self._rbuf = self._rbuf[:i], self._rbuf[i:] return data def readlines(self, sizehint = 0): total = 0 list = [] while 1: line = self.readline() if not line: break list.append(line) total += len(line) if sizehint and total >= sizehint: break return listclass HTTPConnection(httplib.HTTPConnection): # use the modified response class response_class = HTTPResponseclass HTTPSConnection(httplib.HTTPSConnection): response_class = HTTPResponse def connect(self): import _socket # For fixing #503 sock = _socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((self.host, self.port)) # Change this to certicate paths where you have your SSL client certificates # to be able to download URLs producing SSL errors. ssl = socket.ssl(sock, None, None) self.sock = httplib.FakeSocket(sock, ssl) ############################################################################## TEST FUNCTIONS#########################################################################def error_handler(url): global HANDLE_ERRORS orig = HANDLE_ERRORS keepalive_handler = HTTPHandler() opener = urllib2.build_opener(keepalive_handler) urllib2.install_opener(opener) pos = {0: 'off', 1: 'on'} for i in (0, 1): print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i) HANDLE_ERRORS = i try: fo = urllib2.urlopen(url) foo = fo.read() fo.close() try: status, reason = fo.status, fo.reason except AttributeError: status, reason = None, None except IOError, e: print " EXCEPTION: %s" % e raise else: print " status = %s, reason = %s" % (status, reason) HANDLE_ERRORS = orig hosts = keepalive_handler.open_connections() print "open connections:", hosts keepalive_handler.close_all()def continuity(url): import md5 format = '%25s: %s' # first fetch the file with the normal http handler opener = urllib2.build_opener() urllib2.install_opener(opener) fo = urllib2.urlopen(url) foo = fo.read() fo.close() m = md5.new(foo) print format % ('normal urllib', m.hexdigest()) # now install the keepalive handler and try again opener = urllib2.build_opener(HTTPHandler()) urllib2.install_opener(opener) fo = urllib2.urlopen(url) foo = fo.read() fo.close() m = md5.new(foo) print format % ('keepalive read', m.hexdigest()) fo = urllib2.urlopen(url) foo = '' while 1: f = fo.readline() if f: foo = foo + f else: break fo.close() m = md5.new(foo) print format % ('keepalive readline', m.hexdigest())def comp(N, url): print ' making %i connections to:\n %s' % (N, url) sys.stdout.write(' first using the normal urllib handlers') # first use normal opener opener = urllib2.build_opener() urllib2.install_opener(opener) t1 = fetch(N, url) print ' TIME: %.3f s' % t1 sys.stdout.write(' now using the keepalive handler ') # now install the keepalive handler and try again opener = urllib2.build_opener(HTTPHandler()) urllib2.install_opener(opener) t2 = fetch(N, url) print ' TIME: %.3f s' % t2 print ' improvement factor: %.2f' % (t1/t2, ) def fetch(N, url, delay=0): import time lens = [] starttime = time.time() for i in range(N): if delay and i > 0: time.sleep(delay) fo = urllib2.urlopen(url) foo = fo.read() fo.close() lens.append(len(foo)) diff = time.time() - starttime j = 0 for i in lens[1:]: j = j + 1 if not i == lens[0]: print "WARNING: inconsistent length on read %i: %i" % (j, i) return diffdef test_timeout(url): global DEBUG dbbackup = DEBUG class FakeLogger: def debug(self, msg, *args): print msg % args info = warning = error = debug DEBUG = FakeLogger() print " fetching the file to establish a connection" fo = urllib2.urlopen(url) data1 = fo.read() fo.close() i = 20 print " waiting %i seconds for the server to close the connection" % i while i > 0: sys.stdout.write('\r %2i' % i) sys.stdout.flush() time.sleep(1) i -= 1 sys.stderr.write('\r') print " fetching the file a second time" fo = urllib2.urlopen(url) data2 = fo.read() fo.close() if data1 == data2: print ' data are identical' else: print ' ERROR: DATA DIFFER' DEBUG = dbbackup def test(url, N=10): print "checking error hander (do this on a non-200)" try: error_handler(url) except IOError, e: print "exiting - exception will prevent further tests" sys.exit() print print "performing continuity test (making sure stuff isn't corrupted)" continuity(url) print print "performing speed comparison" comp(N, url) print print "performing dropped-connection check" test_timeout(url) if __name__ == '__main__': import time import sys try: N = int(sys.argv[1]) url = sys.argv[2] except: print "%s <integer> <url>" % sys.argv[0] else: test(url, N)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -