httplib.py
来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Python 代码 · 共 1,235 行 · 第 1/3 页
PY
1,235 行
"""HTTP/1.1 client library<intro stuff goes here><other stuff, too>HTTPConnection go through a number of "states", which defines when a clientmay legally make another request or fetch the response for a particularrequest. This diagram details these state transitions: (null) | | HTTPConnection() v Idle | | putrequest() v Request-started | | ( putheader() )* endheaders() v Request-sent | | response = getresponse() v Unread-response [Response-headers-read] |\____________________ | | | response.read() | putrequest() v v Idle Req-started-unread-response ______/| / | response.read() | | ( putheader() )* endheaders() v v Request-started Req-sent-unread-response | | response.read() v Request-sentThis diagram presents the following rules: -- a second request may not be started until {response-headers-read} -- a response [object] cannot be retrieved until {request-sent} -- there is no differentiation between an unread response body and a partially read response bodyNote: this enforcement is applied by the HTTPConnection class. The HTTPResponse class does not enforce this state machine, which implies sophisticated clients may accelerate the request/response pipeline. Caution should be taken, though: accelerating the states beyond the above pattern may imply knowledge of the server's connection-close behavior for certain requests. For example, it is impossible to tell whether the server will close the connection UNTIL the response headers have been read; this means that further requests cannot be placed into the pipeline until it is known that the server will NOT be closing the connection.Logical State __state __response------------- ------- ----------Idle _CS_IDLE NoneRequest-started _CS_REQ_STARTED NoneRequest-sent _CS_REQ_SENT NoneUnread-response _CS_IDLE <response_class>Req-started-unread-response _CS_REQ_STARTED <response_class>Req-sent-unread-response _CS_REQ_SENT <response_class>"""import errnoimport mimetoolsimport socketfrom urlparse import urlsplittry: from cStringIO import StringIOexcept ImportError: from StringIO import StringIO__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection", "HTTPException", "NotConnected", "UnknownProtocol", "UnknownTransferEncoding", "UnimplementedFileMode", "IncompleteRead", "InvalidURL", "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", "BadStatusLine", "error"]HTTP_PORT = 80HTTPS_PORT = 443_UNKNOWN = 'UNKNOWN'# connection states_CS_IDLE = 'Idle'_CS_REQ_STARTED = 'Request-started'_CS_REQ_SENT = 'Request-sent'class HTTPMessage(mimetools.Message): def addheader(self, key, value): """Add header for field key handling repeats.""" prev = self.dict.get(key) if prev is None: self.dict[key] = value else: combined = ", ".join((prev, value)) self.dict[key] = combined def addcontinue(self, key, more): """Add more field data from a continuation line.""" prev = self.dict[key] self.dict[key] = prev + "\n " + more def readheaders(self): """Read header lines. Read header lines up to the entirely blank line that terminates them. The (normally blank) line that ends the headers is skipped, but not included in the returned list. If a non-header line ends the headers, (which is an error), an attempt is made to backspace over it; it is never included in the returned list. The variable self.status is set to the empty string if all went well, otherwise it is an error message. The variable self.headers is a completely uninterpreted list of lines contained in the header (so printing them will reproduce the header exactly as it appears in the file). If multiple header fields with the same name occur, they are combined according to the rules in RFC 2616 sec 4.2: Appending each subsequent field-value to the first, each separated by a comma. The order in which header fields with the same field-name are received is significant to the interpretation of the combined field value. """ # XXX The implementation overrides the readheaders() method of # rfc822.Message. The base class design isn't amenable to # customized behavior here so the method here is a copy of the # base class code with a few small changes. self.dict = {} self.unixfrom = '' self.headers = list = [] self.status = '' headerseen = "" firstline = 1 startofline = unread = tell = None if hasattr(self.fp, 'unread'): unread = self.fp.unread elif self.seekable: tell = self.fp.tell while 1: if tell: try: startofline = tell() except IOError: startofline = tell = None self.seekable = 0 line = self.fp.readline() if not line: self.status = 'EOF in headers' break # Skip unix From name time lines if firstline and line.startswith('From '): self.unixfrom = self.unixfrom + line continue firstline = 0 if headerseen and line[0] in ' \t': # XXX Not sure if continuation lines are handled properly # for http and/or for repeating headers # It's a continuation line. list.append(line) x = self.dict[headerseen] + "\n " + line.strip() self.addcontinue(headerseen, line.strip()) continue elif self.iscomment(line): # It's a comment. Ignore it. continue elif self.islast(line): # Note! No pushback here! The delimiter line gets eaten. break headerseen = self.isheader(line) if headerseen: # It's a legal header line, save it. list.append(line) self.addheader(headerseen, line[len(headerseen)+1:].strip()) continue else: # It's not a header line; throw it back and stop here. if not self.dict: self.status = 'No headers' else: self.status = 'Non-header line where header expected' # Try to undo the read. if unread: unread(line) elif tell: self.fp.seek(startofline) else: self.status = self.status + '; bad seek' breakclass HTTPResponse: # strict: If true, raise BadStatusLine if the status line can't be # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is # false because it prevents clients from talking to HTTP/0.9 # servers. Note that a response with a sufficiently corrupted # status line will look like an HTTP/0.9 response. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. def __init__(self, sock, debuglevel=0, strict=0): self.fp = sock.makefile('rb', 0) self.debuglevel = debuglevel self.strict = strict self.msg = None # from the Status-Line of the response self.version = _UNKNOWN # HTTP-Version self.status = _UNKNOWN # Status-Code self.reason = _UNKNOWN # Reason-Phrase self.chunked = _UNKNOWN # is "chunked" being used? self.chunk_left = _UNKNOWN # bytes left to read in current chunk self.length = _UNKNOWN # number of bytes left in response self.will_close = _UNKNOWN # conn will close at end of response def _read_status(self): # Initialize with Simple-Response defaults line = self.fp.readline() if self.debuglevel > 0: print "reply:", repr(line) try: [version, status, reason] = line.split(None, 2) except ValueError: try: [version, status] = line.split(None, 1) reason = "" except ValueError: # empty version will cause next test to fail and status # will be treated as 0.9 response. version = "" if not version.startswith('HTTP/'): if self.strict: self.close() raise BadStatusLine(line) else: # assume it's a Simple-Response from an 0.9 server self.fp = LineAndFileWrapper(line, self.fp) return "HTTP/0.9", 200, "" # The status code is a three-digit number try: status = int(status) if status < 100 or status > 999: raise BadStatusLine(line) except ValueError: raise BadStatusLine(line) return version, status, reason def begin(self): if self.msg is not None: # we've already started reading the response return # read until we get a non-100 response while 1: version, status, reason = self._read_status() if status != 100: break # skip the header from the 100 response while 1: skip = self.fp.readline().strip() if not skip: break if self.debuglevel > 0: print "header:", skip self.status = status self.reason = reason.strip() if version == 'HTTP/1.0': self.version = 10 elif version.startswith('HTTP/1.'): self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 elif version == 'HTTP/0.9': self.version = 9 else: raise UnknownProtocol(version) if self.version == 9: self.chunked = 0 self.will_close = 1 self.msg = HTTPMessage(StringIO()) return self.msg = HTTPMessage(self.fp, 0) if self.debuglevel > 0: for hdr in self.msg.headers: print "header:", hdr, # don't let the msg keep an fp self.msg.fp = None # are we using the chunked-style of transfer encoding? tr_enc = self.msg.getheader('transfer-encoding') if tr_enc and tr_enc.lower() == "chunked": self.chunked = 1 self.chunk_left = None else: self.chunked = 0 # will the connection close at the end of the response? conn = self.msg.getheader('connection') if conn: conn = conn.lower() # a "Connection: close" will always close the connection. if we # don't see that and this is not HTTP/1.1, then the connection will # close unless we see a Keep-Alive header. self.will_close = conn.find('close') != -1 or \ ( self.version != 11 and \ not self.msg.getheader('keep-alive') ) else: # for HTTP/1.1, the connection will always remain open # otherwise, it will remain open IFF we see a Keep-Alive header self.will_close = self.version != 11 and \ not self.msg.getheader('keep-alive') # do we have a Content-Length? # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" length = self.msg.getheader('content-length') if length and not self.chunked: try: self.length = int(length) except ValueError: self.length = None else: self.length = None # does the body have a fixed length? (of zero) if (status == 204 or # No Content status == 304 or # Not Modified 100 <= status < 200): # 1xx codes self.length = 0 # if the connection remains open, and we aren't using chunked, and # a content-length was not provided, then assume that the connection # WILL close. if not self.will_close and \ not self.chunked and \ self.length is None: self.will_close = 1 def close(self): if self.fp: self.fp.close() self.fp = None def isclosed(self): # NOTE: it is possible that we will not ever call self.close(). This # case occurs when will_close is TRUE, length is None, and we # read up to the last byte, but NOT past it. # # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be # called, meaning self.isclosed() is meaningful. return self.fp is None def read(self, amt=None): if self.fp is None: return '' if self.chunked: return self._read_chunked(amt) if amt is None: # unbounded read if self.will_close: s = self.fp.read() else: s = self._safe_read(self.length) self.close() # we read everything return s if self.length is not None: if amt > self.length: # clip the read to the "end of response" amt = self.length self.length -= amt # we do not use _safe_read() here because this may be a .will_close # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) s = self.fp.read(amt) return s def _read_chunked(self, amt): assert self.chunked != _UNKNOWN chunk_left = self.chunk_left value = '' # XXX This accumulates chunks by repeated string concatenation, # which is not efficient as the number or size of chunks gets big. while 1: if chunk_left is None: line = self.fp.readline() i = line.find(';') if i >= 0: line = line[:i] # strip chunk-extensions chunk_left = int(line, 16) if chunk_left == 0: break
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?