📄 codecs.py
字号:
self.reader = Reader(stream, errors) self.writer = Writer(stream, errors) self.errors = errors def read(self, size=-1): return self.reader.read(size) def readline(self, size=None): return self.reader.readline(size) def readlines(self, sizehint=None): return self.reader.readlines(sizehint) def write(self, data): return self.writer.write(data) def writelines(self, list): return self.writer.writelines(list) def reset(self): self.reader.reset() self.writer.reset() def __getattr__(self, name, getattr=getattr): """ Inherit all other methods from the underlying stream. """ return getattr(self.stream, name)###class StreamRecoder: """ StreamRecoder instances provide a frontend - backend view of encoding data. They use the complete set of APIs returned by the codecs.lookup() function to implement their task. Data written to the stream is first decoded into an intermediate format (which is dependent on the given codec combination) and then written to the stream using an instance of the provided Writer class. In the other direction, data is read from the stream using a Reader instance and then return encoded data to the caller. """ # Optional attributes set by the file wrappers below data_encoding = 'unknown' file_encoding = 'unknown' def __init__(self, stream, encode, decode, Reader, Writer, errors='strict'): """ Creates a StreamRecoder instance which implements a two-way conversion: encode and decode work on the frontend (the input to .read() and output of .write()) while Reader and Writer work on the backend (reading and writing to the stream). You can use these objects to do transparent direct recodings from e.g. latin-1 to utf-8 and back. stream must be a file-like object. encode, decode must adhere to the Codec interface, Reader, Writer must be factory functions or classes providing the StreamReader, StreamWriter interface resp. encode and decode are needed for the frontend translation, Reader and Writer for the backend translation. Unicode is used as intermediate encoding. Error handling is done in the same way as defined for the StreamWriter/Readers. """ self.stream = stream self.encode = encode self.decode = decode self.reader = Reader(stream, errors) self.writer = Writer(stream, errors) self.errors = errors def read(self, size=-1): data = self.reader.read(size) data, bytesencoded = self.encode(data, self.errors) return data def readline(self, size=None): if size is None: data = self.reader.readline() else: data = self.reader.readline(size) data, bytesencoded = self.encode(data, self.errors) return data def readlines(self, sizehint=None): if sizehint is None: data = self.reader.read() else: data = self.reader.read(sizehint) data, bytesencoded = self.encode(data, self.errors) return data.splitlines(1) def write(self, data): data, bytesdecoded = self.decode(data, self.errors) return self.writer.write(data) def writelines(self, list): data = ''.join(list) data, bytesdecoded = self.decode(data, self.errors) return self.writer.write(data) def reset(self): self.reader.reset() self.writer.reset() def __getattr__(self, name, getattr=getattr): """ Inherit all other methods from the underlying stream. """ return getattr(self.stream, name)### Shortcutsdef open(filename, mode='rb', encoding=None, errors='strict', buffering=1): """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. Note: The wrapped version will only accept the object format defined by the codecs, i.e. Unicode objects for most builtin codecs. Output is also codec dependent and will usually by Unicode as well. Files are always opened in binary mode, even if no binary mode was specified. Thisis done to avoid data loss due to encodings using 8-bit values. The default file mode is 'rb' meaning to open the file in binary read mode. encoding specifies the encoding which is to be used for the the file. errors may be given to define the error handling. It defaults to 'strict' which causes ValueErrors to be raised in case an encoding error occurs. buffering has the same meaning as for the builtin open() API. It defaults to line buffered. The returned wrapped file object provides an extra attribute .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. """ if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode mode = mode + 'b' file = __builtin__.open(filename, mode, buffering) if encoding is None: return file (e, d, sr, sw) = lookup(encoding) srw = StreamReaderWriter(file, sr, sw, errors) # Add attributes to simplify introspection srw.encoding = encoding return srwdef EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): """ Return a wrapped version of file which provides transparent encoding translation. Strings written to the wrapped file are interpreted according to the given data_encoding and then written to the original file as string using file_encoding. The intermediate encoding will usually be Unicode but depends on the specified codecs. Strings are read from the file using file_encoding and then passed back to the caller as string using data_encoding. If file_encoding is not given, it defaults to data_encoding. errors may be given to define the error handling. It defaults to 'strict' which causes ValueErrors to be raised in case an encoding error occurs. The returned wrapped file object provides two extra attributes .data_encoding and .file_encoding which reflect the given parameters of the same name. The attributes can be used for introspection by Python programs. """ if file_encoding is None: file_encoding = data_encoding encode, decode = lookup(data_encoding)[:2] Reader, Writer = lookup(file_encoding)[2:] sr = StreamRecoder(file, encode, decode, Reader, Writer, errors) # Add attributes to simplify introspection sr.data_encoding = data_encoding sr.file_encoding = file_encoding return sr### Helpers for codec lookupdef getencoder(encoding): """ Lookup up the codec for the given encoding and return its encoder function. Raises a LookupError in case the encoding cannot be found. """ return lookup(encoding)[0]def getdecoder(encoding): """ Lookup up the codec for the given encoding and return its decoder function. Raises a LookupError in case the encoding cannot be found. """ return lookup(encoding)[1]def getreader(encoding): """ Lookup up the codec for the given encoding and return its StreamReader class or factory function. Raises a LookupError in case the encoding cannot be found. """ return lookup(encoding)[2]def getwriter(encoding): """ Lookup up the codec for the given encoding and return its StreamWriter class or factory function. Raises a LookupError in case the encoding cannot be found. """ return lookup(encoding)[3]### Helpers for charmap-based codecsdef make_identity_dict(rng): """ make_identity_dict(rng) -> dict Return a dictionary where elements of the rng sequence are mapped to themselves. """ res = {} for i in rng: res[i]=i return resdef make_encoding_map(decoding_map): """ Creates an encoding map from a decoding map. If a target mapping in the decoding map occurrs multiple times, then that target is mapped to None (undefined mapping), causing an exception when encountered by the charmap codec during translation. One example where this happens is cp875.py which decodes multiple character to \u001a. """ m = {} for k,v in decoding_map.items(): if not m.has_key(v): m[v] = k else: m[v] = None return m# Tell modulefinder that using codecs probably needs the encodings# package_false = 0if _false: import encodings### Testsif __name__ == '__main__': import sys # Make stdout translate Latin-1 output into UTF-8 output sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') # Have stdin translate Latin-1 input into UTF-8 input sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -