📄 zipfile.py
字号:
"""Read and write ZIP files.

The module exposes ``is_zipfile`` for a quick magic-number check, ``ZipInfo``
to describe one archive member, and ``ZipFile`` to open an archive and read
its table of contents.  Archives that end with a comment are not accepted.
"""
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement

import struct
import os
import time
import binascii
import types

try:
    import zlib     # We may need its compression method
except ImportError:
    zlib = None

# NOTE(review): PyZipFile is exported here but is not defined in the visible
# part of this file -- confirm it is defined further down.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]


class BadZipfile(Exception):
    """Raised when a file does not have the expected ZIP structure."""
    pass


error = BadZipfile      # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# Used to compare file passed to ZipFile
_STRING_TYPES = (types.StringType,)
if hasattr(types, "UnicodeType"):
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)


def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the last 22 bytes of the file start with the
    end-of-archive magic number and end with a zero comment length.
    Returns None otherwise, including when the file cannot be opened or
    read (IOError is swallowed).
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Close even when seek/read fails so the handle is not leaked.
            fpin.close()
    except IOError:
        return None
    if endrec[0:4] == stringEndArchive and endrec[-2:] == "\000\000":
        return 1                        # file has correct magic number
    return None


class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
        """Create a member description with standard default values.

        filename:  name of the file in the archive; os.sep is converted
                   to "/" by _normpath.
        date_time: tuple of (year, month, day, hour, min, sec).
        """
        self.filename = _normpath(filename)  # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by the
        file name and the extra data.  Requires CRC, compress_size and
        file_size to be set unless flag bit 0x08 is set.
        """
        dt = self.date_time
        # Pack the date and time into the two 16-bit MS-DOS style fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved,
                             self.flag_bits, self.compress_type,
                             dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(self.filename), len(self.extra))
        return header + self.filename + self.extra


# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/":
    def _normpath(path):
        return path.replace(os.sep, "/")
else:
    def _normpath(path):
        return path


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.  Raises
        RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED when zlib is missing, and for an invalid mode.
        Reading the table of contents may raise BadZipfile.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode[:1]

        # Validate the mode before touching the file system, so that a bad
        # mode always raises RuntimeError and never opens (or leaks) a file.
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        if key not in modeDict:
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, _STRING_TYPES):
            self._filePassed = 0
            self.filename = file
            # Index by the first character so paths and file objects accept
            # the same set of mode strings.
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
            except IOError:
                # Too short to hold an end-of-archive record, so it cannot
                # be a zip file.
                endrec = ""
            else:
                endrec = fp.read()
            if (endrec[0:4] == stringEndArchive and
                    endrec[-2:] == "\000\000"):
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, writing starts at the current position.

    def _GetContents(self):
        """Read the directory, making sure we close the file if the
        read fails.

        A file that ZipFile opened itself is closed and self.fp is reset to
        None before the exception is re-raised; a file object supplied by
        the caller is left open.
        """
        try:
            self._RealGetContents()
        except Exception:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo with one ZipInfo per
        central directory entry and sets self.start_dir.  Raises BadZipfile
        on a bad magic number and RuntimeError when the name in a local
        file header differs from the name in the central directory.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size,
             zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                               t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % (total,))
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                    'File name in directory "%s" and header "%s" differ.' % (
                        data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -