📄 zipfile.py
字号:
"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
import struct, os, time
import binascii
try:
import zlib # We may need its compression method
except ImportError:
zlib = None
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
"ZipInfo", "ZipFile", "PyZipFile"]
class BadZipfile(Exception):
pass
error = BadZipfile # The exception raised by this module
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006" # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4H3l2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header
# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
# Used to compare file passed to ZipFile
import types
_STRING_TYPES = (types.StringType,)
if hasattr(types, "UnicodeType"):
_STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)
def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number.
Will not accept a ZIP archive with an ending comment.
"""
try:
fpin = open(filename, "rb")
fpin.seek(-22, 2) # Seek to end-of-file record
endrec = fpin.read()
fpin.close()
if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
return 1 # file has correct magic number
except IOError:
pass
class ZipInfo:
"""Class with attributes describing each file in the ZIP archive."""
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.filename = _normpath(filename) # Name of the file in the archive
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
self.comment = "" # Comment for each file
self.extra = "" # ZIP extra data
self.create_system = 0 # System which created ZIP archive
self.create_version = 20 # Version which created ZIP archive
self.extract_version = 20 # Version needed to extract archive
self.reserved = 0 # Must be zero
self.flag_bits = 0 # ZIP flag bits
self.volume = 0 # Volume number of file header
self.internal_attr = 0 # Internal attributes
self.external_attr = 0 # External file attributes
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# file_offset Byte offset to the start of the file data
# CRC CRC-32 of the uncompressed file
# compress_size Size of the compressed file
# file_size Size of the uncompressed file
def FileHeader(self):
"""Return the per-file header as a string."""
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
if self.flag_bits & 0x08:
# Set these to zero because we write them after the file data
CRC = compress_size = file_size = 0
else:
CRC = self.CRC
compress_size = self.compress_size
file_size = self.file_size
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, self.flag_bits,
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(self.filename), len(self.extra))
return header + self.filename + self.extra
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/":
def _normpath(path):
return path.replace(os.sep, "/")
else:
def _normpath(path):
return path
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED)
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
"""
fp = None # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED):
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
if compression == ZIP_STORED:
pass
elif compression == ZIP_DEFLATED:
if not zlib:
raise RuntimeError,\
"Compression requires the (missing) zlib module"
else:
raise RuntimeError, "That compression method is not supported"
self.debug = 0 # Level of printing: 0 through 3
self.NameToInfo = {} # Find file info given name
self.filelist = [] # List of ZipInfo instances for archive
self.compression = compression # Method of compression
self.mode = key = mode[0]
# Check if we were passed a file-like object
if type(file) in _STRING_TYPES:
self._filePassed = 0
self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
self.fp = open(file, modeDict[mode])
else:
self._filePassed = 1
self.fp = file
self.filename = getattr(file, 'name', None)
if key == 'r':
self._GetContents()
elif key == 'w':
pass
elif key == 'a':
fp = self.fp
fp.seek(-22, 2) # Seek to end-of-file record
endrec = fp.read()
if endrec[0:4] == stringEndArchive and \
endrec[-2:] == "\000\000":
self._GetContents() # file is a zip file
# seek to start of directory and overwrite
fp.seek(self.start_dir, 0)
else: # file is not a zip file, just append
fp.seek(0, 2)
else:
if not self._filePassed:
self.fp.close()
self.fp = None
raise RuntimeError, 'Mode must be "r", "w" or "a"'
def _GetContents(self):
"""Read the directory, making sure we close the file if the format
is bad."""
try:
self._RealGetContents()
except BadZipfile:
if not self._filePassed:
self.fp.close()
self.fp = None
raise
def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
fp.seek(-22, 2) # Start of end-of-archive record
filesize = fp.tell() + 22 # Get file size
endrec = fp.read(22) # Archive must not end with a comment!
if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
raise BadZipfile, "File is not a zip file, or ends with a comment"
endrec = struct.unpack(structEndArchive, endrec)
if self.debug > 1:
print endrec
size_cd = endrec[5] # bytes in central directory
offset_cd = endrec[6] # offset of central directory
x = filesize - 22 - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2:
print "given, inferred, offset", offset_cd, x, concat
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
total = 0
while total < size_cd:
centdir = fp.read(46)
total = total + 46
if centdir[0:4] != stringCentralDir:
raise BadZipfile, "Bad magic number for central directory"
centdir = struct.unpack(structCentralDir, centdir)
if self.debug > 2:
print centdir
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
# Create ZipInfo instance to store file information
x = ZipInfo(filename)
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
# file_offset must be computed below...
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
x.volume, x.internal_attr, x.external_attr = centdir[15:18]
# Convert date/time code to (year, month, day, hour, min, sec)
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
self.filelist.append(x)
self.NameToInfo[x.filename] = x
if self.debug > 2:
print "total", total
for data in self.filelist:
fp.seek(data.header_offset, 0)
fheader = fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
# file_offset is computed here, since the extra field for
# the central directory and for the local file header
# refer to different fields, and they can have different
# lengths
data.file_offset = (data.header_offset + 30
+ fheader[_FH_FILENAME_LENGTH]
+ fheader[_FH_EXTRA_FIELD_LENGTH])
fname = fp.read(fheader[_FH_FILENAME_LENGTH])
if fname != data.filename:
raise RuntimeError, \
'File name in directory "%s" and header "%s" differ.' % (
data.filename, fname)
def namelist(self):
"""Return a list of file names in the archive."""
l = []
for data in self.filelist:
l.append(data.filename)
return l
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -