📄 zipfile.py

📁 mallet是自然语言处理、机器学习领域的一个开源项目。
💻 PY
📖 第 1 页 / 共 2 页
字号:
12 下一页
"Read and write ZIP files."
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement

import struct
import os
import time
import binascii

try:
    import zlib # We may need its compression method
except ImportError:
    zlib = None

__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile"]


class BadZipfile(Exception):
    """Raised when a file does not have the expected ZIP structure."""
    pass

error = BadZipfile      # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4H3l2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# Used to compare file passed to ZipFile
import types
_STRING_TYPES = (types.StringType,)
if hasattr(types, "UnicodeType"):
    _STRING_TYPES = _STRING_TYPES + (types.UnicodeType,)


def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the last 22 bytes of the file start with the
    end-of-archive magic number and end with a zero comment length;
    returns 0 otherwise, including when the file cannot be opened or is
    too short to seek 22 bytes back from its end (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            # Close even when seek/read fails, so the handle is not leaked.
            fpin.close()
    except IOError:
        return 0
    if endrec[0:4] == stringEndArchive and endrec[-2:] == "\000\000":
        return 1        # file has correct magic number
    return 0


class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with the standard default values.

        filename:  name of the file in the archive; passed through
                   _normpath() before being stored.
        date_time: tuple (year, month, day, hour, min, sec).
        """
        self.filename = _normpath(filename) # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by
        the file name and the extra data.  Reads self.CRC,
        self.compress_size and self.file_size unless bit 0x08 of
        self.flag_bits is set.
        """
        dt = self.date_time
        # Pack the date and time into the two 16-bit fields of the header;
        # seconds are stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(self.extra))
        return header + self.filename + self.extra


# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/":
    def _normpath(path):
        """Return path with every os.sep replaced by a forward slash."""
        return path.replace(os.sep, "/")
else:
    def _normpath(path):
        """Return path unchanged; os.sep is already a forward slash."""
        return path


class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Only the first character of mode is significant.

        Raises RuntimeError if the compression method is unsupported, if
        ZIP_DEFLATED is requested without zlib, or if the mode is not
        "r", "w" or "a".  Raises BadZipfile (via _GetContents) when the
        file is opened for reading and its directory is malformed.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        # Validate the mode before touching the file system, so a bad mode
        # always produces the RuntimeError below instead of a KeyError
        # from the mode table, and no file is opened needlessly.
        key = mode[:1]
        if key not in ('r', 'w', 'a'):
            raise RuntimeError('Mode must be "r", "w" or "a"')
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key

        # Check if we were passed a file-like object
        if isinstance(file, _STRING_TYPES):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            fp = self.fp
            fp.seek(-22, 2)             # Seek to end-of-file record
            endrec = fp.read()
            if endrec[0:4] == stringEndArchive and \
                       endrec[-2:] == "\000\000":
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:               # file is not a zip file, just append
                fp.seek(0, 2)
        # key == 'w': nothing to read, the archive starts out empty.

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            # Only close files this object opened itself.
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo from the central
        directory and sets self.start_dir, then reads each local file
        header to compute the entry's file_offset.

        Raises BadZipfile on a bad magic number or a trailing archive
        comment, and RuntimeError when a file name in the central
        directory differs from the one in the local header.
        """
        fp = self.fp
        fp.seek(-22, 2)         # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
            raise BadZipfile("File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            # Single parenthesised argument: prints the same on Python 2.
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        inferred_cd = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred_cd - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred_cd, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
            # file_offset must be computed below...
            (zinfo.create_version, zinfo.create_system,
                zinfo.extract_version, zinfo.reserved,
                zinfo.flag_bits, zinfo.compress_type, t, d,
                zinfo.CRC, zinfo.compress_size,
                zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
                zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # file_offset is computed here, since the extra field for
            # the central directory and for the local file header
            # refer to different fields, and they can have different
            # lengths
            data.file_offset = (data.header_offset + 30
                                + fheader[_FH_FILENAME_LENGTH]
                                + fheader[_FH_EXTRA_FIELD_LENGTH])
            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
            if fname != data.filename:
                raise RuntimeError(
                      'File name in directory "%s" and header "%s" differ.' % (
                          data.filename, fname))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]
12 下一页
💿 文件大小 15297 K
👤 上传用户 scorpioll
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#mallet #自然语言处理 #机器学习 #开源
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -