litezip.c

来自「是一个C语言的压缩和解压缩的DLL源程序.」· C语言 代码 · 共 1,988 行 · 第 1/5 页

C
1,988
字号
#include <windows.h>
#include "../LiteZip.h"
#define IDS_OK        20
#define IDS_UNKNOWN   21

// ================== CONSTANTS =====================

typedef unsigned char UCH;      // unsigned 8-bit value
typedef unsigned short USH;     // unsigned 16-bit value
typedef unsigned long ULG;      // unsigned 32-bit value

// internal file attribute
#define UNKNOWN (-1)
#define BINARY  0
#define ASCII   1

#define BEST		-1		// Use best method (deflation or store)
#define STORE		0		// Store method
#define DEFLATE		8		// Deflation method

// MSDOS file or directory attributes
#define MSDOS_HIDDEN_ATTR	0x02
#define MSDOS_DIR_ATTR		0x10

// Lengths of headers after signatures in bytes
#define LOCHEAD		26
#define CENHEAD		42
#define ENDHEAD		18

// Definitions for extra field handling:
#define EB_HEADSIZE			4		// length of a extra field block header
#define EB_LEN				2		// offset of data length field in header
#define EB_UT_MINLEN		1		// minimal UT field contains Flags byte
#define EB_UT_FLAGS			0		// byte offset of Flags field
#define EB_UT_TIME1			1		// byte offset of 1st time value
#define EB_UT_FL_MTIME		(1 << 0)	// mtime present
#define EB_UT_FL_ATIME		(1 << 1)	// atime present
#define EB_UT_FL_CTIME		(1 << 2)	// ctime present
#define EB_UT_LEN(n)		(EB_UT_MINLEN + 4 * (n))
#define EB_L_UT_SIZE		(EB_HEADSIZE + EB_UT_LEN(3))
#define EB_C_UT_SIZE		(EB_HEADSIZE + EB_UT_LEN(1))

// Signatures for zip file information headers
#define LOCSIG     0x04034b50L
#define CENSIG     0x02014b50L
#define ENDSIG     0x06054b50L
#define EXTLOCSIG  0x08074b50L

// The minimum and maximum match lengths
#define MIN_MATCH  3
#define MAX_MATCH  258

// Maximum window size = 32K. If you are really short of memory, compile
// with a smaller WSIZE but this reduces the compression ratio for files
// of size > WSIZE. WSIZE must be a power of two in the current implementation.
#define WSIZE  (0x8000)

// Minimum amount of lookahead, except at the end of the input file.
// See deflate.c for comments about the MIN_MATCH+1.
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)

// In order to simplify the code, particularly on 16 bit machines, match
// distances are limited to MAX_DIST instead of WSIZE.
#define MAX_DIST  (WSIZE-MIN_LOOKAHEAD)

// All codes must not exceed MAX_BITS bits
#define MAX_BITS		15

// Bit length codes must not exceed MAX_BL_BITS bits
#define MAX_BL_BITS		7

// number of length codes, not counting the special END_BLOCK code
#define LENGTH_CODES	29

// number of literal bytes 0..255
#define LITERALS		256

// end of block literal code
#define END_BLOCK		256

// number of Literal or Length codes, including the END_BLOCK code
#define L_CODES			(LITERALS+1+LENGTH_CODES)

// number of distance codes
#define D_CODES			30

// number of codes used to transfer the bit lengths
#define BL_CODES		19

// The three kinds of block type
#define STORED_BLOCK	0
#define STATIC_TREES	1
#define DYN_TREES		2

// Sizes of match buffers for literals/lengths and distances.  There are
// 4 reasons for limiting LIT_BUFSIZE to 64K:
//   - frequencies can be kept in 16 bit counters
//   - if compression is not successful for the first block, all input data is
//     still in the window so we can still emit a stored block even when input
//     comes from standard input.  (This can also be done for all blocks if
//     LIT_BUFSIZE is not greater than 32K.)
//   - if compression is not successful for a file smaller than 64K, we can
//     even emit a stored file instead of a stored block (saving 5 bytes).
//   - creating new Huffman trees less frequently may not provide fast
//     adaptation to changes in the input data statistics. (Take for
//     example a binary file with poorly compressible code followed by
//     a highly compressible string table.) Smaller buffer sizes give
//     fast adaptation but have of course the overhead of transmitting trees
//     more frequently.
//   - I can't count above 4
// The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
// memory at the expense of compression). Some optimizations would be possible
// if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
#define LIT_BUFSIZE		0x8000
#define DIST_BUFSIZE	LIT_BUFSIZE

// repeat previous bit length 3-6 times (2 bits of repeat count)
#define REP_3_6			16

// repeat a zero length 3-10 times  (3 bits of repeat count)
#define REPZ_3_10		17

// repeat a zero length 11-138 times  (7 bits of repeat count)
#define REPZ_11_138		18

// maximum heap size
#define HEAP_SIZE		(2*L_CODES+1)

// Number of bits used within bi_buf. (bi_buf may be implemented on
// more than 16 bits on some systems.)
#define ZIP_BUF_SIZE	(8 * 2*sizeof(char))

// For portability to 16 bit machines, do not use values above 15.
#define HASH_BITS	15

#define HASH_SIZE	(unsigned)(1<<HASH_BITS)
#define HASH_MASK	(HASH_SIZE-1)
#define WMASK		(WSIZE-1)
// HASH_SIZE and WSIZE must be powers of two

#define FAST		4
#define SLOW		2
// speed options for the general purpose bit flag

#define TOO_FAR		4096
// Matches of length 3 are discarded if their distance exceeds TOO_FAR

// Number of bits by which ins_h and del_h must be shifted at each
// input step. It must be such that after MIN_MATCH steps, the oldest
// byte no longer takes part in the hash key, that is:
//   H_SHIFT * MIN_MATCH >= HASH_BITS
#define H_SHIFT		((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)

// Index within the heap array of least frequent node in the Huffman tree
#define SMALLEST 1















// ================== STRUCTS =====================

typedef struct {
	USH good_length; // reduce lazy search above this match length
	USH max_lazy;    // do not perform lazy search above this match length
	USH nice_length; // quit search above this match length
	USH max_chain;
} CONFIG;

// Data structure describing a single value and its code string.
typedef struct {
	union {
		USH		freq;		// frequency count
		USH		code;		// bit string
	} fc;
	union {
		USH		dad;		// father node in Huffman tree
		USH		len;		// length of bit string
	} dl;
} CT_DATA;

typedef struct {
	CT_DATA		*dyn_tree;		// the dynamic tree
	CT_DATA		*static_tree;	// corresponding static tree or NULL
	const int	*extra_bits;	// extra bits for each code or NULL
	int			extra_base;		// base index for extra_bits
	int			elems;			// max number of elements in the tree
	int			max_length;		// max bit length for the codes
	int			max_code;		// largest code with non zero frequency
} TREE_DESC;

typedef struct
{
	// ... Since the bit lengths are imposed, there is no need for the L_CODES
	// extra codes used during heap construction. However the codes 286 and 287
	// are needed to build a canonical tree (see ct_init below).
	CT_DATA		dyn_ltree[HEAP_SIZE];		// literal and length tree
	CT_DATA		dyn_dtree[2*D_CODES+1];		// distance tree
	CT_DATA		static_ltree[L_CODES+2];	// the static literal tree...
	CT_DATA		static_dtree[D_CODES];		// the static distance tree...
	// ... (Actually a trivial tree since all codes use 5 bits.)
	CT_DATA		bl_tree[2*BL_CODES+1];		// Huffman tree for the bit lengths

	TREE_DESC	l_desc;
	TREE_DESC	d_desc;
	TREE_DESC	bl_desc;

	USH			bl_count[MAX_BITS+1];	// number of codes at each bit length for an optimal tree

	// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
	// The same heap array is used to build all trees.
	int			heap[2*L_CODES+1];		// heap used to build the Huffman trees
	int			heap_len;				// number of elements in the heap
	int			heap_max;				// element of largest frequency

	// Depth of each subtree used as tie breaker for trees of equal frequency
	UCH			depth[2*L_CODES+1];

	UCH			length_code[MAX_MATCH-MIN_MATCH+1];
	// length code for each normalized match length (0 == MIN_MATCH)

	// distance codes. The first 256 values correspond to the distances
	// 3 .. 258, the last 256 values correspond to the top 8 bits of
	// the 15 bit distances.
	UCH			dist_code[512];

	// First normalized length for each code (0 = MIN_MATCH)
	int			base_length[LENGTH_CODES];

	// First normalized distance for each code (0 = distance of 1)
	int			base_dist[D_CODES];

	UCH			l_buf[LIT_BUFSIZE];		// buffer for literals/lengths
	USH			d_buf[DIST_BUFSIZE];	// buffer for distances

	// flag_buf is a bit array distinguishing literals from lengths in
	// l_buf, and thus indicating the presence or absence of a distance.
	UCH			flag_buf[(LIT_BUFSIZE/8)];

	// bits are filled in flags starting at bit 0 (least significant).
	// Note: these flags are overkill in the current code since we don't
	// take advantage of DIST_BUFSIZE == LIT_BUFSIZE.
	unsigned	last_lit;			// running index in l_buf
	unsigned	last_dist;			// running index in d_buf
	unsigned	last_flags;			// running index in flag_buf
	UCH			flags;				// current flags not yet saved in flag_buf
	UCH			flag_bit;			// current bit used in flags

	ULG			opt_len;			// bit length of current block with optimal trees
	ULG			static_len;			// bit length of current block with static trees

	ULG			cmpr_bytelen;		// total byte length of compressed file (within the ZIP)
	ULG			cmpr_len_bits;		// number of bits past 'cmpr_bytelen'

	USH			*file_type;			// pointer to UNKNOWN, BINARY or ASCII

#ifndef NDEBUG
	// input_len is for debugging only since we can't get it by other means.
	ULG			input_len;			// total byte length of source file
#endif

} TTREESTATE;

typedef struct
{
	unsigned	bi_buf;			// Output buffer. bits are inserted starting at the bottom (least significant
								// bits). The width of bi_buf must be at least 16 bits.
	int			bi_valid;		// Number of valid bits in bi_buf. All bits above the last valid bit are always zero.
	char		*out_buf;		// Current output buffer.
	DWORD		out_offset;		// Current offset in output buffer
	DWORD		out_size;		// Size of current output buffer
#ifndef NDEBUG
	ULG			bits_sent;		// bit length of the compressed data  only needed for debugging
#endif
} TBITSTATE;

typedef struct
{
	// Sliding window. Input bytes are read into the second half of the window,
	// and move to the first half later to keep a dictionary of at least WSIZE
	// bytes. With this organization, matches are limited to a distance of
	// WSIZE-MAX_MATCH bytes, but this ensures that IO is always
	// performed with a length multiple of the block size. Also, it limits
	// the window size to 64K, which is quite useful on MSDOS.
	// To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would
	// be less efficient since the data would have to be copied WSIZE/CBSZ times)
	UCH				window[2L*WSIZE];

	// Link to older string with same hash index. To limit the size of this
	// array to 64K, this link is maintained only for the last 32K strings.
	// An index in this array is thus a window index modulo 32K.
	unsigned		prev[WSIZE];

	// Heads of the hash chains or 0. If your compiler thinks that
	// HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
	unsigned		head[HASH_SIZE];

	// window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
	// input file length plus MIN_LOOKAHEAD.
	ULG				window_size;

	// window position at the beginning of the current output block. Gets
	// negative when the window is moved backwards.
	long			block_start;

	// hash index of string to be inserted
	unsigned		ins_h;

	// Length of the best match at previous step. Matches not greater than this
	// are discarded. This is used in the lazy match evaluation.
	unsigned int	prev_length;

	unsigned		strstart;		// start of string to insert
	unsigned		match_start;	// start of matching string
	unsigned		lookahead;		// number of valid bytes ahead in window

	// To speed up deflation, hash chains are never searched beyond this length.
	// A higher limit improves compression ratio but degrades the speed.
	unsigned		max_chain_length;

	// Attempt to find a better match only when the current match is strictly
	// smaller than this value. This mechanism is used only for compression
	// levels >= 4.
	unsigned int	max_lazy_match;
	unsigned		good_match;		// Use a faster search when the previous match is longer than this
	int				nice_match;		// Stop searching when current match exceeds this

	unsigned char	eofile;			// flag set at end of source file
	unsigned char	sliding;		// Set to false when the source is already in memory

} TDEFLATESTATE;

typedef struct
{
	struct _TZIP	*tzip;
	TTREESTATE		ts;
	TBITSTATE		bs;
	TDEFLATESTATE	ds;
#ifndef NDEBUG
	const char		*err;
#endif
	unsigned char	level;		// compression level
//	unsigned char	seekable;	// 1 if we can seek() in the source
}  TSTATE;

typedef long lutime_t;       // define it ourselves since we don't include time.h

// Holds the Access, Modify, Create times, and DOS timestamp. Also the file attributes
typedef struct {
	lutime_t		atime, mtime, ctime;
	unsigned long	timestamp;
	unsigned long	attributes;
} IZTIMES;

// For storing values to be written to the ZIP Central Directory. Note: We write
// default values for some of the fields
typedef struct _TZIPFILEINFO {
	USH			flg, how;
	ULG			tim, crc, siz, len;
	DWORD		nam, ext, cext;			// offset of ext must be >= LOCHEAD
	USH			dsk, att, lflg;			// offset of lflg must be >= LOCHEAD
	ULG			atx, off;
	char		*extra;					// Extra field (set only if ext != 0)
	char		*cextra;				// Extra in central (set only if cext != 0)
	char		iname[MAX_PATH];		// Internal file name after cleanup
	struct _TZIPFILEINFO	*nxt;		// Pointer to next header in list
} TZIPFILEINFO;

// For TZIP->flags
#define TZIP_DESTMEMORY			0x0000001	// Set if TZIP->destination is memory, instead of a file, handle
#define TZIP_DESTCLOSEFH		0x0000002	// Set if we open the file handle in zipCreate() and must close it later
#define TZIP_CANSEEK			0x0000004	// Set if the destination (where we write the ZIP) is seekable
#define TZIP_DONECENTRALDIR		0x0000008	// Set after we've written out the Central Directory
#define TZIP_ENCRYPT			0x0000010	// Set if we should apply encrpytion to the output
#define TZIP_SRCCANSEEK			0x0000020	// Set if source (that supplies the data to add to the ZIP file) is seekable
#define TZIP_SRCCLOSEFH			0x0000040	// Set if we've opened the source file handle, and therefore need to close it
#define TZIP_SRCMEMORY			0x0000080	// Set if TZIP->source is memory, instead of a file, handle
//#define TZIP_OPTION_GZIP		0x8000000	// Defined in LiteZip.h. Must not be used for another purpose

typedef struct _TZIP
{
	DWORD		flags;

	// ====================================================================
	// These variables are for the destination (that we're writing the ZIP to).
	// We can write to pipe, file-by-handle, file-by-name, memory-to-memmapfile
	HANDLE		destination;	// If not TZIP_DESTMEMORY, this is the handle to the zip file we write to. Otherwise.
								// it points to a memory buffer where we write the zip.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?