litezip.c

来自「是一个C语言的压缩和解压缩的DLL源程序.」· C语言 代码 · 共 1,988 行 · 第 1/5 页

C
1,988
字号


/************************ ct_init() ******************
 * Allocates the match buffer, initializes the various
 * tables and saves the location of the internal file
 * attribute (ascii/binary) and method (DEFLATE/STORE).
 */

static void ct_init(register TSTATE *state, USH *attr)
{
	int		n;			// iterates over tree elements
	int		bits;		// bit counter
	int		length;		// length value
	int		code;		// code value
	int		dist;		// distance index

	state->ts.file_type = attr;

	state->ts.cmpr_bytelen = state->ts.cmpr_len_bits = 0;
#ifndef NDEBUG
	state->ts.input_len = 0;
#endif

	if (state->ts.static_dtree[0].dl.len) return;	// ct_init already called

	// Initialize the mapping length (0..255) -> length code (0..28)
	length = 0;
	for (code = 0; code < LENGTH_CODES - 1; code++)
	{
		state->ts.base_length[code] = length;
		for (n = 0; n < (1 << Extra_lbits[code]); n++)
			state->ts.length_code[length++] = (UCH)code;
	}

	// Note that the length 255 (match length 258) can be represented
	// in two different ways: code 284 + 5 bits or code 285, so we
	// overwrite length_code[255] to use the best encoding:
	state->ts.length_code[length - 1] = (UCH)code;

	// Initialize the mapping dist (0..32K) -> dist code (0..29)
	dist = 0;
	for (code = 0 ; code < 16; code++)
	{
		state->ts.base_dist[code] = dist;
		for (n = 0; n < (1 << Extra_dbits[code]); n++)
			state->ts.dist_code[dist++] = (UCH)code;
	}

	dist >>= 7; // from now on, all distances are divided by 128
	for ( ; code < D_CODES; code++)
	{
		state->ts.base_dist[code] = dist << 7;
		for (n = 0; n < (1 << (Extra_dbits[code] - 7)); n++)
			state->ts.dist_code[256 + dist++] = (UCH)code;
	}

	// Construct the codes of the static literal tree
	for (bits = 0; bits <= MAX_BITS; bits++) state->ts.bl_count[bits] = 0;
	n = 0;
	while (n <= 143) state->ts.static_ltree[n++].dl.len = 8, state->ts.bl_count[8]++;
	while (n <= 255) state->ts.static_ltree[n++].dl.len = 9, state->ts.bl_count[9]++;
	while (n <= 279) state->ts.static_ltree[n++].dl.len = 7, state->ts.bl_count[7]++;
	while (n <= 287) state->ts.static_ltree[n++].dl.len = 8, state->ts.bl_count[8]++;
	// fc.codes 286 and 287 do not exist, but we must include them in the
	// tree construction to get a canonical Huffman tree (longest code
	// all ones)
	gen_codes(state, (CT_DATA *)state->ts.static_ltree, L_CODES+1);

	// The static distance tree is trivial
	for (n = 0; n < D_CODES; n++)
	{
		state->ts.static_dtree[n].dl.len = 5;
		state->ts.static_dtree[n].fc.code = (USH)bi_reverse(n, 5);
	}

	// Initialize the first block of the first file
	init_block(state);
}





/********************** pqremove() ********************
 * Removes the smallest element from the heap and recreates
 * the heap with one less element. Updates heap and heap_len.
 */

#define pqremove(tree, top) \
{\
	top = state->ts.heap[SMALLEST]; \
	state->ts.heap[SMALLEST] = state->ts.heap[state->ts.heap_len--]; \
	pqdownheap(state, tree, SMALLEST); \
}





/*********************** smaller() *******************
 * Compares two subtrees, using the tree depth as tie
 * breaker when the subtrees have equal frequency. This
 * minimizes the worst case length.
 */

#define smaller(tree, n, m) \
	(tree[n].fc.freq < tree[m].fc.freq || \
	(tree[n].fc.freq == tree[m].fc.freq && state->ts.depth[n] <= state->ts.depth[m]))




/********************** pdownheap() ********************
 * Restores the heap property by moving down the tree
 * starting at node k, exchanging a node with the smallest
 * of its two sons if necessary, stopping when the heap
 * property is re-established (each father smaller than its
 * two sons).
 */

static void pqdownheap(register TSTATE *state, CT_DATA *tree, int k)
{
	int		v = state->ts.heap[k];
	int		j = k << 1;  // left son of k
	int		htemp;       // required because of bug in SASC compiler

	while (j <= state->ts.heap_len)
	{
		// Set j to the smallest of the two sons:
		if (j < state->ts.heap_len && smaller(tree, state->ts.heap[j+1], state->ts.heap[j])) j++;

		// Exit if v is smaller than both sons
		htemp = state->ts.heap[j];
		if (smaller(tree, v, htemp)) break;

		// Exchange v with the smallest son
		state->ts.heap[k] = htemp;
		k = j;

		// And continue down the tree, setting j to the left son of k
		j <<= 1;
	}
	state->ts.heap[k] = v;
}





/********************** gen_bitlen() ********************
 * Computes the optimal bit lengths for a tree and updates
 * the total bit length for the current block.
 *
 * IN assertion: the fields freq and dad are set, heap[heap_max] and
 *    above are the tree nodes sorted by increasing frequency.
 * OUT assertions: the field len is set to the optimal bit length, the
 *     array bl_count contains the frequencies for each bit length.
 *     The length opt_len is updated; static_len is also updated if stree is
 *     not null.
 */
static void gen_bitlen(register TSTATE *state, TREE_DESC *desc)
{
	CT_DATA		*tree		= desc->dyn_tree;
	const int	*extra		= desc->extra_bits;
	int			base		= desc->extra_base;
	int			max_code	= desc->max_code;
	int			max_length	= desc->max_length;
	CT_DATA		*stree		= desc->static_tree;
	int			h;              // heap index
	int			n, m;           // iterate over the tree elements
	int			bits;           // bit length
	int			xbits;          // extra bits
	USH			f;              // frequency
	int			overflow = 0;   // number of elements with bit length too large

	for (bits = 0; bits <= MAX_BITS; bits++) state->ts.bl_count[bits] = 0;

	// In a first pass, compute the optimal bit lengths (which may
	// overflow in the case of the bit length tree).
	tree[state->ts.heap[state->ts.heap_max]].dl.len = 0; // root of the heap

	for (h = state->ts.heap_max + 1; h < HEAP_SIZE; h++)
	{
		n = state->ts.heap[h];
		bits = tree[tree[n].dl.dad].dl.len + 1;
		if (bits > max_length)
		{	
			bits = max_length;
			++overflow;
		}
		tree[n].dl.len = (USH)bits;
		// We overwrite tree[n].dl.dad which is no longer needed

		if (n > max_code) continue; // not a leaf node

		state->ts.bl_count[bits]++;
		xbits = 0;
		if (n >= base) xbits = extra[n-base];
		f = tree[n].fc.freq;
		state->ts.opt_len += (ULG)f * (bits + xbits);
		if (stree) state->ts.static_len += (ULG)f * (stree[n].dl.len + xbits);
	}
	if (overflow == 0) return;

#ifndef NDEBUG
	Trace("\nbit length overflow\n");
#endif
	// This happens for example on obj2 and pic of the Calgary corpus

	// Find the first bit length which could increase:
	do
	{
		bits = max_length - 1;
		while (state->ts.bl_count[bits] == 0) bits--;
		state->ts.bl_count[bits]--;           // move one leaf down the tree
		state->ts.bl_count[bits+1] += (USH)2; // move one overflow item as its brother
		state->ts.bl_count[max_length]--;
		// The brother of the overflow item also moves one step up,
		// but this does not affect bl_count[max_length]
		overflow -= 2;
	} while (overflow > 0);

	// Now recompute all bit lengths, scanning in increasing frequency.
	// h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
	// lengths instead of fixing only the wrong ones. This idea is taken
	// from 'ar' written by Haruhiko Okumura.)
	for (bits = max_length; bits != 0; bits--)
	{
		n = state->ts.bl_count[bits];
		while (n != 0)
		{
			m = state->ts.heap[--h];
			if (m > max_code) continue;
			if (tree[m].dl.len != (USH)bits)
			{
#ifndef NDEBUG
				Trace("code %d bits %d->%d\n", m, tree[m].dl.len, bits);
#endif
				state->ts.opt_len += ((long)bits-(long)tree[m].dl.len)*(long)tree[m].fc.freq;
				tree[m].dl.len = (USH)bits;
			}
			--n;
		}
	}
}





/************************ gen_codes() *******************
 * Generates the codes for a given tree and bit counts
 * (which need not be optimal).
 *
 * IN assertion: the array bl_count contains the bit length statistics for
 * the given tree and the field len is set for all tree elements.
 *
 * OUT assertion: the field code is set for all tree elements of non
 * zero code length.
 */
static void gen_codes(register TSTATE *state, CT_DATA *tree, int max_code)
{
	USH				next_code[MAX_BITS+1];	// next code value for each bit length
	{
	register DWORD	bits;
	USH				code;

	// The distribution counts are first used to generate the code values
	// without bit reversal
	code = 0;
	for (bits = 1; bits <= MAX_BITS; bits++)
		next_code[bits] = code = (USH)((code + state->ts.bl_count[bits - 1]) << 1);

	// Check that the bit counts in bl_count are consistent. The last code
	// must be all ones
#ifndef NDEBUG
	Assert(state, code + state->ts.bl_count[MAX_BITS]-1 == (1<< ((USH) MAX_BITS)) - 1, "inconsistent bit counts");
	Trace("\ngen_codes: max_code %d ", max_code);
#endif
	}

	{
	int		n;

	for (n = 0;  n <= max_code; n++)
	{
		register DWORD len;

		// Reverse the bits
		if ((len = tree[n].dl.len)) tree[n].fc.code = (USH)bi_reverse(next_code[len]++, (unsigned char)len);
	}
	}
}





/************************** build_tree() **********************
 * Constructs one Huffman tree and assigns the code bit strings
 * and lengths. Updates the total bit length for the current block.
 *
 * RETURNS: The fields len and code are set to the optimal bit length
 * and corresponding code. The length opt_len is updated; static_len is
 * also updated if stree is not null. The field max_code is set.
 *
 * NOTE: Before calling here, the "freq" field of all tree
 * elements must be set to an appropriate value.
 */

static void build_tree(register TSTATE *state, TREE_DESC *desc)
{
	CT_DATA		*tree = desc->dyn_tree;
	CT_DATA		*stree = desc->static_tree;
	int			elems = desc->elems;
	int			n, m;				// iterate over heap elements
	int			max_code = -1;		// largest code with non zero frequency
	int			node = elems;		// next internal node of the tree

	// Construct the initial heap, with least frequent element in
	// heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
	// heap[0] is not used.
	state->ts.heap_len = 0;
	state->ts.heap_max = HEAP_SIZE;

	for (n = 0; n < elems; n++)
	{
		if (tree[n].fc.freq)
		{
			state->ts.heap[++state->ts.heap_len] = max_code = n;
			state->ts.depth[n] = 0;
		}
		else
			tree[n].dl.len = 0;
	}

	// The pkzip format requires that at least one distance code exists,
	// and that at least one bit should be sent even if there is only one
	// possible code. So to avoid special checks later on we force at least
	// two codes of non zero frequency
	while (state->ts.heap_len < 2)
	{
		int newcp = state->ts.heap[++state->ts.heap_len] = (max_code < 2 ? ++max_code : 0);
		tree[newcp].fc.freq = 1;
		state->ts.depth[newcp] = 0;
		state->ts.opt_len--;
		if (stree) state->ts.static_len -= stree[newcp].dl.len;
		// new is 0 or 1 so it does not have extra bits
	}
	desc->max_code = max_code;

	// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
	// establish sub-heaps of increasing lengths:
	for (n = state->ts.heap_len/2; n >= 1; n--) pqdownheap(state,tree, n);

	// Construct the Huffman tree by repeatedly combining the least two
	// frequent nodes
	do
	{
		pqremove(tree, n);						// n = node of least frequency
		m = state->ts.heap[SMALLEST];			// m = node of next least frequency

		state->ts.heap[--state->ts.heap_max] = n; // keep the nodes sorted by frequency
		state->ts.heap[--state->ts.heap_max] = m;

		// Create a new node father of n and m
		tree[node].fc.freq = (USH)(tree[n].fc.freq + tree[m].fc.freq);
		state->ts.depth[node] = (UCH) (Max(state->ts.depth[n], state->ts.depth[m]) + 1);
		tree[n].dl.dad = tree[m].dl.dad = (USH)node;
		// and insert the new node in the heap
		state->ts.heap[SMALLEST] = node++;
		pqdownheap(state,tree, SMALLEST);

	} while (state->ts.heap_len >= 2);

	state->ts.heap[--state->ts.heap_max] = state->ts.heap[SMALLEST];

	// At this point, the fields freq and dad are set. We can now
	// generate the bit lengths.
	gen_bitlen(state, desc);

	// The field len is now set, we can generate the bit codes
	gen_codes(state, tree, max_code);
}





/************************** scan_tree() **********************
 * Scans a literal or distance tree to determine the frequencies
 * of the codes in the bit length tree. Updates opt_len to take
 * into account the repeat counts. (The contribution of the bit
 * length codes will be added later during the construction of
 * bl_tree.)
 */

static void scan_tree(register TSTATE *state, CT_DATA *tree, int max_code)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?