litezip.c
来自「是一个C语言的压缩和解压缩的DLL源程序.」· C语言 代码 · 共 1,988 行 · 第 1/5 页
C
1,988 行
/************************ ct_init() ******************
* Allocates the match buffer, initializes the various
* tables and saves the location of the internal file
* attribute (ascii/binary) and method (DEFLATE/STORE).
*/
static void ct_init(register TSTATE *state, USH *attr)
{
int n; // iterates over tree elements
int bits; // bit counter
int length; // length value
int code; // code value
int dist; // distance index
state->ts.file_type = attr;
state->ts.cmpr_bytelen = state->ts.cmpr_len_bits = 0;
#ifndef NDEBUG
state->ts.input_len = 0;
#endif
if (state->ts.static_dtree[0].dl.len) return; // ct_init already called
// Initialize the mapping length (0..255) -> length code (0..28)
length = 0;
for (code = 0; code < LENGTH_CODES - 1; code++)
{
state->ts.base_length[code] = length;
for (n = 0; n < (1 << Extra_lbits[code]); n++)
state->ts.length_code[length++] = (UCH)code;
}
// Note that the length 255 (match length 258) can be represented
// in two different ways: code 284 + 5 bits or code 285, so we
// overwrite length_code[255] to use the best encoding:
state->ts.length_code[length - 1] = (UCH)code;
// Initialize the mapping dist (0..32K) -> dist code (0..29)
dist = 0;
for (code = 0 ; code < 16; code++)
{
state->ts.base_dist[code] = dist;
for (n = 0; n < (1 << Extra_dbits[code]); n++)
state->ts.dist_code[dist++] = (UCH)code;
}
dist >>= 7; // from now on, all distances are divided by 128
for ( ; code < D_CODES; code++)
{
state->ts.base_dist[code] = dist << 7;
for (n = 0; n < (1 << (Extra_dbits[code] - 7)); n++)
state->ts.dist_code[256 + dist++] = (UCH)code;
}
// Construct the codes of the static literal tree
for (bits = 0; bits <= MAX_BITS; bits++) state->ts.bl_count[bits] = 0;
n = 0;
while (n <= 143) state->ts.static_ltree[n++].dl.len = 8, state->ts.bl_count[8]++;
while (n <= 255) state->ts.static_ltree[n++].dl.len = 9, state->ts.bl_count[9]++;
while (n <= 279) state->ts.static_ltree[n++].dl.len = 7, state->ts.bl_count[7]++;
while (n <= 287) state->ts.static_ltree[n++].dl.len = 8, state->ts.bl_count[8]++;
// fc.codes 286 and 287 do not exist, but we must include them in the
// tree construction to get a canonical Huffman tree (longest code
// all ones)
gen_codes(state, (CT_DATA *)state->ts.static_ltree, L_CODES+1);
// The static distance tree is trivial
for (n = 0; n < D_CODES; n++)
{
state->ts.static_dtree[n].dl.len = 5;
state->ts.static_dtree[n].fc.code = (USH)bi_reverse(n, 5);
}
// Initialize the first block of the first file
init_block(state);
}
/********************** pqremove() ********************
* Removes the smallest element from the heap and recreates
* the heap with one less element. Updates heap and heap_len.
*/
#define pqremove(tree, top) \
{\
top = state->ts.heap[SMALLEST]; \
state->ts.heap[SMALLEST] = state->ts.heap[state->ts.heap_len--]; \
pqdownheap(state, tree, SMALLEST); \
}
/*********************** smaller() *******************
* Compares two subtrees, using the tree depth as tie
* breaker when the subtrees have equal frequency. This
* minimizes the worst case length.
*/
#define smaller(tree, n, m) \
(tree[n].fc.freq < tree[m].fc.freq || \
(tree[n].fc.freq == tree[m].fc.freq && state->ts.depth[n] <= state->ts.depth[m]))
/********************** pdownheap() ********************
* Restores the heap property by moving down the tree
* starting at node k, exchanging a node with the smallest
* of its two sons if necessary, stopping when the heap
* property is re-established (each father smaller than its
* two sons).
*/
static void pqdownheap(register TSTATE *state, CT_DATA *tree, int k)
{
int v = state->ts.heap[k];
int j = k << 1; // left son of k
int htemp; // required because of bug in SASC compiler
while (j <= state->ts.heap_len)
{
// Set j to the smallest of the two sons:
if (j < state->ts.heap_len && smaller(tree, state->ts.heap[j+1], state->ts.heap[j])) j++;
// Exit if v is smaller than both sons
htemp = state->ts.heap[j];
if (smaller(tree, v, htemp)) break;
// Exchange v with the smallest son
state->ts.heap[k] = htemp;
k = j;
// And continue down the tree, setting j to the left son of k
j <<= 1;
}
state->ts.heap[k] = v;
}
/********************** gen_bitlen() ********************
* Computes the optimal bit lengths for a tree and updates
* the total bit length for the current block.
*
* IN assertion: the fields freq and dad are set, heap[heap_max] and
* above are the tree nodes sorted by increasing frequency.
* OUT assertions: the field len is set to the optimal bit length, the
* array bl_count contains the frequencies for each bit length.
* The length opt_len is updated; static_len is also updated if stree is
* not null.
*/
static void gen_bitlen(register TSTATE *state, TREE_DESC *desc)
{
CT_DATA *tree = desc->dyn_tree;
const int *extra = desc->extra_bits;
int base = desc->extra_base;
int max_code = desc->max_code;
int max_length = desc->max_length;
CT_DATA *stree = desc->static_tree;
int h; // heap index
int n, m; // iterate over the tree elements
int bits; // bit length
int xbits; // extra bits
USH f; // frequency
int overflow = 0; // number of elements with bit length too large
for (bits = 0; bits <= MAX_BITS; bits++) state->ts.bl_count[bits] = 0;
// In a first pass, compute the optimal bit lengths (which may
// overflow in the case of the bit length tree).
tree[state->ts.heap[state->ts.heap_max]].dl.len = 0; // root of the heap
for (h = state->ts.heap_max + 1; h < HEAP_SIZE; h++)
{
n = state->ts.heap[h];
bits = tree[tree[n].dl.dad].dl.len + 1;
if (bits > max_length)
{
bits = max_length;
++overflow;
}
tree[n].dl.len = (USH)bits;
// We overwrite tree[n].dl.dad which is no longer needed
if (n > max_code) continue; // not a leaf node
state->ts.bl_count[bits]++;
xbits = 0;
if (n >= base) xbits = extra[n-base];
f = tree[n].fc.freq;
state->ts.opt_len += (ULG)f * (bits + xbits);
if (stree) state->ts.static_len += (ULG)f * (stree[n].dl.len + xbits);
}
if (overflow == 0) return;
#ifndef NDEBUG
Trace("\nbit length overflow\n");
#endif
// This happens for example on obj2 and pic of the Calgary corpus
// Find the first bit length which could increase:
do
{
bits = max_length - 1;
while (state->ts.bl_count[bits] == 0) bits--;
state->ts.bl_count[bits]--; // move one leaf down the tree
state->ts.bl_count[bits+1] += (USH)2; // move one overflow item as its brother
state->ts.bl_count[max_length]--;
// The brother of the overflow item also moves one step up,
// but this does not affect bl_count[max_length]
overflow -= 2;
} while (overflow > 0);
// Now recompute all bit lengths, scanning in increasing frequency.
// h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
// lengths instead of fixing only the wrong ones. This idea is taken
// from 'ar' written by Haruhiko Okumura.)
for (bits = max_length; bits != 0; bits--)
{
n = state->ts.bl_count[bits];
while (n != 0)
{
m = state->ts.heap[--h];
if (m > max_code) continue;
if (tree[m].dl.len != (USH)bits)
{
#ifndef NDEBUG
Trace("code %d bits %d->%d\n", m, tree[m].dl.len, bits);
#endif
state->ts.opt_len += ((long)bits-(long)tree[m].dl.len)*(long)tree[m].fc.freq;
tree[m].dl.len = (USH)bits;
}
--n;
}
}
}
/************************ gen_codes() *******************
* Generates the codes for a given tree and bit counts
* (which need not be optimal).
*
* IN assertion: the array bl_count contains the bit length statistics for
* the given tree and the field len is set for all tree elements.
*
* OUT assertion: the field code is set for all tree elements of non
* zero code length.
*/
static void gen_codes(register TSTATE *state, CT_DATA *tree, int max_code)
{
USH next_code[MAX_BITS+1]; // next code value for each bit length
{
register DWORD bits;
USH code;
// The distribution counts are first used to generate the code values
// without bit reversal
code = 0;
for (bits = 1; bits <= MAX_BITS; bits++)
next_code[bits] = code = (USH)((code + state->ts.bl_count[bits - 1]) << 1);
// Check that the bit counts in bl_count are consistent. The last code
// must be all ones
#ifndef NDEBUG
Assert(state, code + state->ts.bl_count[MAX_BITS]-1 == (1<< ((USH) MAX_BITS)) - 1, "inconsistent bit counts");
Trace("\ngen_codes: max_code %d ", max_code);
#endif
}
{
int n;
for (n = 0; n <= max_code; n++)
{
register DWORD len;
// Reverse the bits
if ((len = tree[n].dl.len)) tree[n].fc.code = (USH)bi_reverse(next_code[len]++, (unsigned char)len);
}
}
}
/************************** build_tree() **********************
* Constructs one Huffman tree and assigns the code bit strings
* and lengths. Updates the total bit length for the current block.
*
* RETURNS: The fields len and code are set to the optimal bit length
* and corresponding code. The length opt_len is updated; static_len is
* also updated if stree is not null. The field max_code is set.
*
* NOTE: Before calling here, the "freq" field of all tree
* elements must be set to an appropriate value.
*/
static void build_tree(register TSTATE *state, TREE_DESC *desc)
{
CT_DATA *tree = desc->dyn_tree;
CT_DATA *stree = desc->static_tree;
int elems = desc->elems;
int n, m; // iterate over heap elements
int max_code = -1; // largest code with non zero frequency
int node = elems; // next internal node of the tree
// Construct the initial heap, with least frequent element in
// heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
// heap[0] is not used.
state->ts.heap_len = 0;
state->ts.heap_max = HEAP_SIZE;
for (n = 0; n < elems; n++)
{
if (tree[n].fc.freq)
{
state->ts.heap[++state->ts.heap_len] = max_code = n;
state->ts.depth[n] = 0;
}
else
tree[n].dl.len = 0;
}
// The pkzip format requires that at least one distance code exists,
// and that at least one bit should be sent even if there is only one
// possible code. So to avoid special checks later on we force at least
// two codes of non zero frequency
while (state->ts.heap_len < 2)
{
int newcp = state->ts.heap[++state->ts.heap_len] = (max_code < 2 ? ++max_code : 0);
tree[newcp].fc.freq = 1;
state->ts.depth[newcp] = 0;
state->ts.opt_len--;
if (stree) state->ts.static_len -= stree[newcp].dl.len;
// new is 0 or 1 so it does not have extra bits
}
desc->max_code = max_code;
// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
// establish sub-heaps of increasing lengths:
for (n = state->ts.heap_len/2; n >= 1; n--) pqdownheap(state,tree, n);
// Construct the Huffman tree by repeatedly combining the least two
// frequent nodes
do
{
pqremove(tree, n); // n = node of least frequency
m = state->ts.heap[SMALLEST]; // m = node of next least frequency
state->ts.heap[--state->ts.heap_max] = n; // keep the nodes sorted by frequency
state->ts.heap[--state->ts.heap_max] = m;
// Create a new node father of n and m
tree[node].fc.freq = (USH)(tree[n].fc.freq + tree[m].fc.freq);
state->ts.depth[node] = (UCH) (Max(state->ts.depth[n], state->ts.depth[m]) + 1);
tree[n].dl.dad = tree[m].dl.dad = (USH)node;
// and insert the new node in the heap
state->ts.heap[SMALLEST] = node++;
pqdownheap(state,tree, SMALLEST);
} while (state->ts.heap_len >= 2);
state->ts.heap[--state->ts.heap_max] = state->ts.heap[SMALLEST];
// At this point, the fields freq and dad are set. We can now
// generate the bit lengths.
gen_bitlen(state, desc);
// The field len is now set, we can generate the bit codes
gen_codes(state, tree, max_code);
}
/************************** scan_tree() **********************
* Scans a literal or distance tree to determine the frequencies
* of the codes in the bit length tree. Updates opt_len to take
* into account the repeat counts. (The contribution of the bit
* length codes will be added later during the construction of
* bl_tree.)
*/
static void scan_tree(register TSTATE *state, CT_DATA *tree, int max_code)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?