📄 trees.c
字号:
/* Copyright (C) 1990-1993 Mark Adler, Richard B. Wales, Jean-loup Gailly, Kai Uwe Rommel and Igor Mandrichenko. Permission is granted to any individual or institution to use, copy, or redistribute this software so long as all of the original files are included, that it is not sold for profit, and that this copyright notice is retained.*//* * trees.c by Jean-loup Gailly * * This is a new version of im_ctree.c originally written by Richard B. Wales * for the defunct implosion method. * * PURPOSE * * Encode various sets of source values using variable-length * binary code trees. * * DISCUSSION * * The PKZIP "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in the ZIP file in a compressed form * which is itself a Huffman encoding of the lengths of * all the code strings (in ascending order by source values). * The actual code strings are reconstructed from the lengths in * the UNZIP process, as described in the "application note" * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. * * REFERENCES * * Lynch, Thomas J. * Data Compression: Techniques and Applications, pp. 53-55. * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. * * INTERFACE * * void ct_init (ush *attr, int *method) * Allocate the match buffer, initialize the various tables and save * the location of the internal file attribute (ascii/binary) and * method (DEFLATE/STORE) * * void ct_tally (int dist, int lc); * Save the match info and tally the frequency counts. * * long flush_block (char *buf, ulg stored_len, int eof) * Determine the best encoding for the current block: dynamic trees, * static trees or store, and output the encoded block to the zip * file. Returns the total compressed length for the file so far. * */#include <ctype.h>#include "zip.h"/* =========================================================================== * Constants */#define MAX_BITS 15/* All codes must not exceed MAX_BITS bits */#define MAX_BL_BITS 7/* Bit length codes must not exceed MAX_BL_BITS bits */#define LENGTH_CODES 29/* number of length codes, not counting the special END_BLOCK code */#define LITERALS 256/* number of literal bytes 0..255 */#define END_BLOCK 256/* end of block literal code */#define L_CODES (LITERALS+1+LENGTH_CODES)/* number of Literal or Length codes, including the END_BLOCK code */#define D_CODES 30/* number of distance codes */#define BL_CODES 19/* number of codes used to transfer the bit lengths */local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};local int near extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};#define STORED_BLOCK 0#define STATIC_TREES 1#define DYN_TREES 2/* The three kinds of block type */#ifndef LIT_BUFSIZE# ifdef SMALL_MEM# define LIT_BUFSIZE 0x2000# else# ifdef MEDIUM_MEM# define LIT_BUFSIZE 0x4000# else# define LIT_BUFSIZE 0x8000# endif# endif#endif#define DIST_BUFSIZE LIT_BUFSIZE/* Sizes of match buffers for literals/lengths and distances. There are * 4 reasons for limiting LIT_BUFSIZE to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input data is * still in the window so we can still emit a stored block even when input * comes from standard input. (This can also be done for all blocks if * LIT_BUFSIZE is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but have of course the overhead of transmitting trees * more frequently. * - I can't count above 4 * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save * memory at the expense of compression). Some optimizations would be possible * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */#define REP_3_6 16/* repeat previous bit length 3-6 times (2 bits of repeat count) */#define REPZ_3_10 17/* repeat a zero length 3-10 times (3 bits of repeat count) */#define REPZ_11_138 18/* repeat a zero length 11-138 times (7 bits of repeat count) *//* =========================================================================== * Local data *//* Data structure describing a single value and its code string. */typedef struct ct_data { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl;} ct_data;#define Freq fc.freq#define Code fc.code#define Dad dl.dad#define Len dl.len#define HEAP_SIZE (2*L_CODES+1)/* maximum heap size */local ct_data near dyn_ltree[HEAP_SIZE]; /* literal and length tree */local ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */local ct_data near static_ltree[L_CODES+2];/* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */local ct_data near static_dtree[D_CODES];/* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */local ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */typedef struct tree_desc { ct_data near *dyn_tree; /* the dynamic tree */ ct_data near *static_tree; /* corresponding static tree or NULL */ int near *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ int max_code; /* largest code with non zero frequency */} tree_desc;local tree_desc near l_desc ={dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0};local tree_desc near d_desc ={dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0};local tree_desc near bl_desc ={bl_tree, NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0};local ush near bl_count[MAX_BITS+1];/* number of codes at each bit length for an optimal tree */local uch near bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};/* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */local int near heap[2*L_CODES+1]; /* heap used to build the Huffman trees */local int heap_len; /* number of elements in the heap */local int heap_max; /* element of largest frequency *//* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */local uch near depth[2*L_CODES+1];/* Depth of each subtree used as tie breaker for trees of equal frequency */local uch length_code[MAX_MATCH-MIN_MATCH+1];/* length code for each normalized match length (0 == MIN_MATCH) */local uch dist_code[512];/* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */local int near base_length[LENGTH_CODES];/* First normalized length for each code (0 = MIN_MATCH) */local int near base_dist[D_CODES];/* First normalized distance for each code (0 = distance of 1) */#ifndef DYN_ALLOC local uch far l_buf[LIT_BUFSIZE]; /* buffer for literals/lengths */ local ush far d_buf[DIST_BUFSIZE]; /* buffer for distances */#else local uch far *l_buf; local ush far *d_buf;#endiflocal uch near flag_buf[(LIT_BUFSIZE/8)];/* flag_buf is a bit array distinguishing literals from lengths in * l_buf, and thus indicating the presence or absence of a distance. */local unsigned last_lit; /* running index in l_buf */local unsigned last_dist; /* running index in d_buf */local unsigned last_flags; /* running index in flag_buf */local uch flags; /* current flags not yet saved in flag_buf */local uch flag_bit; /* current bit used in flags *//* bits are filled in flags starting at bit 0 (least significant). * Note: these flags are overkill in the current code since we don't * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. */local ulg opt_len; /* bit length of current block with optimal trees */local ulg static_len; /* bit length of current block with static trees */local ulg compressed_len; /* total bit length of compressed file */local ulg input_len; /* total byte length of input file *//* input_len is for debugging only since we can get it by other means. */ush *file_type; /* pointer to UNKNOWN, BINARY or ASCII */int *file_method; /* pointer to DEFLATE or STORE */#ifdef DEBUGextern ulg bits_sent; /* bit length of the compressed data */extern ulg isize; /* byte length of input file */#endifextern long block_start; /* window offset of current block */extern unsigned near strstart; /* window offset of current string *//* =========================================================================== * Local (static) routines in this file. */local void init_block OF((void));local void pqdownheap OF((ct_data near *tree, int k));local void gen_bitlen OF((tree_desc near *desc));local void gen_codes OF((ct_data near *tree, int max_code));local void build_tree OF((tree_desc near *desc));local void scan_tree OF((ct_data near *tree, int max_code));local void send_tree OF((ct_data near *tree, int max_code));local int build_bl_tree OF((void));local void send_all_trees OF((int lcodes, int dcodes, int blcodes));local void compress_block OF((ct_data near *ltree, ct_data near *dtree));local void set_file_type OF((void));#ifndef DEBUG# define send_code(c, tree) send_bits(tree[c].Code, tree[c].Len) /* Send a code of the given tree. c and tree must not have side effects */#else /* DEBUG */# define send_code(c, tree) \ { if (verbose>1) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(tree[c].Code, tree[c].Len); }#endif#define d_code(dist) \ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])/* Mapping from a distance to a distance code. dist is the distance - 1 and * must not have side effects. dist_code[256] and dist_code[257] are never * used. */#define MAX(a,b) (a >= b ? a : b)/* the arguments must not have side effects *//* =========================================================================== * Allocate the match buffer, initialize the various tables and save the * location of the internal file attribute (ascii/binary) and method * (DEFLATE/STORE). */void ct_init(attr, method) ush *attr; /* pointer to internal file attribute */ int *method; /* pointer to compression method */{ int n; /* iterates over tree elements */ int bits; /* bit counter */ int length; /* length value */ int code; /* code value */ int dist; /* distance index */ file_type = attr; file_method = method; compressed_len = input_len = 0L; if (static_dtree[0].Len != 0) return; /* ct_init already called */#ifdef DYN_ALLOC d_buf = (ush far*) fcalloc(DIST_BUFSIZE, sizeof(ush)); l_buf = (uch far*) fcalloc(LIT_BUFSIZE/2, 2); /* Avoid using the value 64K on 16 bit machines */ if (l_buf == NULL || d_buf == NULL) error("ct_init: out of memory");#endif /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { base_length[code] = length; for (n = 0; n < (1<<extra_lbits[code]); n++) { length_code[length++] = (uch)code; } } Assert (length == 256, "ct_init: length != 256"); /* Note that the length 255 (match length 258) can be represented * in two different ways: code 284 + 5 bits or code 285, so we * overwrite length_code[255] to use the best encoding: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -