📄 trees.c
字号:
/* Copyright (c) 1990-2006 Info-ZIP. All rights reserved. See the accompanying file LICENSE, version 2005-Feb-10 or later (the contents of which are also included in zip.h) for terms of use. If, for some reason, all these files are missing, the Info-ZIP license also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html*//* * trees.c by Jean-loup Gailly * * This is a new version of im_ctree.c originally written by Richard B. Wales * for the defunct implosion method. * The low level bit string handling routines from bits.c (originally * im_bits.c written by Richard B. Wales) have been merged into this version * of trees.c. * * PURPOSE * * Encode various sets of source values using variable-length * binary code trees. * Output the resulting variable-length bit strings. * Compression can be done to a file or to memory. * * DISCUSSION * * The PKZIP "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in the ZIP file in a compressed form * which is itself a Huffman encoding of the lengths of * all the code strings (in ascending order by source values). * The actual code strings are reconstructed from the lengths in * the UNZIP process, as described in the "application note" * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. * * The PKZIP "deflate" file format interprets compressed file data * as a sequence of bits. Multi-bit strings in the file may cross * byte boundaries without restriction. * The first bit of each byte is the low-order bit. * * The routines in this file allow a variable-length bit value to * be output right-to-left (useful for literal values). For * left-to-right output (useful for code strings from the tree routines), * the bits must have been reversed first with bi_reverse(). * * For in-memory compression, the compressed bit stream goes directly * into the requested output buffer. The buffer is limited to 64K on * 16 bit machines; flushing of the output buffer during compression * process is not supported. * The input data is read in blocks by the (*read_buf)() function. * * For more details about input to and output from the deflation routines, * see the actual input functions for (*read_buf)(), flush_outbuf(), and * the filecompress() resp. memcompress() wrapper functions which handle * the I/O setup. * * REFERENCES * * Lynch, Thomas J. * Data Compression: Techniques and Applications, pp. 53-55. * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. * * INTERFACE * * void ct_init (ush *attr, int *method) * Allocate the match buffer, initialize the various tables and save * the location of the internal file attribute (ascii/binary) and * method (DEFLATE/STORE) * * void ct_tally (int dist, int lc); * Save the match info and tally the frequency counts. * * ulg flush_block (char *buf, ulg stored_len, int eof) * Determine the best encoding for the current block: dynamic trees, * static trees or store, and output the encoded block to the zip * file. Returns the total compressed length for the file so far. * * void bi_init (char *tgt_buf, unsigned tgt_size, int flsh_allowed) * Initialize the bit string routines. * * Most of the bit string output functions are only used internally * in this source file, they are normally declared as "local" routines: * * local void send_bits (int value, int length) * Write out a bit string, taking the source bits right to * left. * * local unsigned bi_reverse (unsigned code, int len) * Reverse the bits of a bit string, taking the source bits left to * right and emitting them right to left. * * local void bi_windup (void) * Write out any remaining bits in an incomplete byte. * * local void copy_block(char *buf, unsigned len, int header) * Copy a stored block to the zip file, storing first the length and * its one's complement if requested. * * All output that exceeds the bitstring output buffer size (as initialized * by bi_init() is fed through an externally provided transfer routine * which flushes the bitstring output buffer on request and resets the * buffer fill counter: * * extern void flush_outbuf(char *o_buf, unsigned *o_idx); * */#define __TREES_C#include "zip.h"#include <ctype.h>#ifndef USE_ZLIB/* =========================================================================== * Constants */#define MAX_BITS 15/* All codes must not exceed MAX_BITS bits */#define MAX_BL_BITS 7/* Bit length codes must not exceed MAX_BL_BITS bits */#define LENGTH_CODES 29/* number of length codes, not counting the special END_BLOCK code */#define LITERALS 256/* number of literal bytes 0..255 */#define END_BLOCK 256/* end of block literal code */#define L_CODES (LITERALS+1+LENGTH_CODES)/* number of Literal or Length codes, including the END_BLOCK code */#define D_CODES 30/* number of distance codes */#define BL_CODES 19/* number of codes used to transfer the bit lengths */local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};local int near extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};#define STORED_BLOCK 0#define STATIC_TREES 1#define DYN_TREES 2/* The three kinds of block type */#ifndef LIT_BUFSIZE# ifdef SMALL_MEM# define LIT_BUFSIZE 0x2000# else# ifdef MEDIUM_MEM# define LIT_BUFSIZE 0x4000# else# define LIT_BUFSIZE 0x8000# endif# endif#endif#define DIST_BUFSIZE LIT_BUFSIZE/* Sizes of match buffers for literals/lengths and distances. There are * 4 reasons for limiting LIT_BUFSIZE to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input data is * still in the window so we can still emit a stored block even when input * comes from standard input. (This can also be done for all blocks if * LIT_BUFSIZE is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but have of course the overhead of transmitting trees * more frequently. * - I can't count above 4 * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save * memory at the expense of compression). Some optimizations would be possible * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */#define REP_3_6 16/* repeat previous bit length 3-6 times (2 bits of repeat count) */#define REPZ_3_10 17/* repeat a zero length 3-10 times (3 bits of repeat count) */#define REPZ_11_138 18/* repeat a zero length 11-138 times (7 bits of repeat count) *//* =========================================================================== * Local data *//* Data structure describing a single value and its code string. */typedef struct ct_data { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl;} ct_data;#define Freq fc.freq#define Code fc.code#define Dad dl.dad#define Len dl.len#define HEAP_SIZE (2*L_CODES+1)/* maximum heap size */local ct_data near dyn_ltree[HEAP_SIZE]; /* literal and length tree */local ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */local ct_data near static_ltree[L_CODES+2];/* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */local ct_data near static_dtree[D_CODES];/* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */local ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */typedef struct tree_desc { ct_data near *dyn_tree; /* the dynamic tree */ ct_data near *static_tree; /* corresponding static tree or NULL */ int near *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ int max_code; /* largest code with non zero frequency */} tree_desc;local tree_desc near l_desc ={dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0};local tree_desc near d_desc ={dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0};local tree_desc near bl_desc ={bl_tree, NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0};local ush near bl_count[MAX_BITS+1];/* number of codes at each bit length for an optimal tree */local uch near bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};/* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */local int near heap[2*L_CODES+1]; /* heap used to build the Huffman trees */local int heap_len; /* number of elements in the heap */local int heap_max; /* element of largest frequency *//* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */local uch near depth[2*L_CODES+1];/* Depth of each subtree used as tie breaker for trees of equal frequency */local uch length_code[MAX_MATCH-MIN_MATCH+1];/* length code for each normalized match length (0 == MIN_MATCH) */local uch dist_code[512];/* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */local int near base_length[LENGTH_CODES];/* First normalized length for each code (0 = MIN_MATCH) */local int near base_dist[D_CODES];/* First normalized distance for each code (0 = distance of 1) */#ifndef DYN_ALLOC local uch far l_buf[LIT_BUFSIZE]; /* buffer for literals/lengths */ local ush far d_buf[DIST_BUFSIZE]; /* buffer for distances */#else local uch far *l_buf; local ush far *d_buf;#endiflocal uch near flag_buf[(LIT_BUFSIZE/8)];/* flag_buf is a bit array distinguishing literals from lengths in * l_buf, and thus indicating the presence or absence of a distance. */local unsigned last_lit; /* running index in l_buf */local unsigned last_dist; /* running index in d_buf */local unsigned last_flags; /* running index in flag_buf */local uch flags; /* current flags not yet saved in flag_buf */local uch flag_bit; /* current bit used in flags *//* bits are filled in flags starting at bit 0 (least significant). * Note: these flags are overkill in the current code since we don't * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. */local ulg opt_len; /* bit length of current block with optimal trees */local ulg static_len; /* bit length of current block with static trees */local ulg cmpr_bytelen; /* total byte length of compressed file */local ulg cmpr_len_bits; /* number of bits past 'cmpr_bytelen' */#ifdef DEBUGlocal ulg input_len; /* total byte length of input file *//* input_len is for debugging only since we can get it by other means. */#endiflocal ush *file_type; /* pointer to UNKNOWN, BINARY or ASCII */local int *file_method; /* pointer to DEFLATE or STORE *//* =========================================================================== * Local data used by the "bit string" routines. */local int flush_flg;#if (!defined(ASMV) || !defined(RISCOS))local unsigned bi_buf;#elseunsigned bi_buf;#endif/* Output buffer. bits are inserted starting at the bottom (least significant * bits). The width of bi_buf must be at least 16 bits. */#define Buf_size (8 * 2*sizeof(char))/* Number of bits used within bi_buf. (bi_buf may be implemented on * more than 16 bits on some systems.) */#if (!defined(ASMV) || !defined(RISCOS))local int bi_valid;#elseint bi_valid;#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -