⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.c

📁 GZip Compress Souce Code
💻 C
📖 第 1 页 / 共 3 页
字号:
/* trees.c -- output deflated data using Huffman coding * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. *//* *  PURPOSE * *      Encode various sets of source values using variable-length *      binary code trees. * *  DISCUSSION * *      The PKZIP "deflation" process uses several Huffman trees. The more *      common source values are represented by shorter bit sequences. * *      Each code tree is stored in the ZIP file in a compressed form *      which is itself a Huffman encoding of the lengths of *      all the code strings (in ascending order by source values). *      The actual code strings are reconstructed from the lengths in *      the UNZIP process, as described in the "application note" *      (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. * *  REFERENCES * *      Lynch, Thomas J. *          Data Compression:  Techniques and Applications, pp. 53-55. *          Lifetime Learning Publications, 1985.  ISBN 0-534-03418-7. * *      Storer, James A. *          Data Compression:  Methods and Theory, pp. 49-50. *          Computer Science Press, 1988.  ISBN 0-7167-8156-5. * *      Sedgewick, R. *          Algorithms, p290. *          Addison-Wesley, 1983. ISBN 0-201-06672-6. * *  INTERFACE * *      void ct_init (ush *attr, int *methodp) *          Allocate the match buffer, initialize the various tables and save *          the location of the internal file attribute (ascii/binary) and *          method (DEFLATE/STORE) * *      void ct_tally (int dist, int lc); *          Save the match info and tally the frequency counts. * *      long flush_block (char *buf, ulg stored_len, int eof) *          Determine the best encoding for the current block: dynamic trees, *          static trees or store, and output the encoded block to the zip *          file. Returns the total compressed length for the file so far. * */#include <ctype.h>#include "tailor.h"#include "gzip.h"#include "bsdebug.h"//#define _DEBUG_TREES_C_#ifdef DBGPrintfi#undef DBGPrintfi#endif#ifdef DBGPrintfo#undef DBGPrintfo#endif#if defined(_TRACEHELPER_DEBUG_ALL_)  #define DBGPrintfi(a) DbgPrintfi a;  #define DBGPrintfo(a) DbgPrintfo a;#elif defined(_DEBUG_TREES_C_) && defined(_TRACEHELPER_DEBUG_EACH_OF_FILE_)  #define DBGPrintfi(a) DbgPrintfi a;  #define DBGPrintfo(a) DbgPrintfo a;#else  #define DBGPrintfi(a)  #define DBGPrintfo(a)#endif#ifdef RCSIDstatic char rcsid[] = "$Id: trees.c,v 0.12 1993/06/10 13:27:54 jloup Exp $";#endif/* =========================================================================== * Constants */#define MAX_BITS 15/* All codes must not exceed MAX_BITS bits */#define MAX_BL_BITS 7/* Bit length codes must not exceed MAX_BL_BITS bits */#define LENGTH_CODES 29/* number of length codes, not counting the special END_BLOCK code */#define LITERALS  256/* number of literal bytes 0..255 */#define END_BLOCK 256/* end of block literal code */#define L_CODES (LITERALS+1+LENGTH_CODES)/* number of Literal or Length codes, including the END_BLOCK code */#define D_CODES   30/* number of distance codes */#define BL_CODES  19/* number of codes used to transfer the bit lengths */local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};local int near extra_dbits[D_CODES] /* extra bits for each distance code */   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};#define STORED_BLOCK 0#define STATIC_TREES 1#define DYN_TREES    2/* The three kinds of block type */#ifndef LIT_BUFSIZE#  ifdef SMALL_MEM#    define LIT_BUFSIZE  0x2000#  else#  ifdef MEDIUM_MEM//#    define LIT_BUFSIZE  0x4000   #    define LIT_BUFSIZE  0x2000#  else//#    define LIT_BUFSIZE  0x8000   #    define LIT_BUFSIZE  0x2000#  endif#  endif#endif#ifndef DIST_BUFSIZE#  define DIST_BUFSIZE  LIT_BUFSIZE#endif/* Sizes of match buffers for literals/lengths and distances.  There are * 4 reasons for limiting LIT_BUFSIZE to 64K: *   - frequencies can be kept in 16 bit counters *   - if compression is not successful for the first block, all input data is *     still in the window so we can still emit a stored block even when input *     comes from standard input.  (This can also be done for all blocks if *     LIT_BUFSIZE is not greater than 32K.) *   - if compression is not successful for a file smaller than 64K, we can *     even emit a stored file instead of a stored block (saving 5 bytes). *   - creating new Huffman trees less frequently may not provide fast *     adaptation to changes in the input data statistics. (Take for *     example a binary file with poorly compressible code followed by *     a highly compressible string table.) Smaller buffer sizes give *     fast adaptation but have of course the overhead of transmitting trees *     more frequently. *   - I can't count above 4 * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save * memory at the expense of compression). Some optimizations would be possible * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */#if LIT_BUFSIZE > INBUFSIZ    error cannot overlay l_buf and inbuf#endif#define REP_3_6      16/* repeat previous bit length 3-6 times (2 bits of repeat count) */#define REPZ_3_10    17/* repeat a zero length 3-10 times  (3 bits of repeat count) */#define REPZ_11_138  18/* repeat a zero length 11-138 times  (7 bits of repeat count) *//* =========================================================================== * Local data *//* Data structure describing a single value and its code string. */typedef struct ct_data {    union {        ush  freq;       /* frequency count */        ush  code;       /* bit string */    } fc;    union {        ush  dad;        /* father node in Huffman tree */        ush  len;        /* length of bit string */    } dl;} ct_data;#define Freq fc.freq#define Code fc.code#define Dad  dl.dad#define Len  dl.len#define HEAP_SIZE (2*L_CODES+1)/* maximum heap size */local ct_data near dyn_ltree[HEAP_SIZE];   /* literal and length tree */local ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */local ct_data near static_ltree[L_CODES+2];/* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */local ct_data near static_dtree[D_CODES];/* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */local ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */typedef struct tree_desc {    ct_data near *dyn_tree;      /* the dynamic tree */    ct_data near *static_tree;   /* corresponding static tree or NULL */    int     near *extra_bits;    /* extra bits for each code or NULL */    int     extra_base;          /* base index for extra_bits */    int     elems;               /* max number of elements in the tree */    int     max_length;          /* max bit length for the codes */    int     max_code;            /* largest code with non zero frequency */} tree_desc;local tree_desc near l_desc ={dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0};local tree_desc near d_desc ={dyn_dtree, static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS, 0};local tree_desc near bl_desc ={bl_tree, (ct_data near *)0, extra_blbits, 0,      BL_CODES, MAX_BL_BITS, 0};local ush near bl_count[MAX_BITS+1];/* number of codes at each bit length for an optimal tree */local uch near bl_order[BL_CODES]   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};/* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */local int near heap[2*L_CODES+1]; /* heap used to build the Huffman trees */local int heap_len;               /* number of elements in the heap */local int heap_max;               /* element of largest frequency *//* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */local uch near depth[2*L_CODES+1];/* Depth of each subtree used as tie breaker for trees of equal frequency */local uch length_code[MAX_MATCH-MIN_MATCH+1];/* length code for each normalized match length (0 == MIN_MATCH) */local uch dist_code[512];/* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */local int near base_length[LENGTH_CODES];/* First normalized length for each code (0 = MIN_MATCH) */local int near base_dist[D_CODES];/* First normalized distance for each code (0 = distance of 1) */#define l_buf inbuf/* DECLARE(uch, l_buf, LIT_BUFSIZE);  buffer for literals or lengths *//* DECLARE(ush, d_buf, DIST_BUFSIZE); buffer for distances */local uch near flag_buf[(LIT_BUFSIZE/8)];/* flag_buf is a bit array distinguishing literals from lengths in * l_buf, thus indicating the presence or absence of a distance. */local unsigned last_lit;    /* running index in l_buf */local unsigned last_dist;   /* running index in d_buf */local unsigned last_flags;  /* running index in flag_buf */local uch flags;            /* current flags not yet saved in flag_buf */local uch flag_bit;         /* current bit used in flags *//* bits are filled in flags starting at bit 0 (least significant). * Note: these flags are overkill in the current code since we don't * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. */local ulg opt_len;        /* bit length of current block with optimal trees */local ulg static_len;     /* bit length of current block with static trees */local ulg compressed_len; /* total bit length of compressed file */local ulg input_len;      /* total byte length of input file *//* input_len is for debugging only since we can get it by other means. */ush *file_type;        /* pointer to UNKNOWN, BINARY or ASCII */int *file_method;      /* pointer to DEFLATE or STORE */#ifdef DEBUGextern ulg bits_sent;  /* bit length of the compressed data */extern long isize;     /* byte length of input file */#endifextern long block_start;       /* window offset of current block */extern unsigned near strstart; /* window offset of current string *//* =========================================================================== * Local (static) routines in this file. */local void init_block     OF((void));local void pqdownheap     OF((ct_data near *tree, int k));local void gen_bitlen     OF((tree_desc near *desc));local void gen_codes      OF((ct_data near *tree, int max_code));local void build_tree     OF((tree_desc near *desc));local void scan_tree      OF((ct_data near *tree, int max_code));local void send_tree      OF((ct_data near *tree, int max_code));local int  build_bl_tree  OF((void));local void send_all_trees OF((int lcodes, int dcodes, int blcodes));local void compress_block OF((ct_data near *ltree, ct_data near *dtree));local void set_file_type  OF((void));#ifndef DEBUG#  define send_code(c, tree) send_bits(tree[c].Code, tree[c].Len)   /* Send a code of the given tree. c and tree must not have side effects */#else /* DEBUG */#  define send_code(c, tree) \     { if (verbose>1) fprintf(stderr,"\ncd %3d ",(c)); \       send_bits(tree[c].Code, tree[c].Len); }#endif#define d_code(dist) \   ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])/* Mapping from a distance to a distance code. dist is the distance - 1 and * must not have side effects. dist_code[256] and dist_code[257] are never * used. */#define MAX(a,b) (a >= b ? a : b)/* the arguments must not have side effects *//* =========================================================================== * Allocate the match buffer, initialize the various tables and save the * location of the internal file attribute (ascii/binary) and method * (DEFLATE/STORE). */void ct_init(attr, methodp)    ush  *attr;   /* pointer to internal file attribute */    int  *methodp; /* pointer to compression method */{  DBGPrintfi(("ct_init(In)\r\n"));  {    int n;        /* iterates over tree elements */    int bits;     /* bit counter */    int length;   /* length value */    int code;     /* code value */    int dist;     /* distance index */    file_type = attr;    file_method = methodp;    compressed_len = input_len = 0L;            if (static_dtree[0].Len != 0) {                                    DBGPrintfo(("ct_init(out1)\r\n"));                                   return ;                                   } /* ct_init already called */    /* Initialize the mapping length (0..255) -> length code (0..28) */    length = 0;    for (code = 0; code < LENGTH_CODES-1; code++) {        base_length[code] = length;        for (n = 0; n < (1<<extra_lbits[code]); n++) {            length_code[length++] = (uch)code;        }    }    //Assert (length == 256, "ct_init: length != 256");    /* Note that the length 255 (match length 258) can be represented     * in two different ways: code 284 + 5 bits or code 285, so we     * overwrite length_code[255] to use the best encoding:     */    length_code[length-1] = (uch)code;    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */    dist = 0;    for (code = 0 ; code < 16; code++) {        base_dist[code] = dist;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -