📄 gzip.c
字号:
if (foreground) { (void) signal(SIGINT, (sig_type) abort_gzip); }#ifdef SIGTERM if (signal(SIGTERM, SIG_IGN) != SIG_IGN) { (void) signal(SIGTERM, (sig_type) abort_gzip); }#endif#ifdef SIGHUP if (signal(SIGHUP, SIG_IGN) != SIG_IGN) { (void) signal(SIGHUP, (sig_type) abort_gzip); }#endif strncpy(z_suffix, Z_SUFFIX, sizeof(z_suffix) - 1); z_len = strlen(z_suffix); /* Allocate all global buffers (for DYN_ALLOC option) */ ALLOC(uch, inbuf, INBUFSIZ + INBUF_EXTRA); ALLOC(uch, outbuf, OUTBUFSIZ + OUTBUF_EXTRA); ALLOC(ush, d_buf, DIST_BUFSIZE); ALLOC(uch, window, 2L * WSIZE); ALLOC(ush, tab_prefix, 1L << BITS); clear_bufs(); part_nb = 0; if (optind == argc) { time_stamp = 0; ifile_size = -1L; zip(STDIN_FILENO, STDOUT_FILENO); } else { int i; for (i = optind; i < argc; i++) { char *path = NULL; if (strcmp(argv[i], "-") == 0) { time_stamp = 0; ifile_size = -1L; inFileNum = STDIN_FILENO; outFileNum = STDOUT_FILENO; } else { inFileNum = open(argv[i], O_RDONLY); if (inFileNum < 0 || fstat (inFileNum, &statBuf) < 0) perror_msg_and_die("%s", argv[i]); time_stamp = statBuf.st_ctime; ifile_size = statBuf.st_size; if (!tostdout) { path = xmalloc(strlen(argv[i]) + 4); strcpy(path, argv[i]); strcat(path, ".gz"); /* Open output file */#if (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 1) outFileNum = open(path, O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW);#else outFileNum = open(path, O_RDWR | O_CREAT | O_EXCL);#endif if (outFileNum < 0) { perror_msg("%s", path); free(path); continue; } /* Set permissions on the file */ fchmod(outFileNum, statBuf.st_mode); } else outFileNum = STDOUT_FILENO; } if (path == NULL && isatty(outFileNum) && force == 0) { error_msg("compressed data not written to a terminal. Use -f to force compression."); free(path); continue; } result = zip(inFileNum, outFileNum); if (path != NULL) { close (inFileNum); close (outFileNum); /* Delete the original file */ if (result == OK) delFileName = argv[i]; else delFileName = path; if (unlink(delFileName) < 0) perror_msg("%s", delFileName); } free(path); } } return(exit_code);}/* trees.c -- output deflated data using Huffman coding * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. *//* * PURPOSE * * Encode various sets of source values using variable-length * binary code trees. * * DISCUSSION * * The PKZIP "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * * Each code tree is stored in the ZIP file in a compressed form * which is itself a Huffman encoding of the lengths of * all the code strings (in ascending order by source values). * The actual code strings are reconstructed from the lengths in * the UNZIP process, as described in the "application note" * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. * * REFERENCES * * Lynch, Thomas J. * Data Compression: Techniques and Applications, pp. 53-55. * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. * * Storer, James A. * Data Compression: Methods and Theory, pp. 49-50. * Computer Science Press, 1988. ISBN 0-7167-8156-5. * * Sedgewick, R. * Algorithms, p290. * Addison-Wesley, 1983. ISBN 0-201-06672-6. * * INTERFACE * * void ct_init (ush *attr, int *methodp) * Allocate the match buffer, initialize the various tables and save * the location of the internal file attribute (ascii/binary) and * method (DEFLATE/STORE) * * void ct_tally (int dist, int lc); * Save the match info and tally the frequency counts. * * long flush_block (char *buf, ulg stored_len, int eof) * Determine the best encoding for the current block: dynamic trees, * static trees or store, and output the encoded block to the zip * file. Returns the total compressed length for the file so far. * *//* =========================================================================== * Constants */#define MAX_BITS 15/* All codes must not exceed MAX_BITS bits */#define MAX_BL_BITS 7/* Bit length codes must not exceed MAX_BL_BITS bits */#define LENGTH_CODES 29/* number of length codes, not counting the special END_BLOCK code */#define LITERALS 256/* number of literal bytes 0..255 */#define END_BLOCK 256/* end of block literal code */#define L_CODES (LITERALS+1+LENGTH_CODES)/* number of Literal or Length codes, including the END_BLOCK code */#define D_CODES 30/* number of distance codes */#define BL_CODES 19/* number of codes used to transfer the bit lengths */typedef uch extra_bits_t;/* extra bits for each length code */static const extra_bits_t extra_lbits[LENGTH_CODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };/* extra bits for each distance code */static const extra_bits_t extra_dbits[D_CODES] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 };/* extra bits for each bit length code */static const extra_bits_t extra_blbits[BL_CODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7 };#define STORED_BLOCK 0#define STATIC_TREES 1#define DYN_TREES 2/* The three kinds of block type */#ifndef LIT_BUFSIZE# ifdef SMALL_MEM# define LIT_BUFSIZE 0x2000# else# ifdef MEDIUM_MEM# define LIT_BUFSIZE 0x4000# else# define LIT_BUFSIZE 0x8000# endif# endif#endif#ifndef DIST_BUFSIZE# define DIST_BUFSIZE LIT_BUFSIZE#endif/* Sizes of match buffers for literals/lengths and distances. There are * 4 reasons for limiting LIT_BUFSIZE to 64K: * - frequencies can be kept in 16 bit counters * - if compression is not successful for the first block, all input data is * still in the window so we can still emit a stored block even when input * comes from standard input. (This can also be done for all blocks if * LIT_BUFSIZE is not greater than 32K.) * - if compression is not successful for a file smaller than 64K, we can * even emit a stored file instead of a stored block (saving 5 bytes). * - creating new Huffman trees less frequently may not provide fast * adaptation to changes in the input data statistics. (Take for * example a binary file with poorly compressible code followed by * a highly compressible string table.) Smaller buffer sizes give * fast adaptation but have of course the overhead of transmitting trees * more frequently. * - I can't count above 4 * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save * memory at the expense of compression). Some optimizations would be possible * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. */#if LIT_BUFSIZE > INBUFSIZ#error cannot overlay l_buf and inbuf#endif#define REP_3_6 16/* repeat previous bit length 3-6 times (2 bits of repeat count) */#define REPZ_3_10 17/* repeat a zero length 3-10 times (3 bits of repeat count) */#define REPZ_11_138 18/* repeat a zero length 11-138 times (7 bits of repeat count) *//* =========================================================================== * Local data *//* Data structure describing a single value and its code string. */ typedef struct ct_data { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl;} ct_data;#define Freq fc.freq#define Code fc.code#define Dad dl.dad#define Len dl.len#define HEAP_SIZE (2*L_CODES+1)/* maximum heap size */static ct_data dyn_ltree[HEAP_SIZE]; /* literal and length tree */static ct_data dyn_dtree[2 * D_CODES + 1]; /* distance tree */static ct_data static_ltree[L_CODES + 2];/* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */static ct_data static_dtree[D_CODES];/* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */static ct_data bl_tree[2 * BL_CODES + 1];/* Huffman tree for the bit lengths */typedef struct tree_desc { ct_data *dyn_tree; /* the dynamic tree */ ct_data *static_tree; /* corresponding static tree or NULL */ const extra_bits_t *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ int max_code; /* largest code with non zero frequency */} tree_desc;static tree_desc l_desc = { dyn_ltree, static_ltree, extra_lbits, LITERALS + 1, L_CODES, MAX_BITS, 0 };static tree_desc d_desc = { dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0 };static tree_desc bl_desc = { bl_tree, (ct_data *) 0, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0 };static ush bl_count[MAX_BITS + 1];/* number of codes at each bit length for an optimal tree */static const uch bl_order[BL_CODES]= { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };/* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */static int heap[2 * L_CODES + 1]; /* heap used to build the Huffman trees */static int heap_len; /* number of elements in the heap */static int heap_max; /* element of largest frequency *//* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. * The same heap array is used to build all trees. */static uch depth[2 * L_CODES + 1];/* Depth of each subtree used as tie breaker for trees of equal frequency */static uch length_code[MAX_MATCH - MIN_MATCH + 1];/* length code for each normalized match length (0 == MIN_MATCH) */static uch dist_code[512];/* distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */static int base_length[LENGTH_CODES];/* First normalized length for each code (0 = MIN_MATCH) */static int base_dist[D_CODES];/* First normalized distance for each code (0 = distance of 1) */#define l_buf inbuf/* DECLARE(uch, l_buf, LIT_BUFSIZE); buffer for literals or lengths *//* DECLARE(ush, d_buf, DIST_BUFSIZE); buffer for distances */static uch flag_buf[(LIT_BUFSIZE / 8)];/* flag_buf is a bit array distinguishing literals from lengths in * l_buf, thus indicating the presence or absence of a distance. */static unsigned last_lit; /* running index in l_buf */static unsigned last_dist; /* running index in d_buf */static unsigned last_flags; /* running index in flag_buf */static uch flags; /* current flags not yet saved in flag_buf */static uch flag_bit; /* current bit used in flags *//* bits are filled in flags starting at bit 0 (least significant). * Note: these flags are overkill in the current code since we don't * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. */static ulg opt_len; /* bit length of current block with optimal trees */static ulg static_len; /* bit length of current block with static trees */static ulg compressed_len; /* total bit length of compressed file */static ush *file_type; /* pointer to UNKNOWN, BINARY or ASCII */static int *file_method; /* pointer to DEFLATE or STORE *//* =========================================================================== * Local (static) routines in this file. */static void init_block (void);static void pqdownheap (ct_data * tree, int k);static void gen_bitlen (tree_desc * desc);static void gen_codes (ct_data * tree, int max_code);static void build_tree (tree_desc * desc);static void scan_tree (ct_data * tree, int max_code);static void send_tree (ct_data * tree, int max_code);static int build_bl_tree (void);static void send_all_trees (int lcodes, int dcodes, int blcodes);static void compress_block (ct_data * ltree, ct_data * dtree);static void set_file_type (void);#ifndef DEBUG# define send_code(c, tree) send_bits(tree[c].Code, tree[c].Len) /* Send a code of the given tree. c and tree must not have side effects */#else /* DEBUG */# define send_code(c, tree) \ { if (verbose>1) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(tree[c].Code, tree[c].Len); }#endif#define d_code(dist) \ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])/* Mapping from a distance to a distance code. dist is the distance - 1 and
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -