📄 deflate.c
字号:
/* TiMidity++ -- MIDI to WAVE converter and player Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp> Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/#ifdef HAVE_CONFIG_H#include "config.h"#endif /* HAVE_CONFIG_H *//* deflate.c -- compress data using the deflation algorithm * Copyright (C) 1992-1993 Jean-loup Gailly * This is free software; you can redistribute it and/or modify it under the * terms of the GNU General Public License, see the file COPYING. *//* * PURPOSE * * Identify new text as repetitions of old text within a fixed- * length sliding window trailing behind the new text. * * DISCUSSION * * The "deflation" process depends on being able to identify portions * of the input text which are identical to earlier input (within a * sliding window trailing behind the input currently being processed). * * The most straightforward technique turns out to be the fastest for * most input files: try all possible matches and select the longest. * The key feature of this algorithm is that insertions into the string * dictionary are very simple and thus fast, and deletions are avoided * completely. Insertions are performed at each input character, whereas * string matches are performed only when the previous match ends. So it * is preferable to spend more time in matches to allow very fast string * insertions and avoid deletions. The matching algorithm for small * strings is inspired from that of Rabin & Karp. A brute force approach * is used to find longer strings when a small match has been found. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze * (by Leonid Broukhis). * A previous version of this file used a more sophisticated algorithm * (by Fiala and Greene) which is guaranteed to run in linear amortized * time, but has a larger average cost, uses more memory and is patented. * However the F&G algorithm may be faster for some highly redundant * files if the parameter max_chain_length (described below) is too large. * * ACKNOWLEDGEMENTS * * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and * I found it in 'freeze' written by Leonid Broukhis. * Thanks to many info-zippers for bug reports and testing. * * REFERENCES * * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. * * Fiala,E.R., and Greene,D.H. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 * * INTERFACE * * void lm_init (void) * Initialize the "longest match" routines for a new file * * ulg deflate (void) * Processes a new input file and return its compressed length. Sets * the compressed length, crc, deflate flags and internal file * attributes. */#include <stdio.h>#include <stdlib.h>#ifndef NO_STRING_H#include <string.h>#else#include <strings.h>#endif#include <ctype.h>#define FULL_SEARCH/* #define UNALIGNED_OK *//* #define DEBUG */#include "timidity.h"#include "common.h"#include "mblock.h"#include "zip.h"#define local static#define MIN_MATCH 3#define MAX_MATCH 258/* The minimum and maximum match lengths */#define BITS 16/* Compile with MEDIUM_MEM to reduce the memory requirements or * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the * entire input file can be held in memory (not possible on 16 bit systems). * Warning: defining these symbols affects HASH_BITS (see below) and thus * affects the compression ratio. The compressed output * is still correct, and might even be smaller in some cases. */#ifdef SMALL_MEM# define LIT_BUFSIZE 0x2000# define HASH_BITS 13 /* Number of bits used to hash strings */#else#ifdef MEDIUM_MEM# define LIT_BUFSIZE 0x4000# define HASH_BITS 14#else# define LIT_BUFSIZE 0x8000# define HASH_BITS 15/* For portability to 16 bit machines, do not use values above 15. */#endif#endif#if LIT_BUFSIZE > INBUFSIZ error cannot overlay l_buf and inbuf#endif#if (WSIZE<<1) > (1<<BITS) error: cannot overlay window with tab_suffix and prev with tab_prefix0#endif#if HASH_BITS > BITS-1 error: cannot overlay head with tab_prefix1#endif#define DIST_BUFSIZE LIT_BUFSIZE#define HASH_SIZE (unsigned)(1<<HASH_BITS)#define HASH_MASK (HASH_SIZE-1)#define WMASK (WSIZE-1)/* HASH_SIZE and WSIZE must be powers of two */#define NIL 0 /* Tail of hash chains */#define EQUAL 0 /* result of memcmp for equal strings */#define TOO_FAR 4096/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)/* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */#define MAX_DIST (WSIZE-MIN_LOOKAHEAD)/* In order to simplify the code, particularly on 16 bit machines, match * distances are limited to MAX_DIST instead of WSIZE. */#define SMALLEST 1/* Index within the heap array of least frequent node in the Huffman tree */#define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */#define MAX_BL_BITS 7 /* Bit length codes must not exceed MAX_BL_BITS bits */#define LENGTH_CODES 29/* number of length codes, not counting the special END_BLOCK code */#define LITERALS 256 /* number of literal bytes 0..255 */#define END_BLOCK 256 /* end of block literal code */#define L_CODES (LITERALS+1+LENGTH_CODES)/* number of Literal or Length codes, including the END_BLOCK code */#define D_CODES 30 /* number of distance codes */#define BL_CODES 19 /* number of codes used to transfer the bit lengths */#define REP_3_6 16/* repeat previous bit length 3-6 times (2 bits of repeat count) */#define REPZ_3_10 17/* repeat a zero length 3-10 times (3 bits of repeat count) */#define REPZ_11_138 18/* repeat a zero length 11-138 times (7 bits of repeat count) */#define HEAP_SIZE (2*L_CODES+1) /* maximum heap size */#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)/* Number of bits by which ins_h and del_h must be shifted at each * input step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * H_SHIFT * MIN_MATCH >= HASH_BITS *//* Data structure describing a single value and its code string. */typedef struct ct_data { union { ush freq; /* frequency count */ ush code; /* bit string */ } fc; union { ush dad; /* father node in Huffman tree */ ush len; /* length of bit string */ } dl;} ct_data;#define Freq fc.freq#define Code fc.code#define Dad dl.dad#define Len dl.lentypedef struct tree_desc { ct_data near *dyn_tree; /* the dynamic tree */ ct_data near *static_tree; /* corresponding static tree or NULL */ int near *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ int max_code; /* largest code with non zero frequency */} tree_desc;local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};local int near extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};local uch near bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};/* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. *//* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be * found for specific files. */local struct{ ush good_length; /* reduce lazy search above this match length */ ush max_lazy; /* do not perform lazy search above this match length */ ush nice_length; /* quit search above this match length */ ush max_chain;} configuration_table[10] = {/* good lazy nice chain *//* 0 */ {0, 0, 0, 0}, /* store only *//* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches *//* 2 */ {4, 5, 16, 8},/* 3 */ {4, 6, 32, 32},/* 4 */ {4, 4, 16, 16}, /* lazy matches *//* 5 */ {8, 16, 32, 32},/* 6 */ {8, 16, 128, 128},/* 7 */ {8, 32, 128, 256},/* 8 */ {32, 128, 258, 1024},/* 9 */ {32, 258, 258, 4096}}; /* maximum compression */struct deflate_buff_queue{ struct deflate_buff_queue *next; unsigned len; uch *ptr;};local struct deflate_buff_queue *free_queue = NULL;local void reuse_queue(struct deflate_buff_queue *p){ p->next = free_queue; free_queue = p;}local struct deflate_buff_queue *new_queue(void){ struct deflate_buff_queue *p; if(free_queue) { p = free_queue; free_queue = free_queue->next; } else p = (struct deflate_buff_queue *) safe_malloc(sizeof(struct deflate_buff_queue) + OUTBUFSIZ); p->next = NULL; p->len = 0; p->ptr = (uch *)p + sizeof(struct deflate_buff_queue); return p;}struct _DeflateHandler{ void *user_val; long (* read_func)(char *buf, long size, void *user_val); int initflag; struct deflate_buff_queue *qhead; struct deflate_buff_queue *qtail; uch outbuf[OUTBUFSIZ]; unsigned outcnt, outoff; int complete;#define window_size ((ulg)2*WSIZE) uch window[window_size]; ush d_buf[DIST_BUFSIZE]; /* buffer for distances */ uch l_buf[INBUFSIZ + INBUF_EXTRA]; /* buffer for literals or lengths */ ush prev[1L<<BITS]; unsigned short bi_buf; int bi_valid; long block_start;/* window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */ unsigned ins_h; /* hash index of string to be inserted */ unsigned hash_head; /* head of hash chain */ unsigned prev_match; /* previous match */ int match_available; /* set if previous match exists */ unsigned match_length; /* length of best match */ unsigned int near prev_length;/* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ unsigned near strstart; /* start of string to insert */ unsigned near match_start; /* start of matching string */ int eofile; /* flag set at end of input file */ unsigned lookahead; /* number of valid bytes ahead in window */ unsigned near max_chain_length;/* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. */ unsigned int max_lazy_match;/* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */ int compr_level; /* compression level (1..9) */ unsigned near good_match;/* Use a faster search when the previous match is longer than this */#ifndef FULL_SEARCH int near nice_match; /* Stop searching when current match exceeds this */#endif ct_data near dyn_ltree[HEAP_SIZE]; /* literal and length tree */ ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */ ct_data near static_ltree[L_CODES+2];/* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see ct_init * below). */ ct_data near static_dtree[D_CODES];/* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */ tree_desc near l_desc; tree_desc near d_desc; tree_desc near bl_desc; ush near bl_count[MAX_BITS+1];/* number of codes at each bit length for an optimal tree */ int near heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ int heap_len; /* number of elements in the heap */ int heap_max; /* element of largest frequency *//* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -