📄 deflate.c
字号:
/* Copyright (c) 1990-2005 Info-ZIP. All rights reserved. See the accompanying file LICENSE, version 2005-Feb-10 or later (the contents of which are also included in zip.h) for terms of use. If, for some reason, all these files are missing, the Info-ZIP license also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html*//* * deflate.c by Jean-loup Gailly. * * PURPOSE * * Identify new text as repetitions of old text within a fixed- * length sliding window trailing behind the new text. * * DISCUSSION * * The "deflation" process depends on being able to identify portions * of the input text which are identical to earlier input (within a * sliding window trailing behind the input currently being processed). * * The most straightforward technique turns out to be the fastest for * most input files: try all possible matches and select the longest. * The key feature of this algorithm is that insertions into the string * dictionary are very simple and thus fast, and deletions are avoided * completely. Insertions are performed at each input character, whereas * string matches are performed only when the previous match ends. So it * is preferable to spend more time in matches to allow very fast string * insertions and avoid deletions. The matching algorithm for small * strings is inspired from that of Rabin & Karp. A brute force approach * is used to find longer strings when a small match has been found. * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze * (by Leonid Broukhis). * A previous version of this file used a more sophisticated algorithm * (by Fiala and Greene) which is guaranteed to run in linear amortized * time, but has a larger average cost, uses more memory and is patented. * However the F&G algorithm may be faster for some highly redundant * files if the parameter max_chain_length (described below) is too large. * * ACKNOWLEDGEMENTS * * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and * I found it in 'freeze' written by Leonid Broukhis. * Thanks to many info-zippers for bug reports and testing. * * REFERENCES * * APPNOTE.TXT documentation file in PKZIP 1.93a distribution. * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. * * Fiala,E.R., and Greene,D.H. * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 * * INTERFACE * * void lm_init (int pack_level, ush *flags) * Initialize the "longest match" routines for a new file * * ulg deflate (void) * Processes a new input file and return its compressed length. Sets * the compressed length, crc, deflate flags and internal file * attributes. */#define __DEFLATE_C#include "zip.h"#ifndef USE_ZLIB/* =========================================================================== * Configuration parameters *//* Compile with MEDIUM_MEM to reduce the memory requirements or * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the * entire input file can be held in memory (not possible on 16 bit systems). * Warning: defining these symbols affects HASH_BITS (see below) and thus * affects the compression ratio. The compressed output * is still correct, and might even be smaller in some cases. */#ifdef SMALL_MEM# define HASH_BITS 13 /* Number of bits used to hash strings */#endif#ifdef MEDIUM_MEM# define HASH_BITS 14#endif#ifndef HASH_BITS# define HASH_BITS 15 /* For portability to 16 bit machines, do not use values above 15. */#endif#define HASH_SIZE (unsigned)(1<<HASH_BITS)#define HASH_MASK (HASH_SIZE-1)#define WMASK (WSIZE-1)/* HASH_SIZE and WSIZE must be powers of two */#define NIL 0/* Tail of hash chains */#define FAST 4#define SLOW 2/* speed options for the general purpose bit flag */#ifndef TOO_FAR# define TOO_FAR 4096#endif/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */#if (defined(ASMV) && !defined(MSDOS16) && defined(DYN_ALLOC)) error: DYN_ALLOC not yet supported in match.S or match32.asm#endif#ifdef MEMORY16# define MAXSEG_64K#endif/* =========================================================================== * Local data used by the "longest match" routines. */#if defined(MMAP) || defined(BIG_MEM) typedef unsigned Pos; /* must be at least 32 bits */#else typedef ush Pos;#endiftypedef unsigned IPos;/* A Pos is an index in the character window. We use short instead of int to * save space in the various tables. IPos is used only for parameter passing. */#ifndef DYN_ALLOC uch window[2L*WSIZE]; /* Sliding window. Input bytes are read into the second half of the window, * and move to the first half later to keep a dictionary of at least WSIZE * bytes. With this organization, matches are limited to a distance of * WSIZE-MAX_MATCH bytes, but this ensures that IO is always * performed with a length multiple of the block size. Also, it limits * the window size to 64K, which is quite useful on MSDOS. * To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would * be less efficient since the data would have to be copied WSIZE/CBSZ times) */ Pos prev[WSIZE]; /* Link to older string with same hash index. To limit the size of this * array to 64K, this link is maintained only for the last 32K strings. * An index in this array is thus a window index modulo 32K. */ Pos head[HASH_SIZE]; /* Heads of the hash chains or NIL. If your compiler thinks that * HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC. */#else uch far * near window = NULL; Pos far * near prev = NULL; Pos far * near head;#endifulg window_size;/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the * input file length plus MIN_LOOKAHEAD. */long block_start;/* window position at the beginning of the current output block. Gets * negative when the window is moved backwards. */local int sliding;/* Set to false when the input file is already in memory */local unsigned ins_h; /* hash index of string to be inserted */#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)/* Number of bits by which ins_h and del_h must be shifted at each * input step. It must be such that after MIN_MATCH steps, the oldest * byte no longer takes part in the hash key, that is: * H_SHIFT * MIN_MATCH >= HASH_BITS */unsigned int near prev_length;/* Length of the best match at previous step. Matches not greater than this * are discarded. This is used in the lazy match evaluation. */ unsigned near strstart; /* start of string to insert */ unsigned near match_start; /* start of matching string */local int eofile; /* flag set at end of input file */local unsigned lookahead; /* number of valid bytes ahead in window */unsigned near max_chain_length;/* To speed up deflation, hash chains are never searched beyond this length. * A higher limit improves compression ratio but degrades the speed. */local unsigned int max_lazy_match;/* Attempt to find a better match only when the current match is strictly * smaller than this value. This mechanism is used only for compression * levels >= 4. */#define max_insert_length max_lazy_match/* Insert new strings in the hash table only if the match length * is not greater than this length. This saves time but degrades compression. * max_insert_length is used only for compression levels <= 3. */unsigned near good_match;/* Use a faster search when the previous match is longer than this */#ifdef FULL_SEARCH# define nice_match MAX_MATCH#else int near nice_match; /* Stop searching when current match exceeds this */#endif/* Values for max_lazy_match, good_match, nice_match and max_chain_length, * depending on the desired pack level (0..9). The values given below have * been tuned to exclude worst case performance for pathological files. * Better values may be found for specific files. */typedef struct config { ush good_length; /* reduce lazy search above this match length */ ush max_lazy; /* do not perform lazy search above this match length */ ush nice_length; /* quit search above this match length */ ush max_chain;} config;local config configuration_table[10] = {/* good lazy nice chain *//* 0 */ {0, 0, 0, 0}, /* store only *//* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches *//* 2 */ {4, 5, 16, 8},/* 3 */ {4, 6, 32, 32},/* 4 */ {4, 4, 16, 16}, /* lazy matches *//* 5 */ {8, 16, 32, 32},/* 6 */ {8, 16, 128, 128},/* 7 */ {8, 32, 128, 256},/* 8 */ {32, 128, 258, 1024},/* 9 */ {32, 258, 258, 4096}}; /* maximum compression *//* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 * For deflate_fast() (levels <= 3) good is ignored and lazy has a different * meaning. */#define EQUAL 0/* result of memcmp for equal strings *//* =========================================================================== * Prototypes for local functions. */local void fill_window OF((void));local ulg deflate_fast OF((void)); int longest_match OF((IPos cur_match));#if defined(ASMV) && !defined(RISCOS) void match_init OF((void)); /* asm code initialization */#endif#ifdef DEBUGlocal void check_match OF((IPos start, IPos match, int length));#endif/* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to to UPDATE_HASH are made with consecutive * input characters, so that a running hash key can be computed from the * previous key instead of complete recalculation each time. */#define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)/* =========================================================================== * Insert string s in the dictionary and set match_head to the previous head * of the hash chain (the most recent string with same hash key). Return * the previous length of the hash chain. * IN assertion: all calls to to INSERT_STRING are made with consecutive * input characters and the first MIN_MATCH bytes of s are valid * (except for the last MIN_MATCH-1 bytes of the input file). */#define INSERT_STRING(s, match_head) \ (UPDATE_HASH(ins_h, window[(s) + (MIN_MATCH-1)]), \ prev[(s) & WMASK] = match_head = head[ins_h], \ head[ins_h] = (s))/* =========================================================================== * Initialize the "longest match" routines for a new file * * IN assertion: window_size is > 0 if the input file is already read or * mmap'ed in the window[] array, 0 otherwise. In the first case, * window_size is sufficient to contain the whole input file plus * MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end * of window[] when looking for matches towards the end). */void lm_init (pack_level, flags) int pack_level; /* 0: store, 1: best speed, 9: best compression */ ush *flags; /* general purpose bit flag */{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -