📄 pgpzdeflate.c
字号:
/*
* Copyright (C) 1990-1993 Mark Adler, Richard B. Wales, Jean-loup Gailly,
* Kai Uwe Rommel and Igor Mandrichenko.
* Permission is granted to any individual or institution to use, copy,
* or redistribute this software so long as all of the original files
* are included, that it is not sold for profit, and that this copyright
* notice is retained.
*
* Hacked up for PGP by Colin Plumb
*
* $Id: pgpZDeflate.c,v 1.10 1997/12/12 01:14:11 heller Exp $
*/
/*
* deflate.c by Jean-loup Gailly.
*
* PURPOSE
*
* Identify new text as repetitions of old text within a fixed-
* length sliding window trailing behind the new text.
*
* DISCUSSION
*
* The "deflation" process depends on being able to identify portions
* of the input text which are identical to earlier input (within a
* sliding window trailing behind the input currently being processed).
*
* The most straightforward technique turns out to be the fastest for
* most input files: try all possible matches and select the longest.
* The key feature of this algorithm is that insertions into the string
* dictionary are very simple and thus fast, and deletions are avoided
* completely. Insertions are performed at each input character,
* whereas string matches are performed only when the previous match
* ends. So it is preferable to spend more time in matches to allow very
* fast string insertions and avoid deletions. The matching algorithm
* for small strings is inspired from that of Rabin & Karp. A brute
* force approach is used to find longer strings when a small match has
* been found. A similar algorithm is used in comic (by Jan-Mark Wams)
* and freeze (by Leonid Broukhis).
*
* A previous version of this file used a more sophisticated algorithm
* (by Fiala and Greene) which is guaranteed to run in linear amortized
* time, but has a larger average cost, uses more memory and is patented.
* However the F&G algorithm may be faster for some highly redundant
* files if the parameter max_chain_length (described below) is too large.
*
* ACKNOWLEDGEMENTS
*
* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
* I found it in 'freeze' written by Leonid Broukhis.
* Thanks to many info-zippers for bug reports and testing.
*
* REFERENCES
*
* APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
*
* A description of the Rabin and Karp algorithm is given in the book
* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
*
* Fiala,E.R., and Greene,D.H.
* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
*
* INTERFACE
*
* int lm_init (int pack_level)
* Initialize the "longest match" routines for a new file
*
* PGPUInt32 deflate (void)
* Processes a new input file and return its compressed length. Sets
* the compressed length, crc, deflate flags and internal file
* attributes.
*/
#include "pgpConfig.h"
#include "pgpContext.h"
#include "pgpZip.h"
/* ===========================================================================
* Configuration parameters
*/
/*
* Compile with MEDIUM_MEM to reduce the memory requirements or
* with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
* entire input file can be held in memory (not possible on 16 bit systems).
* Warning: defining these symbols affects HASH_BITS (see below) and thus
* affects the compression ratio. The compressed output
* is still correct, and might even be smaller in some cases.
*/
#ifdef SMALL_MEM
# define HASH_BITS 13 /* Number of bits used to hash strings */
#endif
#ifdef MEDIUM_MEM
# define HASH_BITS 14
#endif
#ifndef HASH_BITS
# define HASH_BITS 15
/* For portability to 16 bit machines, do not use values above 15. */
#endif
#define HASH_SIZE (unsigned)(1<<HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
#define WMASK (WSIZE-1)
/* HASH_SIZE and WSIZE must be powers of two */
#define NIL 0
/* Tail of hash chains */
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
#ifdef ATARI_ST
# undef MSDOS /* avoid the processor specific parts */
/* (but the Atari should never define MSDOS anyway ...) */
#endif
#if defined(MSDOS) && !defined(NO_ASM) && !defined(ASMV)
# define ASMV
#endif
#if defined(ASMV) && !defined(MSDOS) && defined(DYN_ALLOC)
error: DYN_ALLOC not yet supported in match.s
#endif
/* ===========================================================================
* Local data used by the "longest match" routines.
*/
#if defined(BIG_MEM) || defined(MMAP)
typedef size_t Pos; /* must be at least 32 bits */
#else
typedef PGPUInt16 Pos;
#endif
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
*/
/* Private context structure for use by all functions. */
typedef struct ZDeflateContext {
PGPContextRef cdkContext;
#ifndef DYN_ALLOC
PGPByte window[2L*WSIZE];
/* Sliding window. Input bytes are read into the second half of
* the window, and move to the first half later to keep a
* dictionary of at least WSIZE bytes. With this organization,
* matches are limited to a distance of WSIZE-MAX_MATCH bytes, but
* this ensures that IO is always performed with a length multiple
* of the block size. Also, it limits the window size to 64K,
* which is quite useful on MSDOS. To do: limit the window size
* to WSIZE+BSZ if SMALL_MEM (the code would be less efficient
* since the data would have to be copied WSIZE/BSZ times)
*/
Pos prev[WSIZE];
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Pos head[HASH_SIZE];
/* Heads of the hash chains or NIL. If your compiler thinks that
* HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
*/
#else
/*
* MS-DOS adjusts the pointers for zero offset, so we store the
* original pointers in these variables for later free()ing
*/
PGPByte * near o_window;
Pos * near o_prev;
Pos * near o_head;
PGPByte * near window;
Pos * near prev;
Pos * near head;
#endif
PGPUInt32 window_size;
/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
* input file length plus MIN_LOOKAHEAD.
*/
long block_start;
/* window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
unsigned ins_h; /* hash index of string to be inserted */
unsigned int near prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation.
*/
unsigned near strstart; /* start of string to insert */
unsigned near match_start; /* start of matching string */
int eofile; /* flag set at end of input file */
unsigned lookahead; /* number of valid bytes ahead in window */
/* Saved copies of lazy match variables */
int s_match_available;
int s_match_length;
unsigned near max_chain_length;
/* To speed up deflation, hash chains are never searched beyond
* this length. A higher limit improves compression ratio but
* degrades the speed.
*/
unsigned int max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is used only for compression
* levels >= 4.
*/
#define max_insert_length max_lazy_match
/* Insert new strings in the hash table only if the match length
* is not greater than this length. This saves time but degrades
* compression. max_insert_length is used only for compression
* levels <= 3.
*/
unsigned near good_match;
/* Use a faster search when the previous match is longer than this */
#ifdef FULL_SEARCH
# define nice_match MAX_MATCH
#else
int near nice_match; /* Stop searching when current match exceeds this */
#endif
/* DEBUG_STRUCT_CONSTRUCTOR( ZDeflateContext ) */
} ZDeflateContext;
#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
/* Number of bits by which ins_h and del_h must be shifted at each
* input step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* H_SHIFT * MIN_MATCH >= HASH_BITS
*/
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
* found for specific files.
*/
static const struct {
PGPUInt16 good_length; /* reduce lazy search above this match length */
PGPUInt16 max_lazy;/* don't perform lazy search above this match length */
PGPUInt16 nice_length; /* quit search above this match length */
PGPUInt16 max_chain;
} configuration_table[10] = {
/* good lazy nice chain */
/* 0 */ {0, 0, 0, 0}, /* store only */
/* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */
/* 2 */ {4, 5, 16, 8},
/* 3 */ {4, 6, 32, 32},
/* 4 */ {4, 4, 16, 16}, /* lazy matches */
#if 0
/* 5 */ {8, 16, 32, 32}, /* Change to new? */
#else
{8, 64, 258, 128}, /* Match old PGP params */
#endif
/* 6 */ {8, 16, 128, 128},
/* 7 */ {8, 32, 128, 256},
/* 8 */ {32, 128, 258, 1024},
/* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
* For deflate_fast() (levels <= 3) good is ignored and lazy has a different
* meaning.
*/
/* ===========================================================================
* Prototypes for local functions.
*/
static void slide_window(ZDeflateContext *);
static int deflate_sub(ZDeflateContext *, struct ZTreesContext *,
struct ZBitsContext *);
int longest_match(ZDeflateContext *, IPos cur_match);
#ifdef ASMV
void match_init(void); /* asm code initialization */
#endif
#ifdef ZIPDEBUG
static void check_match(ZDeflateContext *, IPos start, IPos match, int length);
#endif
/* ===========================================================================
* Update a hash value with the given input byte
* IN assertion: all calls to to UPDATE_HASH are made with consecutive
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
#define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
/* ===========================================================================
* Insert string s in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* IN assertion: all calls to to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of s are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
#define INSERT_STRING(ctx, s, match_head) \
(UPDATE_HASH((ctx)->ins_h, (ctx)->window[(s) + MIN_MATCH-1]), \
(ctx)->prev[(s) & WMASK] = (Pos)((match_head) = (ctx)->head[(ctx)->ins_h]), \
(ctx)->head[(ctx)->ins_h] = (Pos)(s))
/* ===========================================================================
* Initialize the "longest match" routines for a new file
*
* IN assertion: window_size is > 0 if the input file is already read or
* mmap'ed in the window[] array, 0 otherwise. In the first case,
* window_size is sufficient to contain the whole input file plus
* MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
* of window[] when looking for matches towards the end).
*/
ZDeflateContext *
lm_init(PGPContextRef cdkContext, int pack_level)
/* 0: store, 1: best speed, 9: best compression */
{
ZDeflateContext *ctx;
if (pack_level < 1 || pack_level > 9)
return NULL;
/* error("bad pack level"); */
ctx = (ZDeflateContext *) pgpContextMemAlloc (cdkContext,
sizeof(*ctx), kPGPMemoryMgrFlags_Clear);
if (IsNull (ctx) )
return NULL;
ctx->cdkContext = cdkContext;
/* Do not slide the window if the whole input is already in memory
* (window_size > 0)
*/
if (ctx->window_size == 0)
ctx->window_size = (PGPUInt32)2*WSIZE;
/* Use dynamic allocation if compiler does not like big static arrays: */
/* The +8 is for MS-DOS, so the pointers can be segment-aligned */
#ifdef DYN_ALLOC
pgpAssert (ctx->o_window == NULL);
ctx->window = ctx->o_window = (PGPByte *)pgpContextMemAlloc (cdkContext,
(WSIZE+8)*2*sizeof(PGPByte), kPGPMemoryMgrFlags_Clear);
if (ctx->o_window == NULL) {
pgpContextMemFree (cdkContext, ctx);
return NULL;
/* err(ZE_MEM, "window allocation"); */
}
pgpAssert (ctx->o_prev == NULL);
ctx->prev = ctx->o_prev = (Pos*) pgpContextMemAlloc (cdkContext,
(WSIZE+8) * sizeof(Pos), kPGPMemoryMgrFlags_Clear);
ctx->head = ctx->o_head = (Pos*) pgpContextMemAlloc (cdkContext,
(HASH_SIZE+8) * sizeof(Pos), kPGPMemoryMgrFlags_Clear);
if (ctx->o_prev == NULL || ctx->o_head == NULL) {
if (ctx->o_prev)
pgpContextMemFree (cdkContext, ctx->o_prev);
pgpContextMemFree (cdkContext, ctx->o_window);
pgpContextMemFree (cdkContext, ctx);
return NULL;
/* err(ZE_MEM, "hash table allocation"); */
}
#endif /* DYN_ALLOC */
/* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
* prev[] will be initialized on the fly.
*/
ctx->head[HASH_SIZE-1] = NIL;
pgpClearMemory((char*)ctx->head, (unsigned)(HASH_SIZE-1)*
sizeof(*ctx->head));
/* Set the default configuration parameters:
*/
ctx->max_lazy_match = configuration_table[pack_level].max_lazy;
ctx->good_match = configuration_table[pack_level].good_length;
#ifndef FULL_SEARCH
ctx->nice_match = configuration_table[pack_level].nice_length;
#endif
ctx->max_chain_length = configuration_table[pack_level].max_chain;
/* ??? reduce max_chain_length for binary files */
ctx->strstart = 0;
ctx->block_start = 0L;
#ifdef ASMV
match_init(ctx); /* initialize the asm code */
#endif
ctx->lookahead = 0;
ctx->eofile = 0;
ctx->s_match_available = 0; /* set if previous match exists */
ctx->s_match_length = MIN_MATCH-1; /* length of best match */
return ctx;
}
/* ===========================================================================
* Free the window and hash table
*/
void
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -