📄 deflate.cpp
字号:
/* deflate.c -- compress data using the deflation algorithm
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/*
* ALGORITHM
*
* The "deflation" process depends on being able to identify portions
* of the input text which are identical to earlier input (within a
* sliding window trailing behind the input currently being processed).
*
* The most straightforward technique turns out to be the fastest for
* most input files: try all possible matches and select the longest.
* The key feature of this algorithm is that insertions into the string
* dictionary are very simple and thus fast, and deletions are avoided
* completely. Insertions are performed at each input character, whereas
* string matches are performed only when the previous match ends. So it
* is preferable to spend more time in matches to allow very fast string
* insertions and avoid deletions. The matching algorithm for small
* strings is inspired from that of Rabin & Karp. A brute force approach
* is used to find longer strings when a small match has been found.
* A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
* (by Leonid Broukhis).
* A previous version of this file used a more sophisticated algorithm
* (by Fiala and Greene) which is guaranteed to run in linear amortized
* time, but has a larger average cost, uses more memory and is patented.
* However the F&G algorithm may be faster for some highly redundant
* files if the parameter max_chain_length (described below) is too large.
*
* ACKNOWLEDGEMENTS
*
* The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
* I found it in 'freeze' written by Leonid Broukhis.
* Thanks to many people for bug reports and testing.
*
* REFERENCES
*
* Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
* Available in ftp://ds.internic.net/rfc/rfc1951.txt
*
* A description of the Rabin and Karp algorithm is given in the book
* "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
*
* Fiala,E.R., and Greene,D.H.
* Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
*
*/
#include "stdafx.h"
#include "deflate.h"
/*const char deflate_copyright[] =
" deflate 1.1.3 Copyright 1995-1998 Jean-loup Gailly ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
include such an acknowledgment, I would appreciate that you keep this
copyright string in the executable of your product.
*/
/* ===========================================================================
* Function prototypes.
*/
typedef enum {
need_more, /* block not completed, need more input or more output */
block_done, /* block flush performed */
finish_started, /* finish started, need only more output at next deflate */
finish_done /* finish done, accept no more input or output */
} block_state;
typedef block_state (*compress_func)(deflate_state *s, int flush);
/* Compression function. Returns the block state after the call. */
static void fill_window (deflate_state *s);
static block_state deflate_stored(deflate_state *s, int flush);
static block_state deflate_fast (deflate_state *s, int flush);
static block_state deflate_slow (deflate_state *s, int flush);
static void lm_init (deflate_state *s);
static void putShortMSB (deflate_state *s, unsigned int b);
static void flush_pending (z_streamp strm);
static int read_buf (z_streamp strm, BYTE *buf, unsigned size);
static unsigned int longest_match (deflate_state *s, IPos cur_match);
/* ===========================================================================
* static data
*/
#define NIL 0
/* Tail of hash chains */
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
* See deflate.c for comments about the MIN_MATCH+1.
*/
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
* found for specific files.
*/
typedef struct config_s {
unsigned short good_length; /* reduce lazy search above this match length */
unsigned short max_lazy; /* do not perform lazy search above this match length */
unsigned short nice_length; /* quit search above this match length */
unsigned short max_chain;
compress_func func;
} config;
static const config configuration_table[10] = {
/* good lazy nice chain */
/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */
/* 2 */ {4, 5, 16, 8, deflate_fast},
/* 3 */ {4, 6, 32, 32, deflate_fast},
/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
/* 5 */ {8, 16, 32, 32, deflate_slow},
/* 6 */ {8, 16, 128, 128, deflate_slow},
/* 7 */ {8, 32, 128, 256, deflate_slow},
/* 8 */ {32, 128, 258, 1024, deflate_slow},
/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
* For deflate_fast() (levels <= 3) good is ignored and lazy has a different
* meaning.
*/
#define EQUAL 0
/* result of memcmp for equal strings */
struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
/* ===========================================================================
* Update a hash value with the given input byte
* IN assertion: all calls to to UPDATE_HASH are made with consecutive
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* If this file is compiled with -DFASTEST, the compression level is forced
* to 1, and no hash chains are maintained.
* IN assertion: all calls to to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
#ifdef FASTEST
#define INSERT_STRING(s, str, match_head) \
(UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
match_head = s->head[s->ins_h], \
s->head[s->ins_h] = (Pos)(str))
#else
#define INSERT_STRING(s, str, match_head) \
(UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
s->head[s->ins_h] = (Pos)(str))
#endif
/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
* prev[] will be initialized on the fly.
*/
#define CLEAR_HASH(s) \
s->head[s->hash_size-1] = NIL; \
memset((BYTE *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head));
/* ========================================================================= */
int deflateInit_(
z_streamp strm,
int level,
const char *version,
int stream_size)
{
return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
Z_DEFAULT_STRATEGY, version, stream_size);
/* To do: ignore strm->next_in if we use it as window */
}
/* ========================================================================= */
int deflateInit2_(
z_streamp strm,
int level,
int method,
int windowBits,
int memLevel,
int strategy,
const char *version,
int stream_size)
{
deflate_state *s;
int noheader = 0;
static const char* my_version = ZLIB_VERSION;
unsigned short *overlay;
/* We overlay pending_buf and d_buf+l_buf. This works since the average
* output size for (length,distance) codes is <= 24 bits.
*/
if (version == Z_NULL || version[0] != my_version[0] ||
stream_size != sizeof(z_stream)) {
return Z_VERSION_ERROR;
}
if (strm == Z_NULL) return Z_STREAM_ERROR;
strm->msg = Z_NULL;
if (strm->zalloc == Z_NULL) {
strm->zalloc = zcalloc;
strm->opaque = (void *)0;
}
if (strm->zfree == Z_NULL) strm->zfree = zcfree;
if (level == Z_DEFAULT_COMPRESSION) level = 6;
#ifdef FASTEST
level = 1;
#endif
if (windowBits < 0) { /* undocumented feature: suppress zlib header */
noheader = 1;
windowBits = -windowBits;
}
if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
return Z_STREAM_ERROR;
}
s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
if (s == Z_NULL) return Z_MEM_ERROR;
strm->state = (struct internal_state *)s;
s->strm = strm;
s->noheader = noheader;
s->w_bits = windowBits;
s->w_size = 1 << s->w_bits;
s->w_mask = s->w_size - 1;
s->hash_bits = memLevel + 7;
s->hash_size = 1 << s->hash_bits;
s->hash_mask = s->hash_size - 1;
s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
s->window = (BYTE *) ZALLOC(strm, s->w_size, 2*sizeof(BYTE));
s->prev = (Pos *) ZALLOC(strm, s->w_size, sizeof(Pos));
s->head = (Pos *) ZALLOC(strm, s->hash_size, sizeof(Pos));
s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
overlay = (unsigned short *) ZALLOC(strm, s->lit_bufsize, sizeof(unsigned short)+2);
s->pending_buf = (unsigned char *) overlay;
s->pending_buf_size = (unsigned long)s->lit_bufsize * (sizeof(unsigned short)+2L);
if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
s->pending_buf == Z_NULL) {
strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
deflateEnd (strm);
return Z_MEM_ERROR;
}
s->d_buf = overlay + s->lit_bufsize/sizeof(unsigned short);
s->l_buf = s->pending_buf + (1+sizeof(unsigned short))*s->lit_bufsize;
s->level = level;
s->strategy = strategy;
s->method = (BYTE)method;
return deflateReset(strm);
}
/* ========================================================================= */
int deflateSetDictionary (
z_streamp strm,
const BYTE *dictionary,
unsigned int dictLength)
{
deflate_state *s;
unsigned int length = dictLength;
unsigned int n;
IPos hash_head = 0;
if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL ||
strm->state->status != INIT_STATE) return Z_STREAM_ERROR;
s = strm->state;
strm->adler = adler32(strm->adler, dictionary, dictLength);
if (length < MIN_MATCH) return Z_OK;
if (length > MAX_DIST(s)) {
length = MAX_DIST(s);
#ifndef USE_DICT_HEAD
dictionary += dictLength - length; /* use the tail of the dictionary */
#endif
}
memcpy(s->window, dictionary, length);
s->strstart = length;
s->block_start = (long)length;
/* Insert all strings in the hash table (except for the last two bytes).
* s->lookahead stays null, so s->ins_h will be recomputed at the next
* call of fill_window.
*/
s->ins_h = s->window[0];
UPDATE_HASH(s, s->ins_h, s->window[1]);
for (n = 0; n <= length - MIN_MATCH; n++) {
INSERT_STRING(s, n, hash_head);
}
if (hash_head) hash_head = 0; /* to make compiler happy */
return Z_OK;
}
/* ========================================================================= */
int deflateReset (z_streamp strm)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -