⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fstenc.c

📁 windows gzip source code
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 * fstenc.c
 *
 * Fast encoder
 *
 * This is a one pass encoder which uses predefined trees.  However, since these are not the same
 * trees defined for a fixed block (we use better trees than that), we output a dynamic block header.
 */
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"
#include "fasttbl.h"


//
// For debugging purposes:
//
// Verifies that all of the hash pointers in the hash table are correct, and that everything
// in the same hash chain has the same hash value
//
#ifdef FULL_DEBUG
#define VERIFY_HASHES(bufpos) FastEncoderVerifyHashes(context, bufpos)
#else
#define VERIFY_HASHES(bufpos) ;
#endif


//
// Update hash variable "h" with character c
//
#define UPDATE_HASH(h,c) \
	h = ((h) << FAST_ENCODER_HASH_SHIFT) ^ (c);


//
// Insert a string into the hash chain at location bufpos
//
#define INSERT_STRING(search,bufpos) \
{ \
	UPDATE_HASH(hash, window[bufpos+2]); \
\
	_ASSERT((unsigned int) FAST_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & FAST_ENCODER_HASH_MASK)); \
\
    search = lookup[hash & FAST_ENCODER_HASH_MASK]; \
	lookup[hash & FAST_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
	prev[bufpos & FAST_ENCODER_WINDOW_MASK] = (t_search_node) (search); \
}


//
// Output bits function which uses local variables for the bit buffer
//
#define LOCAL_OUTPUT_BITS(n, x) \
{ \
	bitbuf |= ((x) << bitcount); \
	bitcount += (n); \
	if (bitcount >= 16) \
    { \
		*output_curpos++ = (BYTE) bitbuf; \
		*output_curpos++ = (BYTE) (bitbuf >> 8); \
		bitcount -= 16; \
		bitbuf >>= 16; \
	} \
}


//
// Output unmatched symbol c
//
#define OUTPUT_CHAR(c) \
    LOCAL_OUTPUT_BITS(g_FastEncoderLiteralCodeInfo[c] & 31, g_FastEncoderLiteralCodeInfo[c] >> 5);


//
// Output a match with length match_len (>= MIN_MATCH) and displacement match_pos
//
// Optimisation: unlike the other encoders, here we have an array of codes for each match
// length (not just each match length slot), complete with all the extra bits filled in, in
// a single array element.  
//
// There are many advantages to doing this:
//
// 1. A single array lookup on g_FastEncoderLiteralCodeInfo, instead of separate array lookups
//    on g_LengthLookup (to get the length slot), g_FastEncoderLiteralTreeLength, 
//    g_FastEncoderLiteralTreeCode, g_ExtraLengthBits, and g_BitMask
//
// 2. The array is an array of ULONGs, so no access penalty, unlike for accessing those USHORT
//    code arrays in the other encoders (although they could be made into ULONGs with some
//    modifications to the source).
//
// Note, if we could guarantee that code_len <= 16 always, then we could skip an if statement here.
//
// A completely different optimisation is used for the distance codes since, obviously, a table for 
// all 8192 distances combining their extra bits is not feasible.  The distance codeinfo table is 
// made up of code[], len[] and # extra_bits for this code.
//
// The advantages are similar to the above; a ULONG array instead of a USHORT and BYTE array, better
// cache locality, fewer memory operations.
//
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
    int extra_bits; \
    int code_len; \
    ULONG code_info; \
\
	_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
\
    code_info = g_FastEncoderLiteralCodeInfo[(NUM_CHARS+1-MIN_MATCH)+match_len]; \
    code_len = code_info & 31; \
    _ASSERT(code_len != 0); \
    if (code_len <= 16) \
    { \
        LOCAL_OUTPUT_BITS(code_len, code_info >> 5); \
    } \
    else \
    { \
        LOCAL_OUTPUT_BITS(16, (code_info >> 5) & 65535); \
        LOCAL_OUTPUT_BITS(code_len-16, code_info >> (5+16)); \
    } \
    code_info = g_FastEncoderDistanceCodeInfo[POS_SLOT(match_pos)]; \
    LOCAL_OUTPUT_BITS(code_info & 15, code_info >> 8); \
    extra_bits = (code_info >> 4) & 15; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}


//
// This commented out code is the old way of doing things, which is what the other encoders use
//
#if 0
#define OUTPUT_MATCH(match_len, match_pos) \
{ \
	int pos_slot = POS_SLOT(match_pos); \
	int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
    int extra_bits; \
\
	_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
    _ASSERT(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot] != 0); \
    _ASSERT(g_FastEncoderDistanceTreeLength[pos_slot] != 0); \
\
    LOCAL_OUTPUT_BITS(g_FastEncoderLiteralTreeLength[(NUM_CHARS+1)+len_slot], g_FastEncoderLiteralTreeCode[(NUM_CHARS+1)+len_slot]); \
    extra_bits = g_ExtraLengthBits[len_slot]; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_len-MIN_MATCH) & g_BitMask[extra_bits]); \
\
    LOCAL_OUTPUT_BITS(g_FastEncoderDistanceTreeLength[pos_slot], g_FastEncoderDistanceTreeCode[pos_slot]); \
    extra_bits = g_ExtraDistanceBits[pos_slot]; \
    if (extra_bits != 0) LOCAL_OUTPUT_BITS(extra_bits, (match_pos) & g_BitMask[extra_bits]); \
}
#endif


//
// Local function prototypes
//
static void FastEncoderMoveWindows(t_encoder_context *context);

static int FastEncoderFindMatch(
    const BYTE *    window,
    const USHORT *  prev,
    long            bufpos, 
    long            search, 
    t_match_pos *   match_pos, 
    int             cutoff,
    int             nice_length
);


//
// Output the block type and tree structure for our hard-coded trees.
//
// Functionally equivalent to:
//
// outputBits(context, 1, 1); // "final" block flag
// outputBits(context, 2, BLOCKTYPE_DYNAMIC);
// outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
//
// However, all of the above has smartly been cached in global data, so we just memcpy().
//
void FastEncoderOutputPreamble(t_encoder_context *context)
{
#if 0
    // slow way:
    outputBits(context, 1+2, 1 | (BLOCKTYPE_DYNAMIC << 1));
    outputTreeStructure(context, g_FastEncoderLiteralTreeLength, g_FastEncoderDistanceTreeLength);
#endif

    // make sure tree has been init
    _ASSERT(g_FastEncoderTreeLength > 0);

    // make sure we have enough space to output tree
    _ASSERT(context->output_curpos + g_FastEncoderTreeLength < context->output_endpos);

    // fast way:
    memcpy(context->output_curpos, g_FastEncoderTreeStructureData, g_FastEncoderTreeLength);
    context->output_curpos += g_FastEncoderTreeLength;

    // need to get final states of bitbuf and bitcount after outputting all that stuff
    context->bitbuf = g_FastEncoderPostTreeBitbuf;
    context->bitcount = g_FastEncoderPostTreeBitcount;
}


//
// Fast encoder deflate function
//
void FastEncoderDeflate(
	t_encoder_context *	context, 
    int                 search_depth, // # hash links to traverse
	int					lazy_match_threshold, // don't search @ X+1 if match length @ X is > lazy
    int                 good_length, // divide traversal depth by 4 if match length > good
    int                 nice_length // in match finder, if we find >= nice_length match, quit immediately
)
{
	long			bufpos;
	unsigned int	hash;
    unsigned long   bitbuf;
    int             bitcount;
    BYTE *          output_curpos;
    t_fast_encoder *encoder = context->fast_encoder;
	byte *			window = encoder->window; // make local copies of context variables
	t_search_node *	prev = encoder->prev;
	t_search_node *	lookup = encoder->lookup;

    //
    // If this is the first time in here (since last reset) then we need to output our dynamic
    // block header
    //
    if (encoder->fOutputBlockHeader == FALSE)
    {
        encoder->fOutputBlockHeader = TRUE;

        //
        // Watch out!  Calls to outputBits() and outputTreeStructure() use the bit buffer 
        // variables stored in the context, not our local cached variables.
        //
        FastEncoderOutputPreamble(context);
    }

    //
    // Copy bitbuf vars into local variables since we're now using OUTPUT_BITS macro.
    // Do not call anything that uses the context structure's bit buffer variables!
    //
    output_curpos   = context->output_curpos;
    bitbuf          = context->bitbuf;
    bitcount        = context->bitcount;

    // copy bufpos into local variable
    bufpos = context->bufpos;

	VERIFY_HASHES(bufpos); // debug mode: verify that the hash table is correct

    // initialise the value of the hash
    // no problem if locations bufpos, bufpos+1 are invalid (not enough data), since we will 
    // never insert using that hash value
	hash = 0;
	UPDATE_HASH(hash, window[bufpos]);
	UPDATE_HASH(hash, window[bufpos+1]);

    // while we haven't come to the end of the input, and we still aren't close to the end
    // of the output
	while (bufpos < context->bufpos_end && output_curpos < context->output_near_end_threshold)
	{
		int				match_len;
		t_match_pos		match_pos;
		t_match_pos		search;

    	VERIFY_HASHES(bufpos); // debugger: verify that hash table is correct

		if (context->bufpos_end - bufpos <= 3)
		{
			// The hash value becomes corrupt when we get within 3 characters of the end of the
            // input buffer, since the hash value is based on 3 characters.  We just stop
            // inserting into the hash table at this point, and allow no matches.
			match_len = 0;
		}
		else
		{
            // insert string into hash table and return most recent location of same hash value
			INSERT_STRING(search,bufpos);

            // did we find a recent location of this hash value?
			if (search != 0)
			{
    			// yes, now find a match at what we'll call position X
				match_len = FastEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);

				// truncate match if we're too close to the end of the input buffer
				if (bufpos + match_len > context->bufpos_end)
					match_len = context->bufpos_end - bufpos;
			}
			else
			{
                // no most recent location found
				match_len = 0;
			}
		}

		if (match_len < MIN_MATCH)
		{
            // didn't find a match, so output unmatched char
			OUTPUT_CHAR(window[bufpos]);
    		bufpos++;
		}
		else
		{
	    	// bufpos now points to X+1
    		bufpos++;

			// is this match so good (long) that we should take it automatically without
			// checking X+1 ?
			if (match_len <= lazy_match_threshold)
			{
				int				next_match_len;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -