⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stdenc.c

📁 windows gzip source code
💻 C
字号:
/*
 * stdenc.c
 *
 * Standard encoder
 */
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"


//
// Update hash variable "h" with character c
//
#define UPDATE_HASH(h,c) \
	h = ((h) << STD_ENCODER_HASH_SHIFT) ^ (c);


//
// Insert a string into the hash chain at location bufpos
//
// Assertions check that we never attempt to insert near the end of the buffer
// (since our hash value is based on values at bufpos, bufpos+1, bufpos+2) and
// that our hash value is always valid for the bytes we are inserting.
//
#define INSERT_STRING(search,bufpos) \
{ \
    _ASSERT((bufpos + 2) < context->bufpos_end); \
	UPDATE_HASH(hash, window[bufpos+2]); \
	_ASSERT((unsigned int) STD_ENCODER_RECALCULATE_HASH(bufpos) == (unsigned int) (hash & STD_ENCODER_HASH_MASK)); \
	search = lookup[hash & STD_ENCODER_HASH_MASK]; \
	lookup[hash & STD_ENCODER_HASH_MASK] = (t_search_node) (bufpos); \
	prev[bufpos & WINDOW_MASK] = (t_search_node) search; \
}


#define CHECK_FLUSH_RECORDING_BUFFER() \
	if (recording_bitcount >= 16) \
	{ \
		*recording_bufptr++ = (BYTE) recording_bitbuf; \
		*recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \
		recording_bitbuf >>= 16; \
		recording_bitcount -= 16; \
	}


#define OUTPUT_RECORDING_DATA(count,data) \
	recording_bitbuf |= ((data) << recording_bitcount); \
	recording_bitcount += (count);


//
// Record unmatched symbol c
//
#define RECORD_CHAR(c) \
    context->outputting_block_num_literals++; \
    context->std_encoder->literal_tree_freq[c]++; \
	_ASSERT(context->std_encoder->recording_literal_tree_len[c] != 0); \
	OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[c], context->std_encoder->recording_literal_tree_code[c]); \
	CHECK_FLUSH_RECORDING_BUFFER();


//
// Record a match with length match_len (>= MIN_MATCH) and displacement match_pos
//
#define RECORD_MATCH(match_len, match_pos) \
{ \
	int pos_slot = POS_SLOT(match_pos); \
	int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
	int item = (NUM_CHARS+1) + len_slot; \
	int extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \
	int extra_len_bits = g_ExtraLengthBits[len_slot]; \
	_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
	_ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < STD_ENCODER_MAX_ITEMS); \
	_ASSERT(context->std_encoder->recording_literal_tree_len[item] != 0); \
	_ASSERT(context->std_encoder->recording_dist_tree_len[pos_slot] != 0); \
    context->outputting_block_num_literals++; \
    context->std_encoder->literal_tree_freq[(NUM_CHARS + 1) + len_slot]++; \
    context->std_encoder->dist_tree_freq[pos_slot]++; \
	OUTPUT_RECORDING_DATA(context->std_encoder->recording_literal_tree_len[item], context->std_encoder->recording_literal_tree_code[item]); \
	CHECK_FLUSH_RECORDING_BUFFER(); \
	if (extra_len_bits > 0) \
	{ \
		OUTPUT_RECORDING_DATA(extra_len_bits, (match_len-MIN_MATCH) & ((1 << extra_len_bits)-1)); \
		CHECK_FLUSH_RECORDING_BUFFER(); \
	} \
	OUTPUT_RECORDING_DATA(context->std_encoder->recording_dist_tree_len[pos_slot], context->std_encoder->recording_dist_tree_code[pos_slot]); \
	CHECK_FLUSH_RECORDING_BUFFER(); \
	if (extra_dist_bits > 0) \
	{ \
		OUTPUT_RECORDING_DATA(extra_dist_bits, match_pos & ((1 << extra_dist_bits)-1)); \
		CHECK_FLUSH_RECORDING_BUFFER(); \
	} \
}


#define FLUSH_RECORDING_BITBUF() \
    *recording_bufptr++ = (BYTE) recording_bitbuf; \
	*recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); 


//
// Verifies that all of the hash pointers in the hash table are correct, and that everything
// in the same hash chain has the same hash value
//
#ifdef FULL_DEBUG
#define VERIFY_HASHES(bufpos) StdEncoderVerifyHashes(context, bufpos)
#else
#define VERIFY_HASHES(bufpos) ;
#endif


static void StdEncoderMoveWindows(t_encoder_context *context);

static int StdEncoderFindMatch(
    const BYTE *        window,
    const USHORT *      prev,
    long                bufpos, 
    long                search, 
    unsigned int *      match_pos, 
    int                 cutoff,
    int                 nice_length
);


void StdEncoderDeflate(
	t_encoder_context *	context, 
    int                 search_depth,
	int					lazy_match_threshold,
    int                 good_length,
    int                 nice_length
)
{
	long			bufpos;
	unsigned int	hash;
    t_std_encoder * encoder = context->std_encoder;
	byte *			window = encoder->window;
	t_search_node *	prev = encoder->prev;
	t_search_node *	lookup = encoder->lookup;
	unsigned long	recording_bitbuf;
	int				recording_bitcount;
	byte *			recording_bufptr;
    byte *          end_recording_bufptr;

	// restore literal/match bitmap variables
    end_recording_bufptr    = &encoder->lit_dist_buffer[STD_ENCODER_LIT_DIST_BUFFER_SIZE-8];
	recording_bufptr        = encoder->recording_bufptr;
    recording_bitbuf        = encoder->recording_bitbuf;
    recording_bitcount      = encoder->recording_bitcount;
	bufpos			        = context->bufpos;

	VERIFY_HASHES(bufpos);

    //
    // Recalculate our hash
    //
    // One disadvantage of the way we do our hashing is that matches are not permitted in the last
    // few characters near bufpos_end.
	//
    hash = 0;
	UPDATE_HASH(hash, window[bufpos]);
	UPDATE_HASH(hash, window[bufpos+1]);

	while (bufpos < context->bufpos_end)
	{
		int				match_len;
		t_match_pos		match_pos;
		t_match_pos		search;

        if (context->bufpos_end - bufpos <= 3)
		{
			// don't insert any strings when we get close to the end of the buffer,
            // since we will end up using corrupted hash values (the data after bufpos_end
            // is undefined, and those bytes would be swept into the hash value if we
            // calculated a hash at bufpos_end-2, for example, since our hash value is
            // build from 3 consecutive characters in the buffer).
			match_len = 0;
		}
		else
		{
			INSERT_STRING(search,bufpos);

			// find a match at what we'll call position X
			if (search != 0)
			{
				match_len = StdEncoderFindMatch(window, prev, bufpos, search, &match_pos, search_depth, nice_length);

				// truncate match if we're too close to the end of the buffer
				if (bufpos + match_len > context->bufpos_end)
					match_len = context->bufpos_end - bufpos;
			}
			else
			{
				match_len = 0;
			}
		}

		if (match_len < MIN_MATCH)
		{
			// didn't find a match, so output unmatched char
			RECORD_CHAR(window[bufpos]);
    		bufpos++;
		}
		else
		{
    		// bufpos now points to X+1
    		bufpos++;

			// is this match so good (long) that we should take it automatically without
			// checking X+1 ?
			if (match_len <= lazy_match_threshold)
			{
				int				next_match_len;
				t_match_pos		next_match_pos;

                // sets search
                INSERT_STRING(search,bufpos);

				// no, so check for a better match at X+1
				if (search != 0)
				{
					next_match_len = StdEncoderFindMatch(
						window,
                        prev,
						bufpos, 
						search,
						&next_match_pos,
						match_len < good_length ? search_depth : (search_depth >> 2),
                        nice_length
					);
				
					// truncate match if we're too close to the end of the buffer
					// note: next_match_len could now be < MIN_MATCH
					if (bufpos + next_match_len > context->bufpos_end)
						next_match_len = context->bufpos_end - bufpos;
				}
				else
				{
					next_match_len = 0;
				}

				// right now X and X+1 are both inserted into the search tree
				if (next_match_len > match_len)
				{
					// since next_match_len > match_len, it can't be < MIN_MATCH here

					// match at X+1 is better, so output unmatched char at X
					RECORD_CHAR(window[bufpos-1]);

					// now output match at location X+1
					RECORD_MATCH(next_match_len, next_match_pos);

					// insert remainder of second match into search tree
					// 
					// example: (*=inserted already)
					//
					// X      X+1               X+2      X+3     X+4
					// *      *
					//        nextmatchlen=3
					//        bufpos
					//
					// If next_match_len == 3, we want to perform 2
					// insertions (at X+2 and X+3).  However, first we must 
					// inc bufpos.
					//
					bufpos++; // now points to X+2
					match_len = next_match_len;
					goto insert;
				}
				else
				{
					// match at X is better, so take it
					RECORD_MATCH(match_len, match_pos);

					//
					// Insert remainder of first match into search tree, minus the first
					// two locations, which were inserted by the FindMatch() calls.
					// 
					// For example, if match_len == 3, then we've inserted at X and X+1
					// already (and bufpos is now pointing at X+1), and now we need to insert 
					// only at X+2.
					//
					match_len--;
					bufpos++; // now bufpos points to X+2
					goto insert;
				}
			}
			else /* match_length >= good_match */
			{
				// in assertion: bufpos points to X+1, location X inserted already
					
				// first match is so good that we're not even going to check at X+1
				RECORD_MATCH(match_len, match_pos);

				// insert remainder of match at X into search tree
insert:
				if (context->bufpos_end - bufpos <= match_len)
				{
					bufpos += (match_len-1);
				}
				else
				{
					while (--match_len > 0)
					{
						t_match_pos ignore; // we're not interested in the search position

						INSERT_STRING(ignore,bufpos);
						bufpos++;
					}
				}
			}
		}

		// literal buffer or distance buffer filled up (or close to filling up)?
		if (context->outputting_block_num_literals >= STD_ENCODER_MAX_ITEMS-4 ||
            recording_bufptr >= end_recording_bufptr)
		{
			// yes, then we must output a block
			_ASSERT(context->outputting_block_num_literals <= STD_ENCODER_MAX_ITEMS);

			// flush our recording matches bit buffer
            FLUSH_RECORDING_BITBUF();

			StdEncoderOutputBlock(context);

			// did we output the whole block?
			if (context->state != STATE_NORMAL)
				break;

			// we did output the whole block, so reset literal encoding
        	recording_bufptr = encoder->recording_bufptr;
            recording_bitbuf = encoder->recording_bitbuf;
            recording_bitcount = encoder->recording_bitcount;
		}
	} /* end ... while (bufpos < bufpos_end) */

    _ASSERT(bufpos <= context->bufpos_end);

	// save recording state
	encoder->recording_bufptr = recording_bufptr;
    encoder->recording_bitbuf = recording_bitbuf;
    encoder->recording_bitcount = recording_bitcount;

	context->bufpos = bufpos;

	VERIFY_HASHES(bufpos);

    if (context->bufpos == 2*WINDOW_SIZE)
        StdEncoderMoveWindows(context);
}


static int StdEncoderFindMatch(
    const BYTE *        window,
    const USHORT *      prev,
    long                bufpos, 
    long                search, 
    unsigned int *      match_pos, 
    int                 cutoff,
    int                 nice_length
)
{
	const BYTE *	window_bufpos = &window[bufpos];
	long			earliest; // how far back we can look
	int				best_match = 0; // best match length found so far
	t_match_pos		l_match_pos;

	_ASSERT(bufpos >= 0 && bufpos < 2*WINDOW_SIZE);
	_ASSERT(search < bufpos);
	_ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));

	earliest = bufpos - WINDOW_SIZE;
    _ASSERT(earliest >= 0);

	while (search > earliest)
	{
		_ASSERT(STD_ENCODER_RECALCULATE_HASH(search) == STD_ENCODER_RECALCULATE_HASH(bufpos));
        _ASSERT(search < bufpos);

		if (window_bufpos[best_match] == window[search + best_match])
		{
			int j;

			for (j = 0; j < MAX_MATCH; j++)
			{
				if (window_bufpos[j] != window[search+j])
					break;
			}
	
			if (j > best_match)
			{
				best_match	= j;
				l_match_pos	= search; // absolute position

				if (j > nice_length)
					break;
			}
		}

		if (--cutoff == 0)
			break;

		search = (long) prev[search & WINDOW_MASK];
	}

    // turn l_match_pos into relative position
	l_match_pos = bufpos - l_match_pos - 1; 

	if (best_match == 3 && l_match_pos >= STD_ENCODER_MATCH3_DIST_THRESHOLD)
		return 0;

	_ASSERT(best_match < MIN_MATCH || l_match_pos < WINDOW_SIZE);
    *match_pos = l_match_pos;

	return best_match;
}


static void StdEncoderMoveWindows(t_encoder_context *context)
{
	if (context->bufpos >= 2*WINDOW_SIZE)
	{
		int		i;
		t_search_node *lookup = context->std_encoder->lookup;
		t_search_node *prev = context->std_encoder->prev;
		BYTE *window = context->std_encoder->window;

		VERIFY_HASHES(2*WINDOW_SIZE);

		memcpy(&window[0], &window[context->bufpos - WINDOW_SIZE], WINDOW_SIZE);

		for (i = 0; i < STD_ENCODER_HASH_TABLE_SIZE; i++)
		{
			long val = ((long) lookup[i]) - WINDOW_SIZE;
	
			if (val <= 0)
				lookup[i] = (t_search_node) 0;
			else
				lookup[i] = (t_search_node) val;
		}

		for (i = 0; i < WINDOW_SIZE; i++)
		{
			long val = ((long) prev[i]) - WINDOW_SIZE;
	
			if (val <= 0)
				prev[i] = (t_search_node) 0;
			else
				prev[i] = (t_search_node) val;
		}

#ifdef FULL_DEBUG
		memset(&window[WINDOW_SIZE], 0, WINDOW_SIZE);
#endif

		VERIFY_HASHES(2*WINDOW_SIZE);

		
		context->bufpos = WINDOW_SIZE;
		context->bufpos_end = context->bufpos;
	}
}


//
// Zero the running frequency counts
//
// Also set freq[END_OF_BLOCK_CODE] = 1
//
void StdEncoderZeroFrequencyCounts(t_std_encoder *encoder)
{
    _ASSERT(encoder != NULL);

  	memset(encoder->literal_tree_freq, 0, sizeof(encoder->literal_tree_freq));
    memset(encoder->dist_tree_freq, 0, sizeof(encoder->dist_tree_freq));
    encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1;
}


void StdEncoderReset(t_encoder_context *context)
{
    t_std_encoder *encoder = context->std_encoder;

	_ASSERT(encoder != NULL);
	memset(encoder->lookup, 0, sizeof(encoder->lookup));

    context->window_size        = WINDOW_SIZE;
	context->bufpos		        = context->window_size;
	context->bufpos_end         = context->bufpos;

	encoder->recording_bitbuf	= 0;
	encoder->recording_bitcount = 0;
    encoder->recording_bufptr   = encoder->lit_dist_buffer;

	DeflateInitRecordingTables(
	    encoder->recording_literal_tree_len,
    	encoder->recording_literal_tree_code, 
	    encoder->recording_dist_tree_len,
    	encoder->recording_dist_tree_code
    );

    StdEncoderZeroFrequencyCounts(encoder);
}


BOOL StdEncoderInit(t_encoder_context *context)
{
	context->std_encoder = (t_std_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_std_encoder));

    if (context->std_encoder == NULL)
        return FALSE;

	StdEncoderReset(context);
	return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -