⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 optenc.c

📁 windows gzip source code
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 * optenc.c
 *
 * Optimal encoder
 *
 * BUGBUG  Can improve compression by using the "redo" method of LZX; after the first 32K bytes,
 * reset the compressor but keep the tables, and start over.
 */
#include <string.h>
#include <stdio.h>
#include <crtdbg.h>
#include "deflate.h"


//
// If we get a match this good, take it automatically
//
// Note: FAST_DECISION_THRESHOLD can be set to anything; it's been set to BREAK_LENGTH
//       arbitrarily
//
#define FAST_DECISION_THRESHOLD BREAK_LENGTH


//
// After we have this many literals, create a tree to get updated statistical estimates
//
#define FIRST_TREE_UPDATE 1024


//
// Verifies that all of the hash pointers in the hash table are correct, and that
// the tree structure is valid.
//
#define DISABLE_VERIFY_HASHES

#ifdef _DEBUG
#ifndef DISABLE_VERIFY_HASHES
#define VERIFY_HASHES(bufpos) verifyHashes(context, bufpos)
#else
#define VERIFY_HASHES(bufpos) ;
#endif
#else
#define VERIFY_HASHES(bufpos) ;
#endif


#define CHECK_FLUSH_RECORDING_BUFFER() \
	if (recording_bitcount >= 16) \
	{ \
		*recording_bufptr++ = (BYTE) recording_bitbuf; \
		*recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); \
		recording_bitbuf >>= 16; \
		recording_bitcount -= 16; \
	}


#define OUTPUT_RECORDING_DATA(count,data) \
	recording_bitbuf |= ((data) << recording_bitcount); \
	recording_bitcount += (count);


//
// Record unmatched symbol c
//
#define RECORD_CHAR(c) \
    context->outputting_block_num_literals++; \
    encoder->literal_tree_freq[c]++; \
	_ASSERT(encoder->recording_literal_tree_len[c] != 0); \
	OUTPUT_RECORDING_DATA(encoder->recording_literal_tree_len[c], encoder->recording_literal_tree_code[c]); \
	CHECK_FLUSH_RECORDING_BUFFER();


//
// Record a match with length match_len (>= MIN_MATCH) and displacement match_pos
//
#define RECORD_MATCH(match_len, match_pos) \
{ \
	int pos_slot = POS_SLOT(match_pos); \
	int len_slot = g_LengthLookup[match_len - MIN_MATCH]; \
	int item = (NUM_CHARS+1) + len_slot; \
	int extra_dist_bits = g_ExtraDistanceBits[pos_slot]; \
	int extra_len_bits = g_ExtraLengthBits[len_slot]; \
	_ASSERT(match_len >= MIN_MATCH && match_len <= MAX_MATCH); \
	_ASSERT(context->outputting_block_num_literals >= 0 && context->outputting_block_num_literals < OPT_ENCODER_MAX_ITEMS); \
	_ASSERT(encoder->recording_literal_tree_len[item] != 0); \
	_ASSERT(encoder->recording_dist_tree_len[pos_slot] != 0); \
    context->outputting_block_num_literals++; \
    encoder->literal_tree_freq[(NUM_CHARS + 1) + len_slot]++; \
    encoder->dist_tree_freq[pos_slot]++; \
	OUTPUT_RECORDING_DATA(encoder->recording_literal_tree_len[item], encoder->recording_literal_tree_code[item]); \
	CHECK_FLUSH_RECORDING_BUFFER(); \
	if (extra_len_bits > 0) \
	{ \
		OUTPUT_RECORDING_DATA(extra_len_bits, (match_len-MIN_MATCH) & ((1 << extra_len_bits)-1)); \
		CHECK_FLUSH_RECORDING_BUFFER(); \
	} \
	OUTPUT_RECORDING_DATA(encoder->recording_dist_tree_len[pos_slot], encoder->recording_dist_tree_code[pos_slot]); \
	CHECK_FLUSH_RECORDING_BUFFER(); \
	if (extra_dist_bits > 0) \
	{ \
		OUTPUT_RECORDING_DATA(extra_dist_bits, match_pos & ((1 << extra_dist_bits)-1)); \
		CHECK_FLUSH_RECORDING_BUFFER(); \
	} \
}


#define FLUSH_RECORDING_BITBUF() \
    *recording_bufptr++ = (BYTE) recording_bitbuf; \
	*recording_bufptr++ = (BYTE) (recording_bitbuf >> 8); 


static void calculateUpdatedEstimates(t_encoder_context *context);
static void OptimalEncoderMoveWindows(t_encoder_context *context);


static int match_est(t_optimal_encoder *encoder, int match_length, unsigned int match_pos)
{
	int dist_slot;
	int len_slot;

	// output match position
	len_slot = g_LengthLookup[match_length-MIN_MATCH];
	dist_slot = POS_SLOT(match_pos);

	return	encoder->literal_tree_len[NUM_CHARS + 1 + len_slot] +
			g_ExtraLengthBits[len_slot] +
			encoder->dist_tree_len[dist_slot] + 
			g_ExtraDistanceBits[dist_slot];
}


//
// Create initial estimations to output each element
//
static void initOptimalEstimates(t_encoder_context *context)
{
	int i, p;
    t_optimal_encoder *encoder = context->optimal_encoder;

	for (i = 0; i < NUM_CHARS; i++)
		encoder->literal_tree_len[i] = 8;

	p = NUM_CHARS+1;
	encoder->literal_tree_len[p] = 3;
	encoder->literal_tree_len[p+1] = 4;
	encoder->literal_tree_len[p+2] = 5;

	for (; p < MAX_LITERAL_TREE_ELEMENTS; p++)
		encoder->literal_tree_len[p] = 6;

	for (i = 0; i < MAX_DIST_TREE_ELEMENTS; i++)
		encoder->dist_tree_len[i] = (i/2)+1;
}


//
// Fix optimal estimates; if bitlen == 0 it doesn't mean that the element takes 0
// bits to output, it means that the element didn't occur, so come up with some estimate.
//
static void fixOptimalEstimates(t_encoder_context *context)
{
	int i;
    t_optimal_encoder *encoder = context->optimal_encoder;

	for (i = 0; i < NUM_CHARS; i++)
	{
		if (encoder->literal_tree_len[i] == 0)
			encoder->literal_tree_len[i] = 13;
	}

	for (i = NUM_CHARS+1; i < MAX_LITERAL_TREE_ELEMENTS; i++)
	{
		if (encoder->literal_tree_len[i] == 0)
			encoder->literal_tree_len[i] = 12;
	}

	for (i = 0; i < MAX_DIST_TREE_ELEMENTS; i++)
	{
		if (encoder->dist_tree_len[i] == 0)
			encoder->dist_tree_len[i] = 10;
	}
}


/*
 * Returns an estimation of how many bits it would take to output
 * a given character
 */
#define CHAR_EST(c) (numbits_t) (encoder->literal_tree_len[(c)])


/*
 * Returns an estimation of how many bits it would take to output
 * a given match.
 */
#define MATCH_EST(ml,mp,result) result = match_est(encoder, ml,mp);


//
// Returns whether the literal buffers are just about full
//
// Since we could output a large number of matches/chars in between these checks, we
// have to be careful.
//
// BUGBUG should check after each item output, so we don't have to be so careful; this
//        means we will utilise more of the recording buffer
//
#define LITERAL_BUFFERS_FULL() \
    (context->outputting_block_num_literals >= OPT_ENCODER_MAX_ITEMS-4-LOOK-MAX_MATCH || \
            recording_bufptr + 3*(MAX_MATCH + LOOK) >= end_recording_bufptr)


void OptimalEncoderDeflate(t_encoder_context *context)
{
	unsigned long	bufpos_end;
	unsigned long	MatchPos;
	unsigned long	i;
	int				EncMatchLength; /* must be a signed number */
	unsigned long	bufpos;
	unsigned long	recording_bitbuf;
	int				recording_bitcount;
	byte *			recording_bufptr;
    byte *          end_recording_bufptr;
    t_optimal_encoder *encoder = context->optimal_encoder;

    _ASSERT(encoder != NULL);
	_ASSERT(context->state == STATE_NORMAL);

	// reinsert the up to BREAK_LENGTH nodes we removed the last time we exit this function
	VERIFY_HASHES(context->bufpos);
	reinsertRemovedNodes(context);
	VERIFY_HASHES(context->bufpos);

	// restore literal/match bitmap variables
    end_recording_bufptr = &encoder->lit_dist_buffer[OPT_ENCODER_LIT_DIST_BUFFER_SIZE-8];
	recording_bufptr = encoder->recording_bufptr;
    recording_bitbuf = encoder->recording_bitbuf;
    recording_bitcount = encoder->recording_bitcount;

    bufpos			= context->bufpos;
	bufpos_end		= context->bufpos_end;

	/*
	 * While we haven't reached the end of the data
	 */
after_output_block:

	while (bufpos < bufpos_end)
	{
		// time to update our stats?
		if (context->outputting_block_num_literals >= encoder->next_tree_update)
		{
			encoder->next_tree_update += 1024;

            calculateUpdatedEstimates(context);
			fixOptimalEstimates(context);
		}

		// literal buffer or distance buffer filled up (or close to filling up)?
		if (LITERAL_BUFFERS_FULL())
			break;

		/*
		 * Search for matches of all different possible lengths, at bufpos
		 */
		EncMatchLength = optimal_find_match(context, bufpos); 

		if (EncMatchLength < MIN_MATCH)
		{

output_literal:
			/*
			 * No match longer than 1 character exists in the history 
			 * window, so output the character at bufpos as a symbol.
			 */
			RECORD_CHAR(encoder->window[bufpos]);
			bufpos++;
			continue;
		}

		/*
		 * Found a match.
		 *
		 * Make sure it cannot exceed the end of the buffer.
		 */
		if ((unsigned long) EncMatchLength + bufpos > bufpos_end)
		{
			EncMatchLength = bufpos_end - bufpos;    

			/*
			 * Oops, not enough for even a small match, so we 
			 * have to output a literal
			 */
			if (EncMatchLength < MIN_MATCH)
				goto output_literal;
		}

		if (EncMatchLength < FAST_DECISION_THRESHOLD)
		{
			/*
			 *  A match has been found that is between MIN_MATCH and 
			 *  FAST_DECISION_THRESHOLD bytes in length.  The following 
			 *  algorithm is the optimal encoder that will determine the 
			 *  most efficient order of matches and unmatched characters 
			 *  over a span area defined by LOOK.  
			 *
			 *  The code is essentially a shortest path determination 
			 *  algorithm.  A stream of data can be encoded in a vast number 
			 *  of different ways depending on the match lengths and offsets
			 *  chosen.  The key to good compression ratios is to chose the 
			 *  least expensive path.
			 */
			unsigned long	span;
			unsigned long	epos, bpos, NextPrevPos, MatchPos;
			t_decision_node *decision_node_ptr;
			t_decision_node *context_decision_node = encoder->decision_node;
			t_match_pos *matchpos_table = encoder->matchpos_table;
			long		iterations;

			/*
			 * Points to the end of the area covered by this match; the span
			 * will continually be extended whenever we find more matches
			 * later on.  It will stop being extended when we reach a spot
			 * where there are no matches, which is when we decide which
			 * path to take to output the matches.
			 */
			span = bufpos + EncMatchLength;

			/*
			 * The furthest position into which we will do our lookahead parsing 
			 */
			epos = bufpos + LOOK;

			/*
			 * Temporary bufpos variable
			 */
			bpos = bufpos;

			/* 
			 * Calculate the path to the next character if we output
			 * an unmatched symbol.
			 */

			/* bits required to get here */
			context_decision_node[1].numbits = CHAR_EST(encoder->window[bufpos]);
				
			/* where we came from */
			context_decision_node[1].path    = bufpos;

			/* bits required to get here */
			context_decision_node[2].numbits = CHAR_EST(encoder->window[bufpos+1]) + context_decision_node[1].numbits;
				
			/* where we came from */
			context_decision_node[2].path    = bufpos+1;

			/*
			 * For the match found, estimate the cost of encoding the match
			 * for each possible match length, shortest offset combination.
			 *
			 * The cost, path and offset is stored at bufpos + Length.  
			 */
			for (i = MIN_MATCH; i <= (unsigned long) EncMatchLength; i++)
			{
				/*
				 * Get estimation of match cost given match length = i,
				 * match position = matchpos_table[i], and store
				 * the result in numbits[i]
				 */
				MATCH_EST(i, matchpos_table[i], context_decision_node[i].numbits);

				/*
				 * Where we came from 
				 */
				context_decision_node[i].path = bufpos;

				/*
				 * Associated match position with this path
				 */
				context_decision_node[i].link = matchpos_table[i];
			}

			/*
			 * Set bit counter to zero at the start 
			 */
			context_decision_node[0].numbits = 0;

			decision_node_ptr = &context_decision_node[-(long) bpos];

			while (1)
			{
				numbits_t est, cum_numbits;

				bufpos++;
	
				/* 
				 *  Set the proper repeated offset locations depending on the
				 *  shortest path to the location prior to searching for a 
				 *  match.
				 */

				/*
				 * The following is one of the two possible break points from
				 * the inner encoding loop.  This break will exit the loop if 
				 * a point is reached that no match can incorporate; i.e. a
				 * character that does not match back to anything is a point 
				 * where all possible paths will converge and the longest one
				 * can be chosen.
				 */
				if (span == bufpos)
					break;
					
				/*
				 * Search for matches at bufpos 
				 */
				EncMatchLength = optimal_find_match(context, bufpos); 

				/* 
				 * Make sure that the match does not exceed the stop point
				 */
				if ((unsigned long) EncMatchLength + bufpos > bufpos_end)
				{
					EncMatchLength = bufpos_end - bufpos; 
					
					if (EncMatchLength < MIN_MATCH)
						EncMatchLength = 0;
				}

				/*
				 * If the match is very long or it exceeds epos (either 
				 * surpassing the LOOK area, or exceeding past the end of the
				 * input buffer), then break the loop and output the path.
				 */
				if (EncMatchLength > FAST_DECISION_THRESHOLD || 
					bufpos + (unsigned long) EncMatchLength >= epos)
				{
					MatchPos = matchpos_table[EncMatchLength];

					decision_node_ptr[bufpos+EncMatchLength].link = MatchPos;
					decision_node_ptr[bufpos+EncMatchLength].path = bufpos;

					/*
					 * Quickly insert data into the search tree without
					 * returning match positions/lengths
					 */
#ifndef INSERT_NEAR_LONG_MATCHES

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -