📄 optenc.c

📁 windows gzip source code
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
					if (MatchPos == 3 && EncMatchLength > 16)
					{
						/*
						 * If we found a match 1 character away and it's
						 * length 16 or more, it's probably a string of
						 * zeroes, so don't insert that into the search
						 * engine, since doing so can slow things down
						 * significantly!
						 */
						optimal_insert(
							context,
                               bufpos + 1,
                               bufpos - WINDOW_SIZE + 2
                           );
					}
					else
#endif
					{
						for (i = 1; i < (unsigned long) EncMatchLength; i++)
							optimal_insert(
								context,
                                   bufpos + i,
                                   bufpos + i - WINDOW_SIZE + 4
                                );
					}

					bufpos += EncMatchLength;
					break;
				}


				/*
				 * The following code will extend the area spanned by the 
				 * set of matches if the current match surpasses the end of
				 * the span.  A match of length two that is far is not 
				 * accepted, since it would normally be encoded as characters,
				 * thus allowing the paths to converge.
				 */
				if (EncMatchLength >= 3)
				{
					if (span < (unsigned long) (bufpos + EncMatchLength))
					{
						long end;
						long i;

						end = min(bufpos+EncMatchLength-bpos, LOOK-1);

						/*
						 * These new positions are undefined for now, since we haven't
						 * gone there yet, so put in the costliest value
						 */
						for (i = span-bpos+1; i <= end; i++)
							context_decision_node[i].numbits = (numbits_t) -1;

						span = bufpos + EncMatchLength;
					}
				}

				/*
				 *  The following code will iterate through all combinations
				 *  of match lengths for the current match.  It will estimate
				 *  the cost of the path from the beginning of LOOK to 
				 *  bufpos and to every locations spanned by the current 
				 *  match.  If the path through bufpos with the found matches
				 *  is estimated to take fewer number of bits to encode than
				 *  the previously found match, then the path to the location
				 *  is altered.
				 *
				 *  The code relies on accurate estimation of the cost of 
				 *  encoding a character or a match.  Furthermore, it requires
				 *  a search engine that will store the smallest match offset
				 *  of each possible match length.
				 *
				 *  A match of length one is simply treated as an unmatched 
				 *  character.
				 */

				/* 
				 *  Get the estimated number of bits required to encode the 
				 *  path leading up to bufpos.
				 */
				cum_numbits = decision_node_ptr[bufpos].numbits;

				/*
				 *  Calculate the estimated cost of outputting the path through
				 *  bufpos and outputting the next character as an unmatched byte
				 */
				est = cum_numbits + CHAR_EST(encoder->window[bufpos]);

				/*
				 *  Check if it is more efficient to encode the next character
				 *  as an unmatched character rather than the previously found 
				 *  match.  If so, then update the cheapest path to bufpos + 1.
				 *
				 *  What happens if est == numbits[bufpos-bpos+1]; i.e. it
				 *  works out as well to output a character as to output a
				 *  match?  It's a tough call; however, we will push the
				 *  encoder to use matches where possible.
				 */
				if (est < decision_node_ptr[bufpos+1].numbits)
				{
					decision_node_ptr[bufpos+1].numbits = est;
					decision_node_ptr[bufpos+1].path    = bufpos;
				}

				/*
				 *	Now, iterate through the remaining match lengths and 
				 *  compare the new path to the existing.  Change the path
				 *  if it is found to be more cost effective to go through
				 *  bufpos.
				 */
				for (i = MIN_MATCH; i <= (unsigned long) EncMatchLength; i++)
				{
					MATCH_EST(i, matchpos_table[i], est);
					est += cum_numbits;

					/*
					 * If est == numbits[bufpos+i] we want to leave things
					 * alone, since this will tend to force the matches
					 * to be smaller in size, which is beneficial for most
					 * data.
					 */
					if (est < decision_node_ptr[bufpos+i].numbits)
					{
						decision_node_ptr[bufpos+i].numbits	= est;
						decision_node_ptr[bufpos+i].path	= bufpos;
						decision_node_ptr[bufpos+i].link	= matchpos_table[i];
					}
				}
			} /* continue to loop through span of matches */

			/*
			 *  Here bufpos == span, ie. a non-matchable character found.  The
			 *  following code will output the path properly.
			 */

			/*
			 *  Unfortunately the path is stored in reverse; how to get from
			 *  where we are now, to get back to where it all started.
			 *
			 *  Traverse the path back to the original starting position
			 *  of the LOOK span.  Invert the path pointers in order to be
			 *  able to traverse back to the current position from the start.
			 */

			/*
			 * Count the number of iterations we did, so when we go forwards
			 * we'll do the same amount
			 */
			iterations = 0;

			NextPrevPos = decision_node_ptr[bufpos].path;

   			do
			{
				unsigned long	PrevPos;

      			PrevPos = NextPrevPos;

   				NextPrevPos = decision_node_ptr[PrevPos].path;
   				decision_node_ptr[PrevPos].path = bufpos;

   				bufpos = PrevPos;
   				iterations++;
			} while (bufpos != bpos);

			/*
			 * Traverse from the beginning of the LOOK span to the end of 
			 * the span along the stored path, outputting matches and 
			 * characters appropriately.
			 */
			do
			{
   				if (decision_node_ptr[bufpos].path > bufpos+1)
   				{
					/*
					 * Path skips over more than 1 character; therefore it's a match
					 */
					RECORD_MATCH(
						decision_node_ptr[bufpos].path - bufpos,
						decision_node_ptr[ decision_node_ptr[bufpos].path ].link
					);

					bufpos = decision_node_ptr[bufpos].path;
				}
   				else
   				{
					/*
					 * Path goes to the next character; therefore it's a symbol
					 */
					RECORD_CHAR(encoder->window[bufpos]);
					bufpos++;
				}
			} while (--iterations != 0);
		}
		else  /* EncMatchLength >= FAST_DECISION_THRESHOLD */
		{
			/*
			 *  This code reflects a speed optimization that will always take
			 *  a match of length >= FAST_DECISION_THRESHOLD characters.
			 */

			/*
			 * The position associated with the match we found
			 */
			MatchPos = encoder->matchpos_table[EncMatchLength];

			/*
			 * Quickly insert match substrings into search tree
			 * (don't look for new matches; just insert the strings)
			 */
#ifndef INSERT_NEAR_LONG_MATCHES
			if (MatchPos == 3 && EncMatchLength > 16)
			{
				optimal_insert(
					context,
                       bufpos + 1,
                       bufpos - WINDOW_SIZE + 2 
                   );
			}
			else
#endif
			{
				for (i = 1; i < (unsigned long) EncMatchLength; i++)
					optimal_insert(
						context,
                           bufpos + i,
                           bufpos + i - WINDOW_SIZE + 1
                        );
			}

			/*
			 * Advance our position in the window
			 */
			bufpos += EncMatchLength;

			/*
			 * Output the match
			 */
			RECORD_MATCH(EncMatchLength, MatchPos);

		}  /* EncMatchLength >= FAST_DECISION_THRESHOLD */
	} /* end while ... bufpos <= bufpos_end */

	if (LITERAL_BUFFERS_FULL())
	{
		_ASSERT(context->outputting_block_num_literals <= OPT_ENCODER_MAX_ITEMS);

		// flush our recording matches bit buffer
        FLUSH_RECORDING_BITBUF();

        // BUGBUG Should check for failure result.  Luckily the only failure condition is
        // that the tree didn't fit into 500 bytes, which is basically impossible anyway.
		(void) OptimalEncoderOutputBlock(context);

		// fix estimates for optimal parser
		fixOptimalEstimates(context);

		encoder->next_tree_update = FIRST_TREE_UPDATE;

		// did we output the whole block?
		if (context->state == STATE_NORMAL)
		{
			// reset literal recording
        	recording_bufptr = encoder->recording_bufptr;
            recording_bitbuf = encoder->recording_bitbuf;
            recording_bitcount = encoder->recording_bitcount;
			goto after_output_block;
		}
	}

	// save recording state
	encoder->recording_bufptr = recording_bufptr;
    encoder->recording_bitbuf = recording_bitbuf;
    encoder->recording_bitcount = recording_bitcount;

    context->bufpos	= bufpos;

	VERIFY_HASHES(bufpos);
	removeNodes(context);
	VERIFY_HASHES(bufpos);

    if (context->bufpos == 2*WINDOW_SIZE)
        OptimalEncoderMoveWindows(context);
}


//
// Move the search windows when bufpos reaches 2*WINDOW_SIZE
//
static void OptimalEncoderMoveWindows(t_encoder_context *context)
{
	long	delta;
	int		i;
    t_optimal_encoder *encoder = context->optimal_encoder;
	t_search_node *search_tree_root = encoder->search_tree_root;
	t_search_node *left = encoder->search_left;
	t_search_node *right = encoder->search_right;

   	_ASSERT(context->bufpos == 2*WINDOW_SIZE);
 
	VERIFY_HASHES(context->bufpos);

	delta = context->bufpos - WINDOW_SIZE;

	memcpy(&encoder->window[0], &encoder->window[context->bufpos - WINDOW_SIZE], WINDOW_SIZE);

	for (i = 0; i < NUM_DIRECT_LOOKUP_TABLE_ELEMENTS; i++)
	{
		long val = ((long) search_tree_root[i]) - delta;
	
		if (val <= 0)
			search_tree_root[i] = (t_search_node) 0;
		else
			search_tree_root[i] = (t_search_node) val;

		_ASSERT(search_tree_root[i] < WINDOW_SIZE);
	}

	memcpy(&left[0], &left[context->bufpos - WINDOW_SIZE], sizeof(t_search_node)*WINDOW_SIZE);
	memcpy(&right[0], &right[context->bufpos - WINDOW_SIZE], sizeof(t_search_node)*WINDOW_SIZE);

	for (i = 0; i < WINDOW_SIZE; i++)
	{
		long val;
			
		// left
		val = ((long) left[i]) - delta;

		if (val <= 0)
			left[i] = (t_search_node) 0;
		else
			left[i] = (t_search_node) val;

		// right
		val = ((long) right[i]) - delta;

		if (val <= 0)
			right[i] = (t_search_node) 0;
		else
			right[i] = (t_search_node) val;
	}

#ifdef _DEBUG
	// force any search table references to be invalid
	memset(&encoder->window[WINDOW_SIZE], 0, WINDOW_SIZE);
#endif

	context->bufpos = WINDOW_SIZE;
	context->bufpos_end = context->bufpos;

	VERIFY_HASHES(context->bufpos);
}


//
// Calculate the frequencies of all literal and distance codes, for tree-making, then
// make the trees
//
static void calculateUpdatedEstimates(t_encoder_context *context)
{
    USHORT code[MAX_LITERAL_TREE_ELEMENTS];
    t_optimal_encoder *encoder = context->optimal_encoder;

	// create the trees, we're interested only in len[], not code[]
    // BUGBUG perf optimisation: make makeTree() not call MakeCode() in this situation
	makeTree(
		MAX_LITERAL_TREE_ELEMENTS, 
		15, 
		encoder->literal_tree_freq, 
		code,
		encoder->literal_tree_len
	);

	makeTree(
		MAX_DIST_TREE_ELEMENTS, 
		15, 
		encoder->dist_tree_freq, 
		code,
		encoder->dist_tree_len
	);
}


//
// Zero the running frequency counts
//
// Also set freq[END_OF_BLOCK_CODE] = 1
//
void OptimalEncoderZeroFrequencyCounts(t_optimal_encoder *encoder)
{
    _ASSERT(encoder != NULL);

    memset(encoder->literal_tree_freq, 0, sizeof(encoder->literal_tree_freq));
    memset(encoder->dist_tree_freq, 0, sizeof(encoder->dist_tree_freq));
    encoder->literal_tree_freq[END_OF_BLOCK_CODE] = 1;
}


void OptimalEncoderReset(t_encoder_context *context)
{
    t_optimal_encoder *encoder = context->optimal_encoder;

    _ASSERT(encoder != NULL);

	encoder->recording_bitbuf		= 0;
	encoder->recording_bitcount     = 0;
    encoder->recording_bufptr       = encoder->lit_dist_buffer;

    context->window_size            = WINDOW_SIZE;
	context->bufpos		            = context->window_size;
	context->bufpos_end             = context->bufpos;

	DeflateInitRecordingTables(
	    encoder->recording_literal_tree_len,
    	encoder->recording_literal_tree_code, 
	    encoder->recording_dist_tree_len,
    	encoder->recording_dist_tree_code
    );

	// clear the search table
	memset(
		encoder->search_tree_root,
		0, 
		sizeof(encoder->search_tree_root)
	);

	encoder->next_tree_update = FIRST_TREE_UPDATE;

	initOptimalEstimates(context);
    OptimalEncoderZeroFrequencyCounts(encoder);
}


BOOL OptimalEncoderInit(t_encoder_context *context)
{
	context->optimal_encoder = (t_optimal_encoder *) LocalAlloc(LMEM_FIXED, sizeof(t_optimal_encoder));

    if (context->optimal_encoder == NULL)
        return FALSE;

    OptimalEncoderReset(context);
	return TRUE;
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -