📄 lz77.c
字号:
/* Name : lz77.c * * Notes: This file contains yet another implementation of the * famous LZ77 compression algorithm. This implementation, * however, contains several features not found in most * others. * * $Log: lz77.c $ * Revision 1.2 1996/02/03 21:03:39 idr * Modified the way that the "bonus" literals are written out for an * improved lazy match. They are now written out as a match, if possible. */#include "stdtypes.h"#include <stdlib.h>#include <string.h>#include <stdio.h>#include <assert.h>#include "lz77.h"#define MAX_OFFSET 4096#define MIN_LENGTH 3#define ADJ (MIN_LENGTH-1)#define MAX_LENGTH (15+ADJ)#define MAX_ELENGTH (MAX_LENGTH+255)#define MAX_LAZY (MIN_LENGTH-1)#define B_SIZE (MAX_OFFSET + MAX_ELENGTH)#define BUF_SIZE(m) ((m)->m_MaxLength + (m)->m_MaxOffset)#define IsBigEnough( m,d ) ((m).m_Length >= (d)->m_MinLength)/****************************************************************************/struct Match{ UWORD m_Length; LONG m_Offset;};struct LzMode{ ULONG m_MaxLength; /* Maximum match length. */ ULONG m_MinLength; /* Minimum match size for compression. */ ULONG m_MaxOffset; /* Maximum offset back in the buffer. */ ULONG m_MinLazy; /* If a match is less, try again. */};/****************************************************************************/UBYTE * InitCompress( struct LZ77Info * info, struct LzMode * mode );UBYTE * WriteMatch( struct LZ77Info *, struct Match *, UBYTE *, const UBYTE * );UBYTE * WriteLiteral( struct LZ77Info *, int, UBYTE * );struct Match FindMatch( struct LZ77Info *, const UBYTE *, UWORD, struct LzMode * );void UpdateMatchTables( struct LZ77Info *, const UBYTE *, UWORD, struct LzMode * );struct Match MatchPeek( struct LZ77Info *, struct LzMode *, const UBYTE *, ULONG length, int distance );extern APTR AllocHashTable( ULONG numHashNodes );extern void FreeHashTable( APTR table );extern void UpdateHashTable( APTR table, UBYTE * stream, ULONG numChars );extern struct Match FindHashMatch( APTR table, const UBYTE * stream, ULONG maxLen, const UBYTE * buffer );/****************************************************************************/struct LzMode crshModes[] ={ { MAX_LENGTH, 3, 4096, 0 }, /* mode 0 */ { MAX_ELENGTH, 3, 4096, 0 }, /* mode 1 */ { MAX_ELENGTH, 3, 4096, 4 }, /* mode 2 */ { MAX_ELENGTH, 3, 4096, 17 }, /* mode 3 */ { MAX_ELENGTH, 3, 4096, 17+254 } /* mode 4 */};/****************************************************************************//* Name : CompressLZ77() * * Notes: This is the main LZ77 compression routine. * */#define IsBigEnough( m,d ) ((m).m_Length >= (d)->m_MinLength)#define IsBetter( m, i1, i2 ) ( (m[i1].m_Length-i1) > (m[i2].m_Length-i2) )ULONG CompressLZ77( struct LZ77Info * info ){ struct Match match; UBYTE * input; UBYTE * output; LONG length; struct LzMode * mode; ULONG maxLength; if ( info->Mode < 0 ) info->Mode = 0; if ( info->Mode > 4 ) info->Mode = 4; mode = & crshModes[ info->Mode ]; output = InitCompress( info, mode ); input = info->input; length = info->length; while ( length > 0 ) { maxLength = ( length > mode->m_MaxLength ) ? mode->m_MaxLength : length; match = FindMatch( info, input, maxLength, mode ); if ( IsBigEnough( match, mode ) ) { /* If the match length was below the minimum "lazy match" * threshold, then we'll try to find a better match. */ if ( match.m_Length < mode->m_MinLazy ) { struct Match matchArray[ MAX_LAZY ]; int lcv; int bestMatch; /* The first step is to create an array of matches for * zero to MAX_LAZY literals. */ matchArray[ 0 ] = match; for ( lcv = 1 ; lcv < MAX_LAZY ; lcv++ ) { matchArray[ lcv ] = MatchPeek( info, mode, input, length, lcv ); } /* Now we need to decide which match will yield the best * compression ratio. */ bestMatch = 0; for ( lcv = 1 ; lcv < MAX_LAZY ; lcv++ ) { if ( IsBetter( matchArray, lcv, bestMatch ) ) { bestMatch = lcv; } } /* At this point we know which match will give us the best * compression, so we need to output the correct number of * "bonus" literal characters to get to the point where the * match will begin in the stream. * * However, if the number of literals to be written out is * larger than the minimum match length, we can just write * it out as a sub-match of the original match. */ if ( bestMatch >= mode->m_MinLength ) { ULONG temp; temp = match.m_Length; match.m_Length = bestMatch; output = WriteMatch( info, &match, output, input ); UpdateMatchTables( info, input + mode->m_MaxLength, match.m_Length, mode ); UpdateHashTable( info->hashTable, input, match.m_Length ); } else { for ( lcv = 0 ; lcv < bestMatch ; lcv++ ) { output = WriteLiteral( info, *input, output ); UpdateMatchTables( info, input + mode->m_MaxLength, 1L, mode ); UpdateHashTable( info->hashTable, input, 1L ); input++; length--; } } /* We will refind the match in the stream just to make sure * that the offset values that we obtained are correct. This * is needed because MatchPeek() does not modify the hash * table, and the previous look does. */ match = FindMatch( info, input, ( length > mode->m_MaxLength ) ? mode->m_MaxLength : length, mode ); } output = WriteMatch( info, &match, output, input ); UpdateMatchTables( info, input + mode->m_MaxLength, match.m_Length, mode ); UpdateHashTable( info->hashTable, input, match.m_Length ); length -= match.m_Length; input += match.m_Length; } else { /* If the match wasn't long enough to yield compression, then * we'll just out put a literal character into the data * stream. */ output = WriteLiteral( info, *input, output ); UpdateMatchTables( info, input + mode->m_MaxLength, 1, mode ); UpdateHashTable( info->hashTable, input, 1 ); length--; input++; } } return( output - info->output );}/*****************************************************************************//* Name : AdvanceTagBit() * * Notes: This function is used to update the current tag bit value to the * next value. If the new value would be 7, then the bit is in the * second tag word byte, so the tagPos pointer is incremented. If * the value would be -1, then the current tag word is full and a * new one needs to be set up. */static UBYTE * AdvanceTagBit( struct LZ77Info * info, UBYTE * output ){ info->nextBit--; /* If the nextBit is now equal to 7, then we have just moved from the * high order byte of the tag word to the low order byte. Since * we are forced to work byte-wise (for speed and compatability with * CPUs like the 68000), we simply advance the tagPos pointer. */ if ( info->nextBit == 7 ) { info->tagPos++; } else if ( info->nextBit == -1 ) { /* Initialize the next tag value. It is important that the output * tags be all set for literal bytes. This way if a decompressor * compressor is written to only test for completion between tags, * a maximum of 16 extra bytes will be written out. */ info->nextBit = 15; info->tagPos = output; info->tagPos[0] = 0; info->tagPos[1] = 0; /* Advance the output pointer to skip over the new * tag values. */ output += 2; } return( output );}/*****************************************************************************//* Name : UpdateMatchTables() * * Notes: This function copies ``length'' bytes from the specified input * stream into the circular buffer. */void UpdateMatchTables( struct LZ77Info * info, const UBYTE * input, UWORD length, struct LzMode * mode ){ UBYTE temp; UBYTE * buf; /* First, calculate a pointer to the current position in the * circular buffer. */ buf = &(info->buffer[ info->bufferOffset ]); /* Calculate the offset into the buffer for the next call. This * routine is somewhat on the tricky side because it needs to * SUBTRACT the buffer size if the pointer spills over. */ info->bufferOffset += length; if ( info->bufferOffset >= BUF_SIZE(mode) ) { info->bufferOffset -= BUF_SIZE(mode); } info->maxOffset -= length; if ( info->maxOffset < -mode->m_MaxOffset ) info->maxOffset = -mode->m_MaxOffset; /* Now the new data can be copied into the data buffer. */ do { temp = *(input++); buf[ BUF_SIZE(mode) ] = temp; buf[ 0 ] = temp; buf++; } while( --length ); return;}/*****************************************************************************//* Name : FindMatch() * * Notes: This is the routine that does all of the REAL work in a * typical LZ77 compression routine. This is the routine that * searches the look-back buffer in order to find a string that * matches the current input string. */struct Match FindMatch( struct LZ77Info * info, const UBYTE * string, UWORD maxLen, struct LzMode * mode ){ const UBYTE * buffer; buffer = &(info->buffer[ info->bufferOffset + BUF_SIZE( mode ) - mode->m_MaxLength ]); return( FindHashMatch( info->hashTable, string, maxLen, buffer ) );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -