📄 hippack.cpp
字号:
#include "hippack.h"
#include <math.h>
#include <stdio.h>
#include <crtdbg.h>
#include "ByteWriter.h"
#include "ByteReader.h"
#include "LiteralHuffTree.h"
#include "LZ77Pack.h"
//#include "MoveToFront.h"
#include "CRC32.h"
#include "HashSearch.h"
//#include "Hash.h"
#include "LinkerCmds.h"
#include "WarnOff.h"
// #pragma optimize( "", off )
//
// constants/macros
//
#define BUFF ((PBYTE)m_pBuff)
HipPack::HipPack( void* pInBuff, DWORD cbInBuff )
{
//
// member variable initializations
//
this->m_pBuff = pInBuff;
this->m_cbBuff = cbInBuff;
m_pOut = NULL;
m_pbyValidHEntries = NULL;
m_Callback = NULL;
}
HipPack::~HipPack()
{
Dispose();
}
//
// Purpose:
// Perform all class-internal cleanups
//
void HipPack::Dispose()
{
FreeOutBuff();
return;
}
//
// Purpose:
// Deallocate output buffer's member if allocated
//
void HipPack::FreeOutBuff()
{
if ( m_pOut ) // sth allocated ?
{
free( m_pOut );
m_pOut = NULL;
}
return;
}
//
// Purpose:
// Compress the input data block into an output data block
// and returns its pointer and size
//
// Arguments:
// byLevel - 1..9 (byLevel + 11 == max bit count of ref distance)
//
BOOL HipPack::PerformCompression( BYTE byLevel, OUT void** ppCompBlock, OUT DWORD* pdwcbBlock )
{
BYTE* p, *pAlloc, *pWin;
DWORD dwcbMax, cbHuffTree, cbHdr, cbDone, cbHashed, cbDoneHere, dwcItem;
DWORD cbMaxDistance, cbitsPtr, cbMin2Match, cShortRefs, cNormalRefs, cbitBase;
PByteWriter writer = NULL;
PHIP_HEADER pHdr;
PLiteralHuffTree phuff = NULL;
// PLiteralHuffTree pLenHuff = NULL;
LP_MATCH match, match2;
PLHT_ITEM plhtItem;
BOOL ret, bRet;
BYTE byCBIteration = 0;
HashSearch hs;
bRet = FALSE;
m_bStopRun = FALSE;
//
// allocate enough memory
//
FreeOutBuff();
dwcbMax = (DWORD)(1.3 * m_cbBuff) + 0x1000; // live save
pAlloc = p = (PBYTE)malloc( dwcbMax );
if ( !p )
return FALSE; // ERR
//
// calculate CRC32
//
DWORD dwCRC = CRC32::Generate( BUFF, m_cbBuff );
printf( "-> CRC32: 0x%08lX\n", dwCRC );
//
// semi-initialize HiP header
//
cbHdr = sizeof( HIP_HEADER );
pHdr = (PHIP_HEADER)p;
pHdr->Signature = HIP_HDR_SIGNATURE;
pHdr->cbUncompressed = m_cbBuff;
pHdr->CRC = dwCRC;
pHdr->MajorVersion = HIP_MAJOR_VERSION;
pHdr->MinorVersion = HIP_MINOR_VERSION;
p += cbHdr;
//
// look for matches, literal probabilities and log them
//
// allocate memory for some hash tables
// printf( "-> Searching for string redundances...\n" );
phuff = new LiteralHuffTree();
m_pbyValidHEntries = new BYTE[ m_cbBuff ];
m_pHistory = new HIP_MATCH_HISTORY[ m_cbBuff ];
// cbDone = 0;
// pLenHuff = new LiteralHuffTree();
memset( m_pbyValidHEntries, 0, m_cbBuff );
hs.Assign( BUFF, m_cbBuff );
cbMin2Match = HP_MIN_MATCH_LEN;
cbitsPtr = 11 + byLevel;
cbMaxDistance = (1 << cbitsPtr) - 1 + cbMin2Match;
// cbWindow = (1 << HP_MAX_MATCH_SIZE_BITS) - 1 + cbMin2Match;
// enter compression loop
cbDone = cbHashed = 0;
while( cbDone != m_cbBuff )
{
if ( m_bStopRun )
goto TidyUp;
cbDoneHere = 0; // intermediate value being added later to cbDone
// check how many bytes we could compress via a RLE item
// DWORD cbRLEMax = GetByteChainLength( &BUFF[cbDone], m_cbBuff - cbDone );
// cbRLEMax = min( cbRLEMax, 255 );
// get window ptr/size for current input data position
pWin = &BUFF[ (DWORD)-cbMaxDistance + cbDone ];
if ( (DWORD)pWin < (DWORD)m_pBuff )
pWin = BUFF;
// look for match at P
if ( hs.FindLongestMatch(
pWin,
&((PBYTE)m_pBuff)[cbDone], m_cbBuff - cbDone,
cbMin2Match,
FALSE,
match.dwDistance, match.wSize) )
{
// get window ptr/size for current input data position
pWin = &BUFF[ (DWORD)-cbMaxDistance + cbDone + 1 ];
if ( (DWORD)pWin < (DWORD)m_pBuff )
pWin = BUFF;
// look for match at P+1 AKA lazy evolution mechanism
if ( hs.FindLongestMatch(
pWin,
&((PBYTE)m_pBuff)[cbDone+1], m_cbBuff - cbDone - 1,
cbMin2Match,
FALSE,
match2.dwDistance, match2.wSize) && match2.wSize > match.wSize )
{
//
// take advantage of lazy's result
//
// process p
m_pbyValidHEntries[cbDone] = 0;
// phuff->m_dwOccurr[ BUFF[cbDone] ]++;
cbDoneHere += 1;
// process p+1
// match2.wSize = min( match2.wSize, 258 );
m_pbyValidHEntries[cbDone+1] = 1;
m_pHistory[cbDone+1].dwRelOff = match2.dwDistance;
m_pHistory[cbDone+1].wSize = match2.wSize;
cbDoneHere += match2.wSize;
}
else
{
//
// take the match at P
//
// match.wSize = min( match.wSize, 258 );
m_pbyValidHEntries[cbDone] = 1;
m_pHistory[cbDone].dwRelOff = match.dwDistance;
m_pHistory[cbDone].wSize = match.wSize;
cbDoneHere += match.wSize;
}
}
else
{
//
// log a literal encoding here
//
m_pbyValidHEntries[cbDone] = 0;
// phuff->m_dwOccurr[ BUFF[cbDone] ]++;
cbDoneHere++;
}
//
// is RLE encoding here more optimal ?
//
/*
if ( cbRLEMax > 3 && cbRLEMax-1 > cbDoneHere )
{
// decrement occurrence counter if it should have been
// encoded as literal
if ( m_pbyValidHEntries[cbDone] == 0 )
phuff->m_dwOccurr[ BUFF[cbDone] ]--;
// first encode a literal
m_pbyValidHEntries[cbDone] = 0;
// secondly encode a short ref
m_pHistory[cbDone+1].dwRelOff = 1;
m_pHistory[cbDone+1].wSize = cbRLEMax - 1;
m_pbyValidHEntries[cbDone+1] = 1;
cbDone += cbRLEMax;
}
else*/
{
/*
if ( m_pbyValidHEntries[cbDone] == 1 )
if ( m_pHistory[cbDone].dwRelOff >= (1 << HP_SHORTREF_PTR_BIT_COUNT)+1 ||
m_pHistory[cbDone].wSize > 5 )
pLenHuff->m_dwOccurr[ m_pHistory[cbDone].wSize ]++;
*/
cbDone += cbDoneHere;
}
//
// process the callback if installed
//
if ( m_Callback )
{
byCBIteration++;
if ( byCBIteration == HIP_ITERATION_FOR_CALLBACK
|| cbDone == m_cbBuff) // end reached ?
{
// call user defined procedure
byCBIteration = 0;
if ( !m_Callback( cbDone, m_cbBuff ) )
goto TidyUp; // handled as ERR
}
}
//
// add processed bytes to the hash table
//
if ( cbHashed != cbDone )
{
// hs.UpdateNodesForRange( &BUFF[cbHashed], cbDone - cbHashed, m_cbBuff - cbHashed );
hs.UpdateNodesForRange( &BUFF[cbHashed], cbDone - cbHashed );
cbHashed = cbDone;
}
}
hs.Dispose(); // cleanup
//
// generate + emit literal Huffman table
//
// phuff->GenerateWithOccurrences();
// cbHuffTree = phuff->Emit( p, dwcbMax - cbHdr );
phuff->GenerateAdaptiveModel();
cbHuffTree = 0;
p += cbHuffTree;
//
// generate + emit length Huffman table
//
/*
pLenHuff->GenerateWithOccurrences();
DWORD cbLenHuff = pLenHuff->Emit( p, dwcbMax - cbHdr - cbHuffTree );
cbHuffTree += cbLenHuff;
p += cbLenHuff;
*/
#ifdef VERBOSE_PLEASE
printf( "[ Size of Huffman table: %u byte(s) ]\n", cbHuffTree );
#endif
//
// compress, now
//
writer = new ByteWriter( p, dwcbMax - cbHdr - cbHuffTree );
cbDone = 0;
// process first uncompressible literals
printf( "-> Building output...\n" );
cbDone = 0;
/*
//
// get most often match size
//
PDWORD wSizeOccurr = new DWORD[256];
memset( (void*)&wSizeOccurr[0], 0, sizeof(DWORD)*256 );
for ( UINT i = 0; i < m_cbBuff; i++ )
if ( m_pbyValidHEntries[i] == 1 )
{
HIP_MATCH_HISTORY m = m_pHistory[i];
if ( m.wSize < 256 )
wSizeOccurr[ m.wSize ]++;
}
DWORD dwBiggestCount = 0, dwUsualSize = MAXDWORD;
for ( UINT i = 0; i < 256; i++ )
if ( wSizeOccurr[i] > dwBiggestCount )
{
dwBiggestCount = wSizeOccurr[i];
dwUsualSize = i;
}
delete wSizeOccurr;
*/
//
// enter compression loop
//
cShortRefs = cNormalRefs = 0;
dwcItem = 0;
DWORD dwHuffIteration = 0;
while( cbDone != m_cbBuff )
{
/*
// eventually compress literal block
if ( dwcItem % 0x100 == 0)
{
DWORD c = 0;
while ( cbDone+c < m_cbBuff && m_pbyValidHEntries[cbDone+c] == 0 )
c++;
if ( c < 4 )
{
// define no literal block
writer->WriteBit( 0 );
}
else
{
// define literal block
writer->WriteBit( 1 );
GammaEncodeLength( writer, c - 2);
for ( UINT i = 0; i < c; i++ )
{
plhtItem = (*phuff)[ BUFF[cbDone+i] ];
writer->WriteBits( plhtItem->dwSequence, plhtItem->bySequLen );
}
cbDone += c;
}
}
*/
//
// maybe compress literal block
//
UINT cLiteralFlow = 0;
while ( cLiteralFlow < m_cbBuff-cbDone && m_pbyValidHEntries[cbDone + cLiteralFlow] == 0)
++cLiteralFlow;
if ( cLiteralFlow >= HP_MIN_LITERALS_IN_BLOCK )
{
// stamp short ref prefix
ret = writer->WriteBit( 1 );
ret &= writer->WriteBit( 0 );
ret &= writer->WriteBits( 0, HP_SHORTREF_PTR_BIT_COUNT ); // distance == 0 !
ret &= GammaEncodeLength( writer, cLiteralFlow - HP_MIN_LITERALS_IN_BLOCK + 2 );
for ( UINT i = 0; i < cLiteralFlow; i++ )
ret &= writer->WriteByte( BUFF[cbDone+i] );
cbDone += cLiteralFlow;
continue; // restart at loop top
}
//
// encode current item normally
//
if ( m_pbyValidHEntries[cbDone] == 1 )
{
// print ptr
HIP_MATCH_HISTORY Match = m_pHistory[cbDone];
if ( Match.wSize >= 2 && Match.wSize <= 5 // 2..5
&& Match.dwRelOff < (DWORD)(1 << HP_SHORTREF_PTR_BIT_COUNT) )
{
//
// encode short reference
//
// if ( Match.dwRelOff < Match.wSize )
// __asm nop
// 10 = short ref prefix
ret = writer->WriteBit( 1 );
ret &= writer->WriteBit( 0 );
ret &= writer->WriteBits( Match.dwRelOff, HP_SHORTREF_PTR_BIT_COUNT );
ret &= writer->WriteBits( Match.wSize - 2, 2 );
cShortRefs++;
_ASSERT( ret );
}
else
{
// if ( Match.dwRelOff < Match.wSize )
// __asm nop
DWORD dwSizeDec;
if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX4 )
{
if ( Match.wSize < 6 )
goto EncodeAsLiteral;
dwSizeDec = 4;
}
else if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX3 )
{
if ( Match.wSize < 5 )
goto EncodeAsLiteral;
dwSizeDec = 3;
}
else if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX2 )
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -