⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hippack.cpp

📁 My (so called) HiP compression algorithm as console mode utility. It s a hybrid of Lempel-Ziv 77 a
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "hippack.h"
#include <math.h>
#include <stdio.h>
#include <crtdbg.h>
#include "ByteWriter.h"
#include "ByteReader.h"
#include "LiteralHuffTree.h"
#include "LZ77Pack.h"
//#include "MoveToFront.h"
#include "CRC32.h"
#include "HashSearch.h"
//#include "Hash.h"

#include "LinkerCmds.h"
#include "WarnOff.h"

// #pragma optimize( "", off )

//
// constants/macros
//
#define BUFF ((PBYTE)m_pBuff)

HipPack::HipPack( void* pInBuff, DWORD cbInBuff )
{
	//
	// member variable initializations
	//
	this->m_pBuff      = pInBuff;
	this->m_cbBuff     = cbInBuff;

	m_pOut             = NULL;
	m_pbyValidHEntries = NULL;
	m_Callback         = NULL;
}

HipPack::~HipPack()
{
	Dispose();
}

//
// Purpose:
//   Perform all class-internal cleanups
//
void HipPack::Dispose()
{
	FreeOutBuff();

	return;
}

//
// Purpose:
//   Deallocate output buffer's member if allocated
//
void HipPack::FreeOutBuff()
{
	if ( m_pOut ) // sth allocated ?
	{
		free( m_pOut );
		m_pOut = NULL;
	}

	return;
}

//
// Purpose:
//   Compress the input data block into an output data block
//   and returns its pointer and size
//
// Arguments:
//   byLevel - 1..9 (byLevel + 11 == max bit count of ref distance)
//
BOOL HipPack::PerformCompression( BYTE byLevel, OUT void** ppCompBlock, OUT DWORD* pdwcbBlock )
{
	BYTE*             p, *pAlloc, *pWin;
	DWORD             dwcbMax, cbHuffTree, cbHdr, cbDone, cbHashed, cbDoneHere, dwcItem;
	DWORD             cbMaxDistance, cbitsPtr, cbMin2Match, cShortRefs, cNormalRefs, cbitBase;
	PByteWriter       writer = NULL;
	PHIP_HEADER       pHdr;
	PLiteralHuffTree  phuff = NULL;
//	PLiteralHuffTree  pLenHuff = NULL;
	LP_MATCH          match, match2;
	PLHT_ITEM         plhtItem;
	BOOL              ret, bRet;
	BYTE              byCBIteration = 0;
	HashSearch        hs;

	bRet        = FALSE;
	m_bStopRun  = FALSE;

	//
	// allocate enough memory
	//
	FreeOutBuff();
	dwcbMax = (DWORD)(1.3 * m_cbBuff) + 0x1000; // live save
	pAlloc = p = (PBYTE)malloc( dwcbMax );
	if ( !p )
		return FALSE; // ERR

	//
	// calculate CRC32
	//
	DWORD dwCRC = CRC32::Generate( BUFF, m_cbBuff );
	printf( "-> CRC32: 0x%08lX\n", dwCRC );

	//
	// semi-initialize HiP header
	//
	cbHdr  = sizeof( HIP_HEADER );
	pHdr   = (PHIP_HEADER)p;
	pHdr->Signature       = HIP_HDR_SIGNATURE;
	pHdr->cbUncompressed  = m_cbBuff;
	pHdr->CRC             = dwCRC;
	pHdr->MajorVersion    = HIP_MAJOR_VERSION;
	pHdr->MinorVersion    = HIP_MINOR_VERSION;
	p += cbHdr;

	//
	// look for matches, literal probabilities and log them
	//

	// allocate memory for some hash tables
//	printf( "-> Searching for string redundances...\n" );
	phuff              = new LiteralHuffTree();
	m_pbyValidHEntries = new BYTE[ m_cbBuff ];
	m_pHistory         = new HIP_MATCH_HISTORY[ m_cbBuff ];
//	cbDone             = 0;
//	pLenHuff           = new LiteralHuffTree();

	memset( m_pbyValidHEntries, 0, m_cbBuff );

	hs.Assign( BUFF, m_cbBuff );

	cbMin2Match    = HP_MIN_MATCH_LEN;
	cbitsPtr       = 11 + byLevel;
	cbMaxDistance  = (1 << cbitsPtr) - 1 + cbMin2Match;
//	cbWindow       = (1 << HP_MAX_MATCH_SIZE_BITS) - 1 + cbMin2Match;

	// enter compression loop
	cbDone = cbHashed = 0;
	while( cbDone != m_cbBuff )
	{
		if ( m_bStopRun )
			goto TidyUp;

		cbDoneHere = 0; // intermediate value being added later to cbDone

		// check how many bytes we could compress via a RLE item
//		DWORD cbRLEMax = GetByteChainLength( &BUFF[cbDone], m_cbBuff - cbDone );
//		cbRLEMax = min( cbRLEMax, 255 );

        // get window ptr/size for current input data position
		pWin     = &BUFF[ (DWORD)-cbMaxDistance + cbDone ];
		if ( (DWORD)pWin < (DWORD)m_pBuff )
			pWin     = BUFF;

		// look for match at P
		if ( hs.FindLongestMatch(
			pWin,
			&((PBYTE)m_pBuff)[cbDone], m_cbBuff - cbDone,
			cbMin2Match,
			FALSE,
			match.dwDistance, match.wSize) )
		{
			// get window ptr/size for current input data position
			pWin     = &BUFF[ (DWORD)-cbMaxDistance + cbDone + 1 ];
			if ( (DWORD)pWin < (DWORD)m_pBuff )
				pWin     = BUFF;

			// look for match at P+1 AKA lazy evolution mechanism
			if ( hs.FindLongestMatch(
			pWin,
			&((PBYTE)m_pBuff)[cbDone+1], m_cbBuff - cbDone - 1,
			cbMin2Match,
			FALSE,
			match2.dwDistance, match2.wSize) && match2.wSize > match.wSize )
			{
				//
				// take advantage of lazy's result
				//

				// process p
				m_pbyValidHEntries[cbDone]   = 0;
//				phuff->m_dwOccurr[ BUFF[cbDone] ]++;
				cbDoneHere += 1;
				// process p+1
//				match2.wSize = min( match2.wSize, 258 );
				m_pbyValidHEntries[cbDone+1]   = 1;
				m_pHistory[cbDone+1].dwRelOff  = match2.dwDistance;
				m_pHistory[cbDone+1].wSize     = match2.wSize;

				cbDoneHere += match2.wSize;
			}
			else
			{
				//
				// take the match at P
				//
//				match.wSize = min( match.wSize, 258 );
				m_pbyValidHEntries[cbDone]   = 1;
				m_pHistory[cbDone].dwRelOff  = match.dwDistance;
				m_pHistory[cbDone].wSize     = match.wSize;
				cbDoneHere += match.wSize;
			}
		}
		else
		{
			//
			// log a literal encoding here
			//
			m_pbyValidHEntries[cbDone]   = 0;
//			phuff->m_dwOccurr[ BUFF[cbDone] ]++;
			cbDoneHere++;
		}

		//
		// is RLE encoding here more optimal ?
		//
		/*
		if ( cbRLEMax > 3 && cbRLEMax-1 > cbDoneHere )
		{
			// decrement occurrence counter if it should have been
			// encoded as literal
			if ( m_pbyValidHEntries[cbDone] == 0 )
				phuff->m_dwOccurr[ BUFF[cbDone] ]--;

			// first encode a literal
			m_pbyValidHEntries[cbDone]   = 0;

			// secondly encode a short ref
			m_pHistory[cbDone+1].dwRelOff  = 1;
			m_pHistory[cbDone+1].wSize     = cbRLEMax - 1;
			m_pbyValidHEntries[cbDone+1]   = 1;

			cbDone += cbRLEMax;
		}
		else*/
		{
			/*
			if ( m_pbyValidHEntries[cbDone] == 1 )
				if ( m_pHistory[cbDone].dwRelOff >= (1 << HP_SHORTREF_PTR_BIT_COUNT)+1 ||
					m_pHistory[cbDone].wSize > 5 )
					pLenHuff->m_dwOccurr[ m_pHistory[cbDone].wSize ]++;
			*/
            cbDone += cbDoneHere;
		}

		//
		// process the callback if installed
		//
		if ( m_Callback )
		{
			byCBIteration++;
			if ( byCBIteration == HIP_ITERATION_FOR_CALLBACK
				|| cbDone == m_cbBuff) // end reached ?
			{
				// call user defined procedure
				byCBIteration = 0;
				if ( !m_Callback( cbDone, m_cbBuff ) )
					goto TidyUp; // handled as ERR
			}
		}

		//
		// add processed bytes to the hash table
		//
		if ( cbHashed != cbDone )
		{
//			hs.UpdateNodesForRange( &BUFF[cbHashed], cbDone - cbHashed, m_cbBuff - cbHashed );
			hs.UpdateNodesForRange( &BUFF[cbHashed], cbDone - cbHashed );
			cbHashed = cbDone;
		}
	}

	hs.Dispose(); // cleanup

	//
	// generate + emit literal Huffman table
	//
//	phuff->GenerateWithOccurrences();
//	cbHuffTree = phuff->Emit( p, dwcbMax - cbHdr );

	phuff->GenerateAdaptiveModel();
	cbHuffTree = 0;
	p += cbHuffTree;

	//
	// generate + emit length Huffman table
	//
	/*
	pLenHuff->GenerateWithOccurrences();
	DWORD cbLenHuff = pLenHuff->Emit( p, dwcbMax - cbHdr - cbHuffTree );
	cbHuffTree += cbLenHuff;
	p += cbLenHuff;
	*/

#ifdef VERBOSE_PLEASE
	printf( "[  Size of Huffman table: %u byte(s) ]\n", cbHuffTree );
#endif

	//
	// compress, now
	//
	writer   = new ByteWriter( p, dwcbMax - cbHdr - cbHuffTree );
	cbDone   = 0;

	// process first uncompressible literals
	printf( "-> Building output...\n" );
	cbDone = 0;

	/*
	//
	// get most often match size
	//
	PDWORD wSizeOccurr = new DWORD[256];
	memset( (void*)&wSizeOccurr[0], 0, sizeof(DWORD)*256 );
	for ( UINT i = 0; i < m_cbBuff; i++ )
		if ( m_pbyValidHEntries[i] == 1 )
		{
			HIP_MATCH_HISTORY m = m_pHistory[i];
			if ( m.wSize < 256 )
                wSizeOccurr[ m.wSize ]++;
		}
	DWORD dwBiggestCount = 0, dwUsualSize = MAXDWORD;
	for ( UINT i = 0; i < 256; i++ )
		if ( wSizeOccurr[i] > dwBiggestCount )
		{
			dwBiggestCount = wSizeOccurr[i];
			dwUsualSize    = i;
		}
	delete wSizeOccurr;
	*/

	//
	// enter compression loop
	//
	cShortRefs              = cNormalRefs = 0;
	dwcItem                 = 0;
	DWORD dwHuffIteration   = 0;
	while( cbDone != m_cbBuff )
	{
		/*
		// eventually compress literal block
		if ( dwcItem % 0x100 == 0)
		{
			DWORD c = 0;
			while ( cbDone+c < m_cbBuff && m_pbyValidHEntries[cbDone+c] == 0 )
				c++;
			if ( c < 4 )
			{
				// define no literal block
				writer->WriteBit( 0 );
			}
			else
			{
				// define literal block
				writer->WriteBit( 1 );
				GammaEncodeLength( writer, c - 2);
				for ( UINT i = 0; i < c; i++ )
				{
					plhtItem = (*phuff)[ BUFF[cbDone+i] ];
					writer->WriteBits( plhtItem->dwSequence, plhtItem->bySequLen );
				}
				cbDone += c;
			}
		}
		*/

		//
		// maybe compress literal block
		//
		UINT cLiteralFlow = 0;
		while ( cLiteralFlow < m_cbBuff-cbDone && m_pbyValidHEntries[cbDone + cLiteralFlow] == 0)
			++cLiteralFlow;
		if ( cLiteralFlow >= HP_MIN_LITERALS_IN_BLOCK )
		{
			// stamp short ref prefix
			ret  = writer->WriteBit( 1 );
			ret &= writer->WriteBit( 0 );

			ret &= writer->WriteBits( 0, HP_SHORTREF_PTR_BIT_COUNT ); // distance == 0 !
			ret &= GammaEncodeLength( writer, cLiteralFlow - HP_MIN_LITERALS_IN_BLOCK + 2 );
			for ( UINT i = 0; i < cLiteralFlow; i++ )
				ret &= writer->WriteByte( BUFF[cbDone+i] );
			cbDone += cLiteralFlow;
			continue; // restart at loop top
		}

		//
		// encode current item normally
		//
		if ( m_pbyValidHEntries[cbDone] == 1 )
		{
			// print ptr
			HIP_MATCH_HISTORY Match = m_pHistory[cbDone];

			if ( Match.wSize >= 2 && Match.wSize <= 5 // 2..5
				&& Match.dwRelOff < (DWORD)(1 << HP_SHORTREF_PTR_BIT_COUNT) )
			{
				//
				// encode short reference
				//

//				if ( Match.dwRelOff < Match.wSize )
//					__asm nop

				// 10 = short ref prefix
				ret  = writer->WriteBit( 1 );
				ret &= writer->WriteBit( 0 );

				ret &= writer->WriteBits( Match.dwRelOff, HP_SHORTREF_PTR_BIT_COUNT );

				ret &= writer->WriteBits( Match.wSize - 2, 2 );

				cShortRefs++;

				_ASSERT( ret );
			}
			else
			{
//				if ( Match.dwRelOff < Match.wSize )
//					__asm nop

				DWORD dwSizeDec;
				if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX4 )
				{
					if ( Match.wSize < 6 )
						goto EncodeAsLiteral;
					dwSizeDec = 4;
				}
				else if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX3 )
				{
					if ( Match.wSize < 5 )
						goto EncodeAsLiteral;
					dwSizeDec = 3;
				}
				else if ( Match.dwRelOff >= HP_NREF_SIZE_DEC_INDEX2 )
				{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -