📄 hashsearch.cpp

📁 My (so called) HiP compression algorithm as console mode utility. It s a hybrid of Lempel-Ziv 77 a
💻 CPP
字号:
#include "hashsearch.h"
#include <crtdbg.h>
#include "WarnOff.h"

//#include <stdio.h>

//
// prototypes
//
DWORD MyMemCmp2( PBYTE p1, PBYTE p2, DWORD cbMax );

//
// Purpose:
//   A little memcmp modification
//
// Returns:
//   The number of matching bytes
//
// Remarks:
//   Faster than HashSearch::MyMemCmp !
//
DWORD __forceinline MyMemCmp2( PBYTE p1, PBYTE p2, DWORD cbMax )
{
	DWORD cbMatch;

	__asm
	{
		mov     ecx, cbMax        // ecx = cbMax
		sub     ebx, ebx          // ebx = count of matching bytes
		mov     edi, p1           // edi = destination ptr
		mov     esi, p2           // esi = source ptr
DwordCmpLoop:
		cmp     ecx, 4
		jb      ByteCmp
		mov     eax, [edi + ebx]
		xor     eax, [esi + ebx]
		test    eax, eax
		jnz     ByteCmp
		add     ebx, 4
		sub     ecx, 4
		jmp     DwordCmpLoop
ByteCmp:
		cmp     ecx, 0
		jz      Done
		mov     al, byte ptr [edi + ebx]
		cmp     al, byte ptr [esi + ebx]
		jnz     Done
		inc     ebx
		dec     ecx
		jmp     ByteCmp
Done:
		mov     cbMatch, ebx 
	}

	return cbMatch;
}

HashSearch::HashSearch(void)
{
	m_pNodes        = NULL;
	m_pRecentNodes  = NULL;
}

HashSearch::~HashSearch(void)
{
	Dispose();
//	printf( "%u", m_iCurrentNode );
}

//
// Purpose:
//   Cleanup all
//
void HashSearch::Dispose()
{
	//
	// deallocate hashing tables
	//
	if ( m_pNodes )
	{
		free( m_pNodes );
		m_pNodes = NULL;
	}
	if ( m_pRecentNodes )
	{
		free( m_pRecentNodes );
		m_pRecentNodes = NULL;
	}
}

//
// Purpose:
//   Must be called before taking any action with a class instance
//
BOOL HashSearch::Assign( PBYTE pbyBuffer, DWORD cbBuffer )
{
	Dispose(); // cleanup any already allocated hashes

	m_pbyBuff  = pbyBuffer;
	m_cbBuff   = cbBuffer;

	//
	// allocate some memory for our hashing tables
	//
	m_pNodes         = (PHS_NODE)malloc( (m_cbBuff-1) * sizeof(HS_NODE) );
	m_pRecentNodes   = (PPHS_NODE)malloc( (MAXWORD+1) * 4 );

	if ( !m_pNodes || !m_pRecentNodes )
		return FALSE; // ERR

	memset( m_pRecentNodes, 0, (MAXWORD+1) * 4 );

	m_iCurrentNode   = 0;

	return TRUE; // OK
}

//
// Purpose:
//   Add one item in the hash table
//
// Arguments:
//   pData - address of the data whose word prefix + address should be inserted
//           into the hash table
//
BOOL HashSearch::AddNode( PBYTE pData )
{
	WORD       wPrefix = *(PWORD)pData;
	PHS_NODE   nTrace, nNew;

	if( m_iCurrentNode == m_cbBuff-1 )
		return FALSE; // ERR

	nTrace          = m_pRecentNodes[wPrefix];

	nNew            = &m_pNodes[m_iCurrentNode];
	nNew->Index     = pData;
	nNew->Previous  = NULL;
	nNew->Next      = NULL;

	if ( nTrace )
	{
		nNew->Previous = nTrace;
		nTrace->Next   = nNew;
	}

	m_pRecentNodes[wPrefix] = nNew;

	++m_iCurrentNode;

	_ASSERT( m_iCurrentNode <= m_cbBuff );

	return TRUE; // OK
}

//
// Purpose:
//   The core string comparision via node tracing
//
BOOL HashSearch::FindMatch(IN  PBYTE pbyWin, IN DWORD cbWin, IN PBYTE pData, IN DWORD cbData, 
						   OUT DWORD &dwDistance, OUT DWORD &dwLength )
{
	WORD       wPrefix = *(PWORD)pData;
	PHS_NODE   nTrace;
	DWORD      cb, cbBest = 0, cbBestDelta = 0;

	nTrace = m_pRecentNodes[wPrefix];
	if ( !nTrace )
		return FALSE; // this word didn't occur before

	cbBest = 0;
	do
	{
		//
		// node's index in window range ?
		//
		if ( (DWORD)nTrace->Index < (DWORD)pbyWin )
		{
			// all following nodes will point before the current window !
			break;
		}
		if ( (DWORD)nTrace->Index >= (DWORD)pData )
			goto NextNode;

		//
		// compare !
		//
		PBYTE  pbyCur    = pData+2;
		PBYTE  pbyIndex  = nTrace->Index+2;

		/*
		cb = 2;
		while( *pbyCur == *pbyIndex )
		{
			++cb;
			++pbyCur;
			++pbyIndex;

			if ( cb >= cbData )
				break;
		}
		*/
		cb = MyMemCmp2( pData+2, nTrace->Index+2, cbData-2 ) + 2;

		if ( cb > cbBest )
		{
			cbBest       = cb;
			cbBestDelta  = (DWORD)pData - (DWORD)nTrace->Index;
		}

NextNode:
		// make nTrace reference to the previous node
		nTrace = nTrace->Previous;
	} while( nTrace );

	//
	// handle reference parameters
	//
	dwDistance = cbBestDelta;
	dwLength   = cbBest;

	return TRUE; // OK
}

//
// Purpose:
//   Shape member function for HashSearch::FindMatch
//
BOOL HashSearch::FindLongestMatch(IN  PBYTE pbyWin, IN PBYTE pData, IN DWORD cbData, 
						   IN DWORD cbMin2Match, IN BOOL bAddNode, OUT DWORD &dwDistance, OUT DWORD &dwLength )
{
	// validate input variables
	if ( cbData < cbMin2Match )
	{
		if ( cbData > 1 && bAddNode )
			AddNode( pData );
		return FALSE; // ERR
	}

	BOOL bRet = FindMatch(
		pbyWin, (DWORD)pData - (DWORD)pbyWin,
		pData, cbData,
		dwDistance, dwLength );

	// minimal match length reached ?
	if ( dwLength < cbMin2Match )
		bRet = FALSE;

	// correct too big matches
//	dwLength = min( dwLength, cbWin );

	// add node for current position of non-last + wanted
	if ( bAddNode && cbData > 1 )
		AddNode( pData );

	return bRet;
}

//
// Purpose:
//   Calls HashSearch::AddNode for the specified number of bytes
//   at the specified data position
//
void HashSearch::UpdateNodesForRange( PBYTE pby, DWORD cb )
{
	for ( UINT i = 0; i < cb; i++ )
		AddNode( pby++ );

	return;
}

//
// Purpose:
//   A little memcmp modification
//
// Returns:
//   The number of matching bytes
//
DWORD __forceinline HashSearch::MyMemCmp( PBYTE p1, PBYTE p2, DWORD cbMax )
{
	DWORD cbMatch;

	__asm
	{
		mov     edx, cbMax        // edx = cbMax

		// 1 step: compare in DWORD steps
		mov     ecx, edx
		shr     ecx, 2            // ecx = ecx / 4
		mov     edi, p1
		mov     esi, p2
		rep     cmpsd
		inc     ecx
		shl     ecx, 2
		mov     ebx, edx
		and     ebx, 0xFFFFFFFC
		sub     ebx, ecx          // ebx = matching bytes up to now

		// 2. step: compare in BYTE steps
		mov     ecx, edx
		sub     ecx, ebx
		mov     edi, p1
		add     edi, ebx
		push    edi               // preserve starting destination ptr
		mov     esi, p2
		add     esi, ebx
		rep     cmpsb
		pop     eax               // restore starting destination ptr
		sub     edi, eax
		dec     edi
		add     ebx, edi

		mov     cbMatch, ebx
	}

	return cbMatch;
}
💿 文件大小 53 K
👤 上传用户 anlan001
📂 所属分类压缩解压
🏷️ 相关标签

#compression #Lempel-Ziv #algorithm #console
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -