📄 literalhufftree.cpp

📁 My (so called) HiP compression algorithm as console mode utility. It s a hybrid of Lempel-Ziv 77 a
💻 CPP
字号:
#include "literalhufftree.h"
#include <math.h>
#include <crtdbg.h>
#include <stdio.h>
#include "ByteWriter.h"
#include "ByteReader.h"
#include "Macros.h"

#include "WarnOff.h"
#include "LinkerCmds.h"

LiteralHuffTree::LiteralHuffTree(void)
{
	ZERO( m_dwOccurr );
	ZERO( m_tree );
	m_ucTreeItems           = 0;
}

LiteralHuffTree::~LiteralHuffTree(void)
{
}

//
// Purpose:
//   A root procedure
//
BOOL LiteralHuffTree::Generate( PBYTE buff, DWORD cb )
{
	if ( !buff || !cb )
		return FALSE; // ERR

	GetByteOccurrences( buff, cb );

	BuildInternalTree();

	LinkTreeElements();

	GenerateBitSequences();

	return TRUE; // OK
}

//
// Purpose:
//   A root procedure
//
BOOL LiteralHuffTree::GenerateWithOccurrences()
{
	BuildInternalTree();

	LinkTreeElements();

	GenerateBitSequences();

	return TRUE; // OK
}

//
// Purpose:
//   A root procedure
//
BOOL LiteralHuffTree::GenerateAdaptiveModel()
{
	//
	// init the occurrences with 1 for each symbol in the alphabet
	//
	for ( UINT i = 0; i < ARRAY_ITEMS(m_dwOccurr); i++ )
		m_dwOccurr[i] = 1;

	//
	// generate the tree, ...
	//
	BuildInternalTree();

	LinkTreeElements();

	GenerateBitSequences();

	return TRUE; // OK
}

//
// Purpose:
//   Scan the specified buffer and log the occurrence
//   of each byte
//
void LiteralHuffTree::GetByteOccurrences( PBYTE pby, DWORD cb )
{
	ZERO( m_dwOccurr );
	while( cb )
	{
		m_dwOccurr[ *pby ]++;
		--cb;
		++pby;
	}

	return;
}

//
// Purpose:
//   Transfer the occurrences array into m_tree items
//
void LiteralHuffTree::BuildInternalTree()
{
	ZERO( m_tree );
	for ( UINT u = 0; u < 256; u++ )
	{
		m_tree[u].id            = u;
		m_tree[u].dwcOccurr     = m_dwOccurr[u];
		m_tree[u].dwRightId     = (DWORD)-1;        // indicates the end of a branch
		m_tree[u].dwLeftId      = (DWORD)-1;        // indicates the end of a branch
	}
	m_ucTreeItems = 256;

	return;
}

//
// Purpose:
//   Builds the Huffman tree linkage by adding branch items in the chain
//
// Remarks:
//   (biggest probability first)
//
void LiteralHuffTree::LinkTreeElements()
{
	PLHT_ITEM   pitem1, pitem2, pitem, pLastLink;
	LHT_ITEM    branch, pseudoitem;

	if ( !m_ucTreeItems )
		return; // no items in our tree

	pseudoitem.dwcOccurr  = MAXDWORD;
	pLastLink             = NULL;
	for (;;)
	{
		//
		// search unlinked items with the lowest occurrences
		//
		pitem1 = NULL;
		pitem2 = NULL;
		pitem1 = &pseudoitem;
		for ( UINT u = 0; u < m_ucTreeItems; u++ )
		{
			pitem = &m_tree[u];
			if ( !pitem->bLinked && pitem->dwcOccurr > 0 && pitem->dwcOccurr < pitem1->dwcOccurr )
				pitem1           = pitem;
		}
		pitem1->bLinked = TRUE; // mark the found items linked !
		pitem2 = &pseudoitem;
		for ( UINT u = 0; u < m_ucTreeItems; u++ )
		{
			pitem = &m_tree[u];
			if ( !pitem->bLinked && pitem->dwcOccurr > 0 && pitem->dwcOccurr < pitem2->dwcOccurr)
				pitem2           = pitem;
		}

		pitem2->bLinked = TRUE; // mark the found items linked !

		//
		// did we link all items ?
		//
		if ( pitem1->dwcOccurr == MAXDWORD || pitem2->dwcOccurr == MAXDWORD )
		{
			if ( pLastLink )
				pLastLink->dwDadId = MAXDWORD; // indicates root item
			break; // exit for(;;) loop
		}

		_ASSERT( m_ucTreeItems < ARRAY_ITEMS(m_tree) );

		//
		// add a branch item linking the 2
		//
		ZERO( branch )
		branch.id         = m_ucTreeItems;
		branch.bLinked    = FALSE;
		branch.dwLeftId   = pitem1->id;
		branch.dwRightId  = pitem2->id;
		branch.dwcOccurr  = pitem1->dwcOccurr + pitem2->dwcOccurr;

		pitem1->dwDadId   = pitem2->dwDadId = branch.id;

		m_tree[ m_ucTreeItems++ ] = branch;
		pLastLink = &m_tree[ m_ucTreeItems - 1 ];
	}

	return;
}

//
// Purpose:
//   Trace the tree for non-branch items to find out
//   their corresponding bit sequences
//
void LiteralHuffTree::GenerateBitSequences()
{
	PLHT_ITEM  pitem, ptrace, ptrace2;
	DWORD      cbitsBig = 0, cbitsSmall = 0xFF;

	for ( UINT u = 0; u < m_ucTreeItems; u++ )
	{
		pitem = &m_tree[u];
		if ( !pitem->dwcOccurr )
			continue; // this literal is never used
		if ( pitem->dwLeftId == (DWORD)-1 && pitem->dwRightId == (DWORD)-1 )
		{
			//
			// handle a non-branch chain element
			//
			ptrace = pitem;
			while( ptrace->dwDadId != (DWORD)-1 )
			{
				ptrace2 = &m_tree[ ptrace->dwDadId ];
				if ( ptrace->id == ptrace2->dwRightId )
					pitem->dwSequence |= HT_RIGHT;
				else
					pitem->dwSequence |= HT_LEFT;
				++pitem->bySequLen;
				pitem->dwSequence <<= 1;
				ptrace = ptrace2;
			}			
			pitem->dwSequence >>= 1;

#ifdef VERBOSE_PLEASE
			cbitsBig    = max( cbitsBig, pitem->bySequLen );
			cbitsSmall  = min( cbitsSmall, pitem->bySequLen );
#endif
			_ASSERT( pitem->bySequLen < 15 );
		}
	}

#ifdef VERBOSE_PLEASE_DISCARDED // prints very much because of the adaptive huffman model
	printf( "[  Smallest literal bit equivalence: %u bit(s) ]\n", cbitsSmall );
	printf( "[  Biggest literal bit equivalence: %u bit(s) ]\n", cbitsBig );
#endif
}

//
// Purpose:
//   Calculates the number of bits that eats to
//   biggest occurrence value in the set
//
//
DWORD LiteralHuffTree::GetBitCountForOccurrence()
{
	DWORD dwMax = 0;

	// get the biggest occurrence value
	for ( UINT u = 0; u < 256; u++ )
		if ( m_tree[u].dwcOccurr > dwMax )
			dwMax = m_tree[u].dwcOccurr;

	// calculate how much bits we'll need to store this value
	float f = log(dwMax) / log(2);
	DWORD c = (DWORD)f;
	if ( (float)c < f ) // is f a decimal number ?
		c++;

	return c;
}

//
// Purpose:
//   Write the Huffman table in a compact format
//   to the specified memory area
//
// Returns:
//   Number of bytes emitted to the output buffer
//   0 means error !
//
DWORD LiteralHuffTree::Emit( BYTE* pBuff, DWORD cbBuff )
{
	DWORD         cbitItem;
	ByteWriter    writer( pBuff, cbBuff );

	// get the number of bits which we'll stamp the occurrence values into the buffer
	cbitItem = GetBitCountForOccurrence();

	//
	// probability stamp loop
	//
	writer.WriteByte( cbitItem );
    for ( UINT u = 0; u < 256; u++ )
		writer.WriteBits( m_tree[u].dwcOccurr, cbitItem );

	writer.FinishWriting();

	return writer.GetWrittenByteCount(); // OK
}

/*
DWORD LiteralHuffTree::Emit( BYTE* pBuff, DWORD cbBuff )
{
	if ( cbBuff < 256 )
		return 0; // ERR

	DWORD dwBiggest = 0;
	for ( UINT i = 0; i < 256; i++ )
		dwBiggest = max( dwBiggest, m_tree[i].dwcOccurr );
	float fDiv = (DWORD)dwBiggest/255;
	fDiv++;
	
	for ( UINT i = 0; i < 256; i++ )
	{
		*pBuff = (DWORD)((float)m_tree[i].dwcOccurr/fDiv);
		if ( m_tree[i].dwcOccurr && *pBuff == 0 )
			*pBuff = 1;
		++pBuff;
	}

	return 256; // OK
}
*/

DWORD LiteralHuffTree::RipEmittedTree( PBYTE pby, DWORD cb )
{
	ByteReader   reader( pby, cb );
	DWORD        cbitsItem = reader.ReadByte();

	ZERO( m_dwOccurr );
    for ( UINT u = 0; u < 256; u++ )
		m_dwOccurr[u] = reader.ReadBits( cbitsItem );

	BuildInternalTree();

	LinkTreeElements();

	GenerateBitSequences();

	return reader.GetReadByteCount(); // OK
}

/*
DWORD LiteralHuffTree::RipEmittedTree( PBYTE pby, DWORD cb )
{
	if ( cb < 256 )
		return 0;

	ZERO( m_dwOccurr );
    for ( UINT u = 0; u < 256; u++ )
	{
		m_dwOccurr[u] = (DWORD)*pby;
		pby++;
	}

	BuildInternalTree();

	LinkTreeElements();

	GenerateBitSequences();

	return 256; // OK
}
*/

//
// Purpose:
//   This routine returns the data field of the HT_ITEM structure
//   which is identified by reading bits form the ByteReader class
//
DWORD LiteralHuffTree::QueryLiteralByBitSequence( PByteReader reader )
{
	PLHT_ITEM pitem; 
	
	pitem = &m_tree[m_ucTreeItems - 1];
	while ( pitem->dwLeftId != -1 && pitem->dwRightId != -1 )
		switch( reader->ReadBit() )
		{
		case HT_LEFT:
			pitem = &m_tree[pitem->dwLeftId];
			break;

		case HT_RIGHT:
			pitem = &m_tree[pitem->dwRightId];
			break;
		}

	return pitem->id; 
}
💿 文件大小 53 K
👤 上传用户 anlan001
📂 所属分类压缩解压
🏷️ 相关标签

#compression #Lempel-Ziv #algorithm #console
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -