⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aricoder.cpp

📁 This is a little console mode utility program which is able to (de-)compress single files with a s
💻 CPP
字号:
#include "AriCoder.h"
#include "CrtDbg.h"
#include "Macros.h"

//
// constants
//
#define END_OF_STREAM          256
#define COMP_BLOCK_HDR_SIZE    sizeof( DWORD ) // just the size of the original data

//
// AriCoder class implementation
//
AriCoder::AriCoder()
{
	Init();
}

AriCoder::~AriCoder()
{
	Dispose();
}

//
// Purpose:
//   Perform all internal needed intialization works
//
void AriCoder::Init()
{
	ZERO( m_Occurrences )
	m_cbTable      = 0;
	m_pbyOutBuff   = NULL;
}

//
// Purpose:
//   Perform all class cleanups
//
void AriCoder::Dispose()
{
	FreeOutBuff();
}

//
// Purpose:
//   Deallocate the output buffer if needed
//
void AriCoder::FreeOutBuff()
{
	if ( m_pbyOutBuff )
	{
		free( m_pbyOutBuff );
		m_pbyOutBuff = NULL;
	}
}

//
// Purpose:
//   Setup the m_Occurrences array concerning to the specified data buffer
//
void AriCoder::CountSymbols( PBYTE p, DWORD cb )
{
	DWORD dwSymCnts[ALPHABET_SIZE];

	//
	// count symbol occurrences
	//
	ZERO( dwSymCnts )
	for ( UINT i = 0; i < cb; i++ )
		dwSymCnts[ p[i] ]++;

	//
	// stuff the DWORD chain into 'm_Occurrences' - scale them down to byte size
	// (2 steps)
	//

	/// 1st step: shrink occurrences down to 1 byte size

	// get biggest occurrence
	DWORD dwcMax = 0;
	for ( UINT u = 0; u < ALPHABET_SIZE; u++ )
		dwcMax = max( dwSymCnts[u], dwcMax );

	// divide counts - init 'm_Occurrence' chain
	DWORD dwScale = (dwcMax / 256) + 1; // calc divider

	for ( UINT z = 0; z < ALPHABET_SIZE; z++ )
	{
		m_Occurrences[z] = (BYTE)( dwSymCnts[z] / dwScale );
		if ( dwSymCnts[z] != 0 && m_Occurrences[z] == 0 ) // minimization too extreme ?
			m_Occurrences[z] = 1;
	}

	/// 2nd step: make sure that the total of all shrinked occurrences is less than 16384
	DWORD dwTotal = 1, dwScale2;

	for ( UINT e = 0; e < ARRAY_ITEMS(m_Occurrences); e++ )
		dwTotal += m_Occurrences[e];
    if ( dwTotal > ( 32767 - 256 ) )
        dwScale2 = 4;
    else if ( dwTotal > 16383 )
        dwScale2 = 2;
    else
        return; // we're done - no devision needed

	for ( UINT c = 0; c < ALPHABET_SIZE; c++ )
		m_Occurrences[c] = (BYTE)( m_Occurrences[c] / dwScale2 );

	return;
}

//
// Purpose:
//   Construct the ranges
//
// Remarks:
//   low of symbol_i    = m_Totals[i]
//   high of symbol_i   = m_Totals[i + 1]
//
void AriCoder::BuildTotals()
{
	m_Totals[0] = 0;
	for ( UINT i = 0; i < END_OF_STREAM; i++ )
		m_Totals[i + 1] = m_Totals[i] + m_Occurrences[i];
	m_Totals[END_OF_STREAM + 1] = m_Totals[END_OF_STREAM] + 1;
}

//
// Purpose:
//   Count the occurrences in the specified memory buffer and
//   construct the rage table
//
void AriCoder::BuildModelFromData( PBYTE p, DWORD cb )
{
	CountSymbols( p, cb );
	BuildTotals();
}

//
// Purpose:
//   Writes the shrinked symbol occurrences into the specified buffer
//
// Returns:
//   Size of the symbol count table in bytes
//   0  - error
//
// Remarks:
//   The idea for this format is also from Mark Nelson but my code is
//   different. It looks like this:
//
//   (start)(end)(symbol_counts[start...end]) (start).... (start=0)(end=0) <- terminator
//
//   start  - starting symbol of chain
//   end    - ending symbol of chain
//
DWORD AriCoder::EmitCounts( PBYTE p, DWORD cb )
{
	DWORD   cbEmitted = 0;
	DWORD   dwcZeros;

	// find the first non-zero count
	UINT iCur = 0, iEnd;
	while( iCur < 255 && m_Occurrences[iCur] == 0 )
		iCur++;

	//
	// main loop
	//
	iEnd        = iCur + 1;
	while( iCur < 256 )
	{
		dwcZeros   = 0;
		for ( UINT u = iEnd; u < 256 && m_Occurrences[u] == 0; u++ )
			dwcZeros++;

		if ( dwcZeros > 3 || (iEnd+dwcZeros) == 256 )
		{
			DWORD  dwcCounts  = (iEnd - iCur);

			if ( (dwcCounts+2) + cbEmitted > cb )
				return 0; // ERR - not enough space in buffer

			// emit current run
			p[cbEmitted++]   = iCur;
			p[cbEmitted++]   = iEnd - 1;
			for ( UINT i = iCur; i < iEnd; i++ )
				p[cbEmitted++] = m_Occurrences[i];

			// reset index vars
			iCur       = iEnd + dwcZeros;
			iEnd       = iCur; // will be increased at end of loop
		}

		// update sliding vars
		++iEnd;
	}

	// emit terminator
	if ( cbEmitted + 2 > cb )
		return 0; // ERR - not enough space in buffer
	p[cbEmitted++]  = 0xFF;
	p[cbEmitted++]  = 0x00;

	return cbEmitted; // OK
}

//
// Purpose:
//   Read out a symbol count table, emitted by AriCoder::EmitCounts, from
//   a certain memory location.
//
// Returns:
//   Size of the symbol count table in bytes
//
DWORD AriCoder::GrabCounts( PBYTE p )
{
	DWORD cbRead = 0;

	// zero-init table
	ZERO( m_Occurrences )

	//
	// main loop
	//
	while ( TRUE )
	{
		// rip start/end indices
		UINT iStart = p[cbRead++];
		UINT iEnd   = p[cbRead++];

		// terminator reached ?
		if ( iEnd < iStart ) 
			break;

		// transfer: table counts -> m_Occurrences
		for( UINT i = iStart; i <= iEnd; i++ )
			m_Occurrences[i] = p[cbRead++];
	}

	return cbRead; // OK
}

//
// Purpose:
//   Adjust variables for encoding
//
void AriCoder::InitEncoder()
{
	this->m_wLow              = 0;
	this->m_wHigh             = MAXWORD;
	this->m_cUnderflowBits    = 0;
}

//
// Purpose:
//   Adjust variables for decoding
//
void AriCoder::InitDecoder( ByteReader &reader )
{
	this->m_wLow              = 0;
	this->m_wHigh             = MAXWORD;
	this->m_cUnderflowBits    = 0;

	// read the first word form the input
	m_wCode = 0;
	for( UINT i = 0; i < 16; i++ )
	{
		m_wCode <<=   1;
		m_wCode  |=   reader.ReadBit();
	}
}

//
// Purpose:
//   Receiving low, high and scale of a symbol
//
void AriCoder::GetSymbolInfo( WORD symbol, ARI_SYM &s )
{
	s.low     = m_Totals[symbol];
	s.high    = m_Totals[symbol + 1];
	s.scale   = m_Totals[END_OF_STREAM + 1];
}

//
// Purpose:
//   Output the specified symbol by adjusting m_wCode/m_wHigh/m_wLow to it
//   and eventually output some bits
//
void AriCoder::EncodeSymbol( ByteWriter &writer, ARI_SYM sym )
{
	DWORD range = (DWORD)(m_wHigh - m_wLow) + 1;

	// rescale high/low for the new symbol
	m_wHigh = m_wLow + (WORD)((range * sym.high) / sym.scale - 1);
	m_wLow  = m_wLow + (WORD)((range * sym.low ) / sym.scale );

	// turns out new bits until high and low are far enough
    while( TRUE )
	{
		if ((m_wHigh & 0x8000) == (m_wLow & 0x8000)) // if MSBs of high/low match -> output bits
		{
            BOOL b = writer.WriteBit( m_wHigh & 0x8000 );
            while ( m_cUnderflowBits > 0 )
			{
                b &= writer.WriteBit( ~m_wHigh & 0x8000 ); // in danger of underflow ?
                m_cUnderflowBits--;
            }
			_ASSERT( b );
        }
        else if ((m_wLow & 0x4000) && !(m_wHigh & 0x4000))
		{
			// shift out 2nd MSB
            m_cUnderflowBits  += 1;
            m_wLow            &= 0x3fff;
            m_wHigh           |= 0x4000;
        }
		else
            return;

		// shift low & high
        m_wLow    <<= 1;
        m_wHigh   <<= 1;
        m_wHigh    |= 1;
    }
}

//
// Purpose:
//   Must be called at the end of the compression to
//   process the underflow bits
//
void AriCoder::FlushEncoder( ByteWriter &writer )
{
	BOOL b = writer.WriteBit( m_wLow & 0x4000 );
    m_cUnderflowBits++;

    while ( m_cUnderflowBits-- > 0 )
        b &= writer.WriteBit( ~m_wLow & 0x4000 );

	_ASSERT( b );
}

//
// Purpose:
//   Root routine to compress a certain data block
//
// Args:
//   pOut   - returns a pointer to the compressed data; MUST be freed with free()
//   cbOut  - size of compressed data
//
BOOL AriCoder::CompressData( PBYTE p, DWORD cb, PBYTE &pOut, DWORD &cbOut )
{
	BOOL b;

	FreeOutBuff();

	//
	// build range table
	//
	BuildModelFromData( p, cb );

	//
	// allocate memory
	//
	m_cbOutBuff    = (DWORD)(cb * 1.2) + 300;
	m_pbyOutBuff   = (PBYTE)malloc( m_cbOutBuff );
	if ( !m_pbyOutBuff )
		return FALSE; // ERR

	//
	// emit the uncompressed data size
	//
	*(PDWORD)m_pbyOutBuff = cb;

	//
	// emit shrinked count table
	//
	m_cbTable = EmitCounts(
		m_pbyOutBuff + COMP_BLOCK_HDR_SIZE,
		m_cbOutBuff - COMP_BLOCK_HDR_SIZE );

	//
	// encoding loop
	//
	ByteWriter  writer;
	ARI_SYM     sym;

	InitEncoder();
	writer.Assign(
		m_pbyOutBuff + m_cbTable + COMP_BLOCK_HDR_SIZE,
		m_cbOutBuff - m_cbTable - COMP_BLOCK_HDR_SIZE);
	
	//
	// encoding loop
	//
	for( UINT i = 0; i < cb; i++ )
	{
		GetSymbolInfo( p[i], sym );
		EncodeSymbol( writer, sym );
	}

	// put the EndOfStream symbol
	GetSymbolInfo( END_OF_STREAM, sym );
	EncodeSymbol( writer, sym );

	// flush encoder + shut down all
	FlushEncoder( writer );
	b = writer.WriteBits( 0L, 16 );
	_ASSERT( b );

	writer.FinishWriting();

	//
	// set routine reference variables
	//
	pOut   = m_pbyOutBuff;
	cbOut  = writer.m_cbWritten + COMP_BLOCK_HDR_SIZE + m_cbTable;

	return TRUE; // OK
}

//
// Remarks:
//   Decompression only
//
void AriCoder::GetSymbolScale( PARI_SYM pSym )
{
	pSym->scale = m_Totals[ END_OF_STREAM + 1 ];
}

//
// Remarks:
//   Decompression only
//
DWORD AriCoder::GetCurrentCount( ARI_SYM s )
{
	LONG  lRange = (LONG)( this->m_wHigh - this->m_wLow ) + 1 ;
	WORD  lCount = (WORD)( ( ((LONG)( m_wCode - m_wLow ) + 1 ) * s.scale-1 ) / lRange ); 

	return lCount;
}

//
// Purpose:
//   Returns the index into the m_Totals chain corresponding
//   to the specified dwCount argument
//
// Remarks:
//   Decompression only
//
UINT AriCoder::SymbolToIndex( DWORD dwCount, PARI_SYM sym )
{
    for ( UINT z = END_OF_STREAM; dwCount < m_Totals[z]; z-- )
		;
    sym->high   = m_Totals[ z + 1 ];
    sym->low    = m_Totals[ z ];

	return z;
}

//
// Purpose:
//   Remove the symbol from high/low/code and input eventually
//   some new bits
//
// Remarks:
//   Decompression only
//
void AriCoder::RemoveSymbolFromStream( ByteReader &reader, ARI_SYM sym )
{
    DWORD dwRange;

    dwRange  = (long)( m_wHigh - m_wLow ) + 1;
    m_wHigh  = m_wLow + (unsigned short int)
                 (( dwRange * sym.high ) / sym.scale - 1 );
    m_wLow = m_wLow + (unsigned short int)
                 (( dwRange * sym.low ) / sym.scale );

    while( TRUE )
	{
        if ( (m_wHigh & 0x8000) == (m_wLow & 0x8000) )
		{}
        else if ((m_wLow & 0x4000) == 0x4000  && (m_wHigh & 0x4000) == 0)  // in danger of underflow ?
		{
			// shift out 2nd MSB
            m_wCode  ^= 0x4000;
            m_wLow   &= 0x3fff;
            m_wHigh  |= 0x4000;
        }
		else
            return;

        m_wLow   <<= 1;
        m_wHigh  <<= 1;
        m_wHigh   |= 1;
        m_wCode  <<= 1;
        m_wCode   += reader.ReadBit();
    }
}

//
// Purpose:
//   Root routine to compress a certain data block
//
// Args:
//   pOut   - returns a pointer to the compressed data; MUST be freed with free()
//   cbOut  - size of compressed data
//
BOOL AriCoder::DecompressData( PBYTE p, DWORD cb, PBYTE &pOut, DWORD &cbOut )
{
	FreeOutBuff();

	//
	// receive uncompressed data size
	//
	DWORD cbOrigi = *(PDWORD)p;

	//
	// receive some space in the winslows swapfile FNA alloc mem
	//
	m_cbOutBuff   = cbOrigi;
	m_pbyOutBuff  = (PBYTE)malloc( cbOrigi );
	if ( !m_pbyOutBuff )
		return FALSE; // ERR

	//
	// receive symbol counts, build model
	//
	m_cbTable = GrabCounts( p + COMP_BLOCK_HDR_SIZE );
	BuildTotals();

	//
	// decoding loop
	//
	ByteReader reader;

	reader.Assign(
		p + m_cbTable + COMP_BLOCK_HDR_SIZE,
		cb - m_cbTable - COMP_BLOCK_HDR_SIZE );
	InitDecoder( reader );

	ARI_SYM  sym;
	DWORD    cbPut = 0;
	while( TRUE )
	{
		// get next character from stream
		GetSymbolScale( &sym );
		DWORD dwCount = GetCurrentCount( sym );
		WORD  wChar   = SymbolToIndex( dwCount, &sym );

		// terminator reached ?
		if ( wChar == END_OF_STREAM )
			break;

		RemoveSymbolFromStream( reader, sym );

		// output the found character
		m_pbyOutBuff[cbPut++] = (BYTE)wChar;
	}

	//
	// init output variables
	//
	pOut   = m_pbyOutBuff;
	cbOut  = cbOrigi;

	return TRUE; // OK
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -