📄 aricoder.cpp
字号:
#include "AriCoder.h"
#include "CrtDbg.h"
#include "Macros.h"
//
// constants
//
#define END_OF_STREAM 256
#define COMP_BLOCK_HDR_SIZE sizeof( DWORD ) // just the size of the original data
//
// AriCoder class implementation
//
AriCoder::AriCoder()
{
Init();
}
AriCoder::~AriCoder()
{
Dispose();
}
//
// Purpose:
// Perform all internal needed intialization works
//
void AriCoder::Init()
{
ZERO( m_Occurrences )
m_cbTable = 0;
m_pbyOutBuff = NULL;
}
//
// Purpose:
// Perform all class cleanups
//
void AriCoder::Dispose()
{
FreeOutBuff();
}
//
// Purpose:
// Deallocate the output buffer if needed
//
void AriCoder::FreeOutBuff()
{
if ( m_pbyOutBuff )
{
free( m_pbyOutBuff );
m_pbyOutBuff = NULL;
}
}
//
// Purpose:
// Setup the m_Occurrences array concerning to the specified data buffer
//
void AriCoder::CountSymbols( PBYTE p, DWORD cb )
{
DWORD dwSymCnts[ALPHABET_SIZE];
//
// count symbol occurrences
//
ZERO( dwSymCnts )
for ( UINT i = 0; i < cb; i++ )
dwSymCnts[ p[i] ]++;
//
// stuff the DWORD chain into 'm_Occurrences' - scale them down to byte size
// (2 steps)
//
/// 1st step: shrink occurrences down to 1 byte size
// get biggest occurrence
DWORD dwcMax = 0;
for ( UINT u = 0; u < ALPHABET_SIZE; u++ )
dwcMax = max( dwSymCnts[u], dwcMax );
// divide counts - init 'm_Occurrence' chain
DWORD dwScale = (dwcMax / 256) + 1; // calc divider
for ( UINT z = 0; z < ALPHABET_SIZE; z++ )
{
m_Occurrences[z] = (BYTE)( dwSymCnts[z] / dwScale );
if ( dwSymCnts[z] != 0 && m_Occurrences[z] == 0 ) // minimization too extreme ?
m_Occurrences[z] = 1;
}
/// 2nd step: make sure that the total of all shrinked occurrences is less than 16384
DWORD dwTotal = 1, dwScale2;
for ( UINT e = 0; e < ARRAY_ITEMS(m_Occurrences); e++ )
dwTotal += m_Occurrences[e];
if ( dwTotal > ( 32767 - 256 ) )
dwScale2 = 4;
else if ( dwTotal > 16383 )
dwScale2 = 2;
else
return; // we're done - no devision needed
for ( UINT c = 0; c < ALPHABET_SIZE; c++ )
m_Occurrences[c] = (BYTE)( m_Occurrences[c] / dwScale2 );
return;
}
//
// Purpose:
// Construct the ranges
//
// Remarks:
// low of symbol_i = m_Totals[i]
// high of symbol_i = m_Totals[i + 1]
//
void AriCoder::BuildTotals()
{
m_Totals[0] = 0;
for ( UINT i = 0; i < END_OF_STREAM; i++ )
m_Totals[i + 1] = m_Totals[i] + m_Occurrences[i];
m_Totals[END_OF_STREAM + 1] = m_Totals[END_OF_STREAM] + 1;
}
//
// Purpose:
// Count the occurrences in the specified memory buffer and
// construct the rage table
//
void AriCoder::BuildModelFromData( PBYTE p, DWORD cb )
{
CountSymbols( p, cb );
BuildTotals();
}
//
// Purpose:
// Writes the shrinked symbol occurrences into the specified buffer
//
// Returns:
// Size of the symbol count table in bytes
// 0 - error
//
// Remarks:
// The idea for this format is also from Mark Nelson but my code is
// different. It looks like this:
//
// (start)(end)(symbol_counts[start...end]) (start).... (start=0)(end=0) <- terminator
//
// start - starting symbol of chain
// end - ending symbol of chain
//
DWORD AriCoder::EmitCounts( PBYTE p, DWORD cb )
{
DWORD cbEmitted = 0;
DWORD dwcZeros;
// find the first non-zero count
UINT iCur = 0, iEnd;
while( iCur < 255 && m_Occurrences[iCur] == 0 )
iCur++;
//
// main loop
//
iEnd = iCur + 1;
while( iCur < 256 )
{
dwcZeros = 0;
for ( UINT u = iEnd; u < 256 && m_Occurrences[u] == 0; u++ )
dwcZeros++;
if ( dwcZeros > 3 || (iEnd+dwcZeros) == 256 )
{
DWORD dwcCounts = (iEnd - iCur);
if ( (dwcCounts+2) + cbEmitted > cb )
return 0; // ERR - not enough space in buffer
// emit current run
p[cbEmitted++] = iCur;
p[cbEmitted++] = iEnd - 1;
for ( UINT i = iCur; i < iEnd; i++ )
p[cbEmitted++] = m_Occurrences[i];
// reset index vars
iCur = iEnd + dwcZeros;
iEnd = iCur; // will be increased at end of loop
}
// update sliding vars
++iEnd;
}
// emit terminator
if ( cbEmitted + 2 > cb )
return 0; // ERR - not enough space in buffer
p[cbEmitted++] = 0xFF;
p[cbEmitted++] = 0x00;
return cbEmitted; // OK
}
//
// Purpose:
// Read out a symbol count table, emitted by AriCoder::EmitCounts, from
// a certain memory location.
//
// Returns:
// Size of the symbol count table in bytes
//
DWORD AriCoder::GrabCounts( PBYTE p )
{
DWORD cbRead = 0;
// zero-init table
ZERO( m_Occurrences )
//
// main loop
//
while ( TRUE )
{
// rip start/end indices
UINT iStart = p[cbRead++];
UINT iEnd = p[cbRead++];
// terminator reached ?
if ( iEnd < iStart )
break;
// transfer: table counts -> m_Occurrences
for( UINT i = iStart; i <= iEnd; i++ )
m_Occurrences[i] = p[cbRead++];
}
return cbRead; // OK
}
//
// Purpose:
// Adjust variables for encoding
//
void AriCoder::InitEncoder()
{
this->m_wLow = 0;
this->m_wHigh = MAXWORD;
this->m_cUnderflowBits = 0;
}
//
// Purpose:
// Adjust variables for decoding
//
void AriCoder::InitDecoder( ByteReader &reader )
{
this->m_wLow = 0;
this->m_wHigh = MAXWORD;
this->m_cUnderflowBits = 0;
// read the first word form the input
m_wCode = 0;
for( UINT i = 0; i < 16; i++ )
{
m_wCode <<= 1;
m_wCode |= reader.ReadBit();
}
}
//
// Purpose:
// Receiving low, high and scale of a symbol
//
void AriCoder::GetSymbolInfo( WORD symbol, ARI_SYM &s )
{
s.low = m_Totals[symbol];
s.high = m_Totals[symbol + 1];
s.scale = m_Totals[END_OF_STREAM + 1];
}
//
// Purpose:
// Output the specified symbol by adjusting m_wCode/m_wHigh/m_wLow to it
// and eventually output some bits
//
void AriCoder::EncodeSymbol( ByteWriter &writer, ARI_SYM sym )
{
DWORD range = (DWORD)(m_wHigh - m_wLow) + 1;
// rescale high/low for the new symbol
m_wHigh = m_wLow + (WORD)((range * sym.high) / sym.scale - 1);
m_wLow = m_wLow + (WORD)((range * sym.low ) / sym.scale );
// turns out new bits until high and low are far enough
while( TRUE )
{
if ((m_wHigh & 0x8000) == (m_wLow & 0x8000)) // if MSBs of high/low match -> output bits
{
BOOL b = writer.WriteBit( m_wHigh & 0x8000 );
while ( m_cUnderflowBits > 0 )
{
b &= writer.WriteBit( ~m_wHigh & 0x8000 ); // in danger of underflow ?
m_cUnderflowBits--;
}
_ASSERT( b );
}
else if ((m_wLow & 0x4000) && !(m_wHigh & 0x4000))
{
// shift out 2nd MSB
m_cUnderflowBits += 1;
m_wLow &= 0x3fff;
m_wHigh |= 0x4000;
}
else
return;
// shift low & high
m_wLow <<= 1;
m_wHigh <<= 1;
m_wHigh |= 1;
}
}
//
// Purpose:
// Must be called at the end of the compression to
// process the underflow bits
//
void AriCoder::FlushEncoder( ByteWriter &writer )
{
BOOL b = writer.WriteBit( m_wLow & 0x4000 );
m_cUnderflowBits++;
while ( m_cUnderflowBits-- > 0 )
b &= writer.WriteBit( ~m_wLow & 0x4000 );
_ASSERT( b );
}
//
// Purpose:
// Root routine to compress a certain data block
//
// Args:
// pOut - returns a pointer to the compressed data; MUST be freed with free()
// cbOut - size of compressed data
//
BOOL AriCoder::CompressData( PBYTE p, DWORD cb, PBYTE &pOut, DWORD &cbOut )
{
BOOL b;
FreeOutBuff();
//
// build range table
//
BuildModelFromData( p, cb );
//
// allocate memory
//
m_cbOutBuff = (DWORD)(cb * 1.2) + 300;
m_pbyOutBuff = (PBYTE)malloc( m_cbOutBuff );
if ( !m_pbyOutBuff )
return FALSE; // ERR
//
// emit the uncompressed data size
//
*(PDWORD)m_pbyOutBuff = cb;
//
// emit shrinked count table
//
m_cbTable = EmitCounts(
m_pbyOutBuff + COMP_BLOCK_HDR_SIZE,
m_cbOutBuff - COMP_BLOCK_HDR_SIZE );
//
// encoding loop
//
ByteWriter writer;
ARI_SYM sym;
InitEncoder();
writer.Assign(
m_pbyOutBuff + m_cbTable + COMP_BLOCK_HDR_SIZE,
m_cbOutBuff - m_cbTable - COMP_BLOCK_HDR_SIZE);
//
// encoding loop
//
for( UINT i = 0; i < cb; i++ )
{
GetSymbolInfo( p[i], sym );
EncodeSymbol( writer, sym );
}
// put the EndOfStream symbol
GetSymbolInfo( END_OF_STREAM, sym );
EncodeSymbol( writer, sym );
// flush encoder + shut down all
FlushEncoder( writer );
b = writer.WriteBits( 0L, 16 );
_ASSERT( b );
writer.FinishWriting();
//
// set routine reference variables
//
pOut = m_pbyOutBuff;
cbOut = writer.m_cbWritten + COMP_BLOCK_HDR_SIZE + m_cbTable;
return TRUE; // OK
}
//
// Remarks:
// Decompression only
//
void AriCoder::GetSymbolScale( PARI_SYM pSym )
{
pSym->scale = m_Totals[ END_OF_STREAM + 1 ];
}
//
// Remarks:
// Decompression only
//
DWORD AriCoder::GetCurrentCount( ARI_SYM s )
{
LONG lRange = (LONG)( this->m_wHigh - this->m_wLow ) + 1 ;
WORD lCount = (WORD)( ( ((LONG)( m_wCode - m_wLow ) + 1 ) * s.scale-1 ) / lRange );
return lCount;
}
//
// Purpose:
// Returns the index into the m_Totals chain corresponding
// to the specified dwCount argument
//
// Remarks:
// Decompression only
//
UINT AriCoder::SymbolToIndex( DWORD dwCount, PARI_SYM sym )
{
for ( UINT z = END_OF_STREAM; dwCount < m_Totals[z]; z-- )
;
sym->high = m_Totals[ z + 1 ];
sym->low = m_Totals[ z ];
return z;
}
//
// Purpose:
// Remove the symbol from high/low/code and input eventually
// some new bits
//
// Remarks:
// Decompression only
//
void AriCoder::RemoveSymbolFromStream( ByteReader &reader, ARI_SYM sym )
{
DWORD dwRange;
dwRange = (long)( m_wHigh - m_wLow ) + 1;
m_wHigh = m_wLow + (unsigned short int)
(( dwRange * sym.high ) / sym.scale - 1 );
m_wLow = m_wLow + (unsigned short int)
(( dwRange * sym.low ) / sym.scale );
while( TRUE )
{
if ( (m_wHigh & 0x8000) == (m_wLow & 0x8000) )
{}
else if ((m_wLow & 0x4000) == 0x4000 && (m_wHigh & 0x4000) == 0) // in danger of underflow ?
{
// shift out 2nd MSB
m_wCode ^= 0x4000;
m_wLow &= 0x3fff;
m_wHigh |= 0x4000;
}
else
return;
m_wLow <<= 1;
m_wHigh <<= 1;
m_wHigh |= 1;
m_wCode <<= 1;
m_wCode += reader.ReadBit();
}
}
//
// Purpose:
// Root routine to compress a certain data block
//
// Args:
// pOut - returns a pointer to the compressed data; MUST be freed with free()
// cbOut - size of compressed data
//
BOOL AriCoder::DecompressData( PBYTE p, DWORD cb, PBYTE &pOut, DWORD &cbOut )
{
FreeOutBuff();
//
// receive uncompressed data size
//
DWORD cbOrigi = *(PDWORD)p;
//
// receive some space in the winslows swapfile FNA alloc mem
//
m_cbOutBuff = cbOrigi;
m_pbyOutBuff = (PBYTE)malloc( cbOrigi );
if ( !m_pbyOutBuff )
return FALSE; // ERR
//
// receive symbol counts, build model
//
m_cbTable = GrabCounts( p + COMP_BLOCK_HDR_SIZE );
BuildTotals();
//
// decoding loop
//
ByteReader reader;
reader.Assign(
p + m_cbTable + COMP_BLOCK_HDR_SIZE,
cb - m_cbTable - COMP_BLOCK_HDR_SIZE );
InitDecoder( reader );
ARI_SYM sym;
DWORD cbPut = 0;
while( TRUE )
{
// get next character from stream
GetSymbolScale( &sym );
DWORD dwCount = GetCurrentCount( sym );
WORD wChar = SymbolToIndex( dwCount, &sym );
// terminator reached ?
if ( wChar == END_OF_STREAM )
break;
RemoveSymbolFromStream( reader, sym );
// output the found character
m_pbyOutBuff[cbPut++] = (BYTE)wChar;
}
//
// init output variables
//
pOut = m_pbyOutBuff;
cbOut = cbOrigi;
return TRUE; // OK
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -