📄 bzip2encoder.cpp
字号:
// BZip2Encoder.cpp
#include "StdAfx.h"
#include "BZip2Encoder.h"
#include "../../../Common/Alloc.h"
#include "../BWT/Mtf8.h"
#include "BZip2CRC.h"
namespace NCompress {
namespace NBZip2 {
static const UInt32 kBufferSize = (1 << 17);
static const int kNumHuffPasses = 4;
CEncoder::CEncoder():
m_Block(0),
m_NeedHuffmanCreate(true),
m_NumPasses(1),
m_OptimizeNumTables(false),
m_BlockSizeMult(kBlockSizeMultMax)
{}
CEncoder::~CEncoder()
{
::BigFree(m_Block);
}
UInt32 CEncoder::ReadRleBlock(Byte *buffer)
{
UInt32 i = 0;
Byte prevByte;
if (m_InStream.ReadByte(prevByte))
{
UInt32 blockSize = m_BlockSizeMult * kBlockSizeStep - 1;
int numReps = 1;
buffer[i++] = prevByte;
while (i < blockSize) // "- 1" to support RLE
{
Byte b;
if (!m_InStream.ReadByte(b))
break;
if (b != prevByte)
{
if (numReps >= kRleModeRepSize)
buffer[i++] = numReps - kRleModeRepSize;
buffer[i++] = b;
numReps = 1;
prevByte = b;
continue;
}
numReps++;
if (numReps <= kRleModeRepSize)
buffer[i++] = b;
else if (numReps == kRleModeRepSize + 255)
{
buffer[i++] = numReps - kRleModeRepSize;
numReps = 0;
}
}
// it's to support original BZip2 decoder
if (numReps >= kRleModeRepSize)
buffer[i++] = numReps - kRleModeRepSize;
}
return i;
}
void CEncoder::WriteBits2(UInt32 value, UInt32 numBits)
{ m_OutStreamCurrent->WriteBits(value, numBits); }
void CEncoder::WriteByte2(Byte b) { WriteBits2(b , 8); }
void CEncoder::WriteBit2(bool v) { WriteBits2((v ? 1 : 0), 1); }
void CEncoder::WriteCRC2(UInt32 v)
{
for (int i = 0; i < 4; i++)
WriteByte2(((Byte)(v >> (24 - i * 8))));
}
void CEncoder::WriteBits(UInt32 value, UInt32 numBits)
{ m_OutStream.WriteBits(value, numBits); }
void CEncoder::WriteByte(Byte b) { WriteBits(b , 8); }
void CEncoder::WriteBit(bool v) { WriteBits((v ? 1 : 0), 1); }
void CEncoder::WriteCRC(UInt32 v)
{
for (int i = 0; i < 4; i++)
WriteByte(((Byte)(v >> (24 - i * 8))));
}
// blockSize > 0
void CEncoder::EncodeBlock(Byte *block, UInt32 blockSize)
{
WriteBit2(false); // Randomised = false
{
UInt32 origPtr = m_BlockSorter.Sort(block, blockSize);
WriteBits2(origPtr, kNumOrigBits);
}
CMtf8Encoder mtf;
int numInUse = 0;
{
bool inUse[256];
bool inUse16[16];
UInt32 i;
for (i = 0; i < 256; i++)
inUse[i] = false;
for (i = 0; i < 16; i++)
inUse16[i] = false;
for (i = 0; i < blockSize; i++)
inUse[block[i]] = true;
for (i = 0; i < 256; i++)
if (inUse[i])
{
inUse16[i >> 4] = true;
mtf.Buffer[numInUse++] = (Byte)i;
}
for (i = 0; i < 16; i++)
WriteBit2(inUse16[i]);
for (i = 0; i < 256; i++)
if (inUse16[i >> 4])
WriteBit2(inUse[i]);
}
int alphaSize = numInUse + 2;
Byte *mtfs = m_MtfArray;
UInt32 mtfArraySize = 0;
UInt32 symbolCounts[kMaxAlphaSize];
{
for (int i = 0; i < kMaxAlphaSize; i++)
symbolCounts[i] = 0;
}
{
UInt32 rleSize = 0;
UInt32 i = 0;
do
{
UInt32 index = m_BlockSorter.Indices[i];
if (index == 0)
index = blockSize - 1;
else
index--;
int pos = mtf.FindAndMove(block[index]);
if (pos == 0)
rleSize++;
else
{
while (rleSize != 0)
{
rleSize--;
mtfs[mtfArraySize++] = (rleSize & 1);
symbolCounts[rleSize & 1]++;
rleSize >>= 1;
}
if (pos >= 0xFE)
{
mtfs[mtfArraySize++] = 0xFF;
mtfs[mtfArraySize++] = pos - 0xFE;
}
else
mtfs[mtfArraySize++] = pos + 1;
symbolCounts[pos + 1]++;
}
}
while (++i < blockSize);
while (rleSize != 0)
{
rleSize--;
mtfs[mtfArraySize++] = (rleSize & 1);
symbolCounts[rleSize & 1]++;
rleSize >>= 1;
}
if (alphaSize < 256)
mtfs[mtfArraySize++] = (Byte)(alphaSize - 1);
else
{
mtfs[mtfArraySize++] = 0xFF;
mtfs[mtfArraySize++] = (Byte)(alphaSize - 256);
}
symbolCounts[alphaSize - 1]++;
}
UInt32 numSymbols = 0;
{
for (int i = 0; i < kMaxAlphaSize; i++)
numSymbols += symbolCounts[i];
}
int bestNumTables = kNumTablesMin;
UInt32 bestPrice = 0xFFFFFFFF;
UInt32 startPos = m_OutStreamCurrent->GetPos();
UInt32 startCurByte = m_OutStreamCurrent->GetCurByte();
for (int nt = kNumTablesMin; nt <= kNumTablesMax + 1; nt++)
{
int numTables;
if(m_OptimizeNumTables)
{
m_OutStreamCurrent->SetPos(startPos);
m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte);
if (nt <= kNumTablesMax)
numTables = nt;
else
numTables = bestNumTables;
}
else
{
if (numSymbols < 200) numTables = 2;
else if (numSymbols < 600) numTables = 3;
else if (numSymbols < 1200) numTables = 4;
else if (numSymbols < 2400) numTables = 5;
else numTables = 6;
}
WriteBits2(numTables, kNumTablesBits);
UInt32 numSelectors = (numSymbols + kGroupSize - 1) / kGroupSize;
WriteBits2(numSelectors, kNumSelectorsBits);
{
UInt32 remFreq = numSymbols;
int gs = 0;
int t = numTables;
do
{
UInt32 tFreq = remFreq / t;
int ge = gs;
UInt32 aFreq = 0;
while (aFreq < tFreq) // && ge < alphaSize)
aFreq += symbolCounts[ge++];
if (ge - 1 > gs && t != numTables && t != 1 && (((numTables - t) & 1) == 1))
aFreq -= symbolCounts[--ge];
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t - 1];
int i = 0;
do
huffEncoder.m_Items[i].Len = (i >= gs && i < ge) ? 0 : 1;
while (++i < alphaSize);
gs = ge;
remFreq -= aFreq;
}
while(--t != 0);
}
for (int pass = 0; pass < kNumHuffPasses; pass++)
{
{
int t = 0;
do
m_HuffEncoders[t].StartNewBlock();
while(++t < numTables);
}
{
UInt32 mtfPos = 0;
UInt32 g = 0;
do
{
UInt32 symbols[kGroupSize];
int i = 0;
do
{
UInt32 symbol = mtfs[mtfPos++];
if (symbol >= 0xFF)
symbol += mtfs[mtfPos++];
symbols[i] = symbol;
}
while (++i < kGroupSize && mtfPos < mtfArraySize);
UInt32 bestPrice = 0xFFFFFFFF;
int t = 0;
do
{
NCompression::NHuffman::CItem *items = m_HuffEncoders[t].m_Items;
UInt32 price = 0;
int j = 0;
do
price += items[symbols[j]].Len;
while (++j < i);
if (price < bestPrice)
{
m_Selectors[g] = (Byte)t;
bestPrice = price;
}
}
while(++t < numTables);
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[m_Selectors[g++]];
int j = 0;
do
huffEncoder.AddSymbol(symbols[j]);
while (++j < i);
}
while (mtfPos < mtfArraySize);
}
int t = 0;
do
{
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t];
int i = 0;
do
if (huffEncoder.m_Items[i].Freq == 0)
huffEncoder.m_Items[i].Freq = 1;
while(++i < alphaSize);
Byte levels[kMaxAlphaSize];
huffEncoder.BuildTree(levels);
}
while(++t < numTables);
}
{
Byte mtfSel[kNumTablesMax];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -