📄 lzmaencoder.cpp
字号:
// LZMA/Encoder.cpp
#include "StdAfx.h"
#include "../../../Common/Defs.h"
#include "../../Common/StreamUtils.h"
#include "LZMAEncoder.h"
// Match finder selection.
// To minimize the compiled code size, define only the match finders you need
// before this point, for example:
// #define COMPRESS_MF_BT
// #define COMPRESS_MF_BT4
// If neither the binary-tree (BT) nor the hash-chain (HC) family was requested,
// enable both families.
#if !defined(COMPRESS_MF_BT) && !defined(COMPRESS_MF_HC)
#define COMPRESS_MF_BT
#define COMPRESS_MF_HC
#endif
#ifdef COMPRESS_MF_BT
// If the BT family is enabled but no specific variant was requested,
// enable all three variants (BT2, BT3 and BT4).
#if !defined(COMPRESS_MF_BT2) && !defined(COMPRESS_MF_BT3) && !defined(COMPRESS_MF_BT4)
#define COMPRESS_MF_BT2
#define COMPRESS_MF_BT3
#define COMPRESS_MF_BT4
#endif
// Pull in the header of each enabled binary-tree variant.
#ifdef COMPRESS_MF_BT2
#include "../LZ/BinTree/BinTree2.h"
#endif
#ifdef COMPRESS_MF_BT3
#include "../LZ/BinTree/BinTree3.h"
#endif
#ifdef COMPRESS_MF_BT4
#include "../LZ/BinTree/BinTree4.h"
#endif
#endif
// Hash-chain match finder (HC4).
#ifdef COMPRESS_MF_HC
#include "../LZ/HashChain/HC4.h"
#endif
// Multi-threaded match finder wrapper; never enabled by default in this file,
// it is only compiled in when COMPRESS_MF_MT is defined externally.
#ifdef COMPRESS_MF_MT
#include "../LZ/MT/MT.h"
#endif
namespace NCompress {
namespace NLZMA {
// Default dictionary size expressed as a power-of-two exponent. The CEncoder
// constructor derives both _dictionarySize (1 << 22) and _distTableSize (22 * 2)
// from it.
const int kDefaultDictionaryLogSize = 22;
// Value the CEncoder constructor assigns to _numFastBytes.
const UInt32 kNumFastBytesDefault = 0x20;
// Match finder indices. Each enumerator's value is the position of the
// corresponding name in kMatchFinderIDs, so the two lists must stay in sync.
enum
{
  kBT2 = 0,
  kBT3 = 1,
  kBT4 = 2,
  kHC4 = 3
};

// Upper-case match finder names, indexed by the enumerators above.
static const wchar_t *kMatchFinderIDs[] = { L"BT2", L"BT3", L"BT4", L"HC4" };
Byte g_FastPos[1 << 11];
class CFastPosInit
{
public:
CFastPosInit() { Init(); }
void Init()
{
const Byte kFastSlots = 22;
int c = 2;
g_FastPos[0] = 0;
g_FastPos[1] = 1;
for (Byte slotFast = 2; slotFast < kFastSlots; slotFast++)
{
UInt32 k = (1 << ((slotFast >> 1) - 1));
for (UInt32 j = 0; j < k; j++, c++)
g_FastPos[c] = slotFast;
}
}
} g_FastPosInit;
// Encodes the 8 bits of `symbol`, most significant bit first.
// `context` starts at 1 and accumulates the bits already coded, so each bit
// is coded with the model selected by the prefix that precedes it.
void CLiteralEncoder2::Encode(NRangeCoder::CEncoder *rangeEncoder, Byte symbol)
{
  UInt32 context = 1;
  for (int shift = 7; shift >= 0; shift--)
  {
    const UInt32 bit = (symbol >> shift) & 1;
    _encoders[context].Encode(rangeEncoder, bit);
    context = (context << 1) | bit;
  }
}
// Encodes the 8 bits of `symbol` (most significant bit first) using
// `matchByte` as additional context.
//
// Phase 1: while the bits of `symbol` agree with the bits of `matchByte`, each
// bit is coded with the model at 0x100 + (matchBit << 8) + context. The bit at
// which they first differ is still coded with that model.
// Phase 2: every bit after the first disagreement is coded with the plain
// model at _encoders[context], exactly as Encode() does.
void CLiteralEncoder2::EncodeMatched(NRangeCoder::CEncoder *rangeEncoder,
    Byte matchByte, Byte symbol)
{
  UInt32 context = 1;
  int shift = 8;

  // Phase 1: match-aware models.
  while (shift != 0)
  {
    shift--;
    const UInt32 symbolBit = (symbol >> shift) & 1;
    const UInt32 matchBit = (matchByte >> shift) & 1;
    _encoders[0x100 + (matchBit << 8) + context].Encode(rangeEncoder, symbolBit);
    context = (context << 1) | symbolBit;
    if (matchBit != symbolBit)
      break;
  }

  // Phase 2: plain models for whatever bits remain (none if no mismatch
  // occurred or the mismatch was on the last bit).
  while (shift != 0)
  {
    shift--;
    const UInt32 symbolBit = (symbol >> shift) & 1;
    _encoders[context].Encode(rangeEncoder, symbolBit);
    context = (context << 1) | symbolBit;
  }
}
// Returns the summed GetPrice() of the bit models that would be used to code
// `symbol`, without encoding anything.
//
// matchMode - when true, the leading bits are priced with the match-aware
//             models (index 0x100 + (matchBit << 8) + context) up to and
//             including the first bit where `symbol` and `matchByte` differ;
//             when false, `matchByte` is ignored.
// matchByte - reference byte used only in match mode.
// symbol    - the byte being priced; bits are visited most significant first.
//
// All bits not consumed by the match-aware loop are priced with the plain
// models at _encoders[context].
UInt32 CLiteralEncoder2::GetPrice(bool matchMode, Byte matchByte, Byte symbol) const
{
  UInt32 price = 0;
  UInt32 context = 1;
  int i = 8;
  if (matchMode)
  {
    do
    {
      i--;
      UInt32 matchBit = (matchByte >> i) & 1;
      UInt32 bit = (symbol >> i) & 1;
      price += _encoders[0x100 + (matchBit << 8) + context].GetPrice(bit);
      context = (context << 1) | bit;
      if (matchBit != bit)
        break;
    }
    while (i != 0);
  }
  // Remaining bits (all 8 when matchMode is false).
  while (i != 0)
  {
    i--;
    UInt32 bit = (symbol >> i) & 1;
    price += _encoders[context].GetPrice(bit);
    context = (context << 1) | bit;
  }
  return price;
}
namespace NLength {
// Resets every model of the length encoder: the two choice bits, the low and
// mid coders of the first `numPosStates` position states, and the shared high
// coder.
void CEncoder::Init(UInt32 numPosStates)
{
  _choice.Init();
  _choice2.Init();
  UInt32 state = 0;
  while (state < numPosStates)
  {
    _lowCoder[state].Init();
    _midCoder[state].Init();
    state++;
  }
  _highCoder.Init();
}
// Encodes a length symbol in one of three ranges:
//   [0, kNumLowSymbols)                    -> choice bit 0, low coder of posState
//   [kNumLowSymbols, +kNumMidSymbols)      -> choice bits 1,0, mid coder of posState
//   everything above                       -> choice bits 1,1, shared high coder
// The low and mid coders are selected by `posState`; the high coder is not.
void CEncoder::Encode(NRangeCoder::CEncoder *rangeEncoder, UInt32 symbol, UInt32 posState)
{
  if (symbol < kNumLowSymbols)
  {
    _choice.Encode(rangeEncoder, 0);
    _lowCoder[posState].Encode(rangeEncoder, symbol);
    return;
  }

  _choice.Encode(rangeEncoder, 1);
  if (symbol < kNumLowSymbols + kNumMidSymbols)
  {
    _choice2.Encode(rangeEncoder, 0);
    _midCoder[posState].Encode(rangeEncoder, symbol - kNumLowSymbols);
    return;
  }

  _choice2.Encode(rangeEncoder, 1);
  _highCoder.Encode(rangeEncoder, symbol - kNumLowSymbols - kNumMidSymbols);
}
// Fills prices[0 .. numSymbols-1] with the price of coding each length symbol
// for the given `posState`. Each entry is the price of the choice bit(s) that
// select the symbol's range plus the price reported by that range's coder.
// Entries at index numSymbols and beyond are not written.
void CEncoder::SetPrices(UInt32 posState, UInt32 numSymbols, UInt32 *prices) const
{
  const UInt32 lowPrefix = _choice.GetPrice0();
  const UInt32 notLowPrefix = _choice.GetPrice1();
  const UInt32 midPrefix = notLowPrefix + _choice2.GetPrice0();
  const UInt32 highPrefix = notLowPrefix + _choice2.GetPrice1();

  // End of the low and mid ranges, clamped to the number of requested symbols.
  UInt32 lowEnd = kNumLowSymbols;
  if (numSymbols < lowEnd)
    lowEnd = numSymbols;
  UInt32 midEnd = kNumLowSymbols + kNumMidSymbols;
  if (numSymbols < midEnd)
    midEnd = numSymbols;

  UInt32 len = 0;
  while (len < lowEnd)
  {
    prices[len] = lowPrefix + _lowCoder[posState].GetPrice(len);
    len++;
  }
  while (len < midEnd)
  {
    prices[len] = midPrefix + _midCoder[posState].GetPrice(len - kNumLowSymbols);
    len++;
  }
  while (len < numSymbols)
  {
    prices[len] = highPrefix + _highCoder.GetPrice(len - kNumLowSymbols - kNumMidSymbols);
    len++;
  }
}
}
// Sets the encoder's default parameters; SetCoderProperties() can override
// them afterwards.
CEncoder::CEncoder():
_numFastBytes(kNumFastBytesDefault),
// Same formula SetCoderProperties() uses: twice the dictionary log size.
_distTableSize(kDefaultDictionaryLogSize * 2),
_posStateBits(2),
// Mask for _posStateBits == 2, i.e. (1 << 2) - 1.
_posStateMask(4 - 1),
_numLiteralPosStateBits(0),
_numLiteralContextBits(3),
_dictionarySize(1 << kDefaultDictionaryLogSize),
// UInt32(-1) can never equal a real setting, so the first Create() call
// always (re)creates the match finder buffers.
_dictionarySizePrev(UInt32(-1)),
_numFastBytesPrev(UInt32(-1)),
_matchFinderIndex(kBT4),
#ifdef COMPRESS_MF_MT
_multiThread(false),
#endif
_writeEndMark(false)
{
// _maxMode = false;
_fastMode = false;
}
// Creates or re-creates the helper objects the encoder needs: the range
// encoder, the match finder selected by _matchFinderIndex, the literal encoder
// and the match finder's buffers.
// Returns S_OK on success and E_OUTOFMEMORY when a creation step fails or no
// match finder could be instantiated. Calls wrapped in RINOK presumably return
// the callee's error code early on failure (macro defined elsewhere).
HRESULT CEncoder::Create()
{
if (!_rangeEncoder.Create(1 << 20))
return E_OUTOFMEMORY;
// Instantiate a match finder only when none is currently held;
// SetCoderProperties() releases it when the selection changes.
if (!_matchFinder)
{
// Only the finders compiled in have a case here. The switch has no default,
// so an index whose finder was compiled out leaves _matchFinder null.
switch(_matchFinderIndex)
{
#ifdef COMPRESS_MF_BT
#ifdef COMPRESS_MF_BT2
case kBT2:
_matchFinder = new NBT2::CMatchFinder;
break;
#endif
#ifdef COMPRESS_MF_BT3
case kBT3:
_matchFinder = new NBT3::CMatchFinder;
break;
#endif
#ifdef COMPRESS_MF_BT4
case kBT4:
_matchFinder = new NBT4::CMatchFinder;
break;
#endif
#endif
#ifdef COMPRESS_MF_HC
case kHC4:
_matchFinder = new NHC4::CMatchFinder;
break;
#endif
}
// Covers both a failed allocation and an unsupported _matchFinderIndex.
if (_matchFinder == 0)
return E_OUTOFMEMORY;
#ifdef COMPRESS_MF_MT
// Wrap the finder in the multi-threaded adapter, except for the
// combination fast mode + HC4.
if (_multiThread && !(_fastMode && (_matchFinderIndex == kHC4)))
{
CMatchFinderMT *mfSpec = new CMatchFinderMT;
if (mfSpec == 0)
return E_OUTOFMEMORY;
// mf keeps the wrapper referenced while _matchFinder is swapped over to it.
CMyComPtr<IMatchFinder> mf = mfSpec;
RINOK(mfSpec->SetMatchFinder(_matchFinder));
_matchFinder.Release();
_matchFinder = mf;
}
#endif
}
if (!_literalEncoder.Create(_numLiteralPosStateBits, _numLiteralContextBits))
return E_OUTOFMEMORY;
// The match finder buffers depend only on the dictionary size and the number
// of fast bytes; skip re-creating them when neither changed since the last
// successful Create().
if (_dictionarySize == _dictionarySizePrev && _numFastBytesPrev == _numFastBytes)
return S_OK;
// NOTE(review): the trailing remark below is the original author's; it appears
// to refer to the last argument, but the parameter meanings are not visible in
// this file - confirm against the match finder's Create() declaration.
RINOK(_matchFinder->Create(_dictionarySize, kNumOpts, _numFastBytes, kMatchMaxLen + 1)); // actually it's + _numFastBytes - _numFastBytes
// Remember the settings the buffers were created for.
_dictionarySizePrev = _dictionarySize;
_numFastBytesPrev = _numFastBytes;
return S_OK;
}
// Compares `testString` against `base`, folding the ASCII letters 'a'..'z' of
// `testString` to upper case first. `base` is compared as-is, so it is
// expected to be upper case already (a lower-case letter in `base` never
// matches).
//
// Returns true only when both strings have the same length and every folded
// character of `testString` equals the corresponding character of `base`.
// A null pointer for either argument is treated as "not equal" instead of
// being dereferenced; callers may pass strings that come straight from a
// property value, which can legitimately be null.
static bool AreStringsEqual(const wchar_t *base, const wchar_t *testString)
{
  if (base == 0 || testString == 0)
    return false;
  while (true)
  {
    wchar_t c = *testString;
    if (c >= 'a' && c <= 'z')
      c -= 0x20; // ASCII lower case -> upper case
    if (*base != c)
      return false;
    if (c == 0)
      return true; // both strings ended at the same position
    base++;
    testString++;
  }
}
// Looks up `s` in kMatchFinderIDs using AreStringsEqual (which upper-cases the
// ASCII letters of `s` before comparing).
// Returns the index of the matching entry, or -1 when no entry matches.
static int FindMatchFinder(const wchar_t *s)
{
  const int numIDs = (int)(sizeof(kMatchFinderIDs) / sizeof(kMatchFinderIDs[0]));
  int index = 0;
  while (index < numIDs)
  {
    if (AreStringsEqual(kMatchFinderIDs[index], s))
      return index;
    index++;
  }
  return -1;
}
// Applies `numProperties` encoder settings; propIDs[i] names the setting and
// properties[i] carries its value.
// Returns S_OK when every property was accepted, or E_INVALIDARG at the first
// property with an unknown ID, an unexpected variant type or an out-of-range
// value. Properties are applied in order, so those preceding a rejected one
// remain in effect.
STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs,
const PROPVARIANT *properties, UInt32 numProperties)
{
for (UInt32 i = 0; i < numProperties; i++)
{
const PROPVARIANT &prop = properties[i];
switch(propIDs[i])
{
// Number of fast bytes: VT_UI4 in the range [5, kMatchMaxLen].
case NCoderPropID::kNumFastBytes:
{
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 numFastBytes = prop.ulVal;
if(numFastBytes < 5 || numFastBytes > kMatchMaxLen)
return E_INVALIDARG;
_numFastBytes = numFastBytes;
break;
}
// Algorithm: VT_UI4; 0 selects fast mode, any other value turns it off.
case NCoderPropID::kAlgorithm:
{
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 maximize = prop.ulVal;
_fastMode = (maximize == 0);
// _maxMode = (maximize >= 2);
break;
}
// Match finder: VT_BSTR holding one of the names in kMatchFinderIDs.
case NCoderPropID::kMatchFinder:
{
if (prop.vt != VT_BSTR)
return E_INVALIDARG;
int matchFinderIndexPrev = _matchFinderIndex;
int m = FindMatchFinder(prop.bstrVal);
if (m < 0)
return E_INVALIDARG;
_matchFinderIndex = m;
// A different finder was chosen: drop the existing instance and reset the
// cached dictionary size so that Create() builds the new finder from scratch.
if (_matchFinder && matchFinderIndexPrev != _matchFinderIndex)
{
_dictionarySizePrev = UInt32(-1);
_matchFinder.Release();
}
break;
}
#ifdef COMPRESS_MF_MT
// Multi-threading: VT_BOOL. A change invalidates the current match finder
// because Create() decides whether to wrap it in the multi-threaded adapter.
case NCoderPropID::kMultiThread:
{
if (prop.vt != VT_BOOL)
return E_INVALIDARG;
bool newMultiThread = (prop.boolVal == VARIANT_TRUE);
if (newMultiThread != _multiThread)
{
_dictionarySizePrev = UInt32(-1);
_matchFinder.Release();
}
_multiThread = newMultiThread;
break;
}
#endif
// Dictionary size in bytes: VT_UI4 in [1 << kDicLogSizeMin, 1 << 30].
case NCoderPropID::kDictionarySize:
{
const int kDicLogSizeMaxCompress = 30;
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 dictionarySize = prop.ulVal;
if (dictionarySize < UInt32(1 << kDicLogSizeMin) ||
dictionarySize > UInt32(1 << kDicLogSizeMaxCompress))
return E_INVALIDARG;
_dictionarySize = dictionarySize;
// Find the smallest dicLogSize with dictionarySize <= 2^dicLogSize;
// the distance table size is twice that exponent.
UInt32 dicLogSize;
for(dicLogSize = 0; dicLogSize < (UInt32)kDicLogSizeMaxCompress; dicLogSize++)
if (dictionarySize <= (UInt32(1) << dicLogSize))
break;
_distTableSize = dicLogSize * 2;
break;
}
// Position state bits: VT_UI4, at most kNumPosStatesBitsEncodingMax.
// The mask is kept in sync with the bit count.
case NCoderPropID::kPosStateBits:
{
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 value = prop.ulVal;
if (value > (UInt32)NLength::kNumPosStatesBitsEncodingMax)
return E_INVALIDARG;
_posStateBits = value;
_posStateMask = (1 << _posStateBits) - 1;
break;
}
// Literal position bits: VT_UI4, at most kNumLitPosStatesBitsEncodingMax.
case NCoderPropID::kLitPosBits:
{
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 value = prop.ulVal;
if (value > (UInt32)kNumLitPosStatesBitsEncodingMax)
return E_INVALIDARG;
_numLiteralPosStateBits = value;
break;
}
// Literal context bits: VT_UI4, at most kNumLitContextBitsMax.
case NCoderPropID::kLitContextBits:
{
if (prop.vt != VT_UI4)
return E_INVALIDARG;
UInt32 value = prop.ulVal;
if (value > (UInt32)kNumLitContextBitsMax)
return E_INVALIDARG;
_numLiteralContextBits = value;
break;
}
// End marker: VT_BOOL, forwarded to SetWriteEndMarkerMode().
case NCoderPropID::kEndMarker:
{
if (prop.vt != VT_BOOL)
return E_INVALIDARG;
SetWriteEndMarkerMode(prop.boolVal == VARIANT_TRUE);
break;
}
// Any property ID not handled above is rejected.
default:
return E_INVALIDARG;
}
}
return S_OK;
}
// Writes the 5-byte coder property block to `outStream`:
//   byte 0     : (_posStateBits * 5 + _numLiteralPosStateBits) * 9 + _numLiteralContextBits
//   bytes 1..4 : _dictionarySize, least significant byte first
// Returns the result of WriteStream().
STDMETHODIMP CEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
{
  const UInt32 kPropSize = 5;
  Byte properties[kPropSize];
  properties[0] = (_posStateBits * 5 + _numLiteralPosStateBits) * 9 + _numLiteralContextBits;

  // Emit the dictionary size one byte at a time, lowest byte first.
  Byte *dest = properties + 1;
  for (int shift = 0; shift < 32; shift += 8)
    *dest++ = Byte(_dictionarySize >> shift);

  return WriteStream(outStream, properties, kPropSize, NULL);
}
// Hands `outStream` to the range encoder via SetStream().
// Always returns S_OK.
STDMETHODIMP CEncoder::SetOutStream(ISequentialOutStream *outStream)
{
_rangeEncoder.SetStream(outStream);
return S_OK;
}
// Counterpart of SetOutStream(): calls ReleaseStream() on the range encoder.
// Always returns S_OK.
STDMETHODIMP CEncoder::ReleaseOutStream()
{
_rangeEncoder.ReleaseStream();
return S_OK;
}
// Resets the encoder to its initial coding state: the base state, the range
// encoder, every probability model and the optimizer bookkeeping fields.
// Always returns S_OK.
HRESULT CEncoder::Init()
{
  CBaseState::Init();
  // RINOK(_matchFinder->Init(inStream));
  _rangeEncoder.Init();

  // Per-state models; the match / rep0-long models additionally depend on the
  // position state (0 .. _posStateMask inclusive).
  for (int state = 0; state < kNumStates; state++)
  {
    for (UInt32 posState = 0; posState <= _posStateMask; posState++)
    {
      _isMatch[state][posState].Init();
      _isRep0Long[state][posState].Init();
    }
    _isRep[state].Init();
    _isRepG0[state].Init();
    _isRepG1[state].Init();
    _isRepG2[state].Init();
  }

  _literalEncoder.Init();

  // Position slot and position models.
  UInt32 k;
  for (k = 0; k < kNumLenToPosStates; k++)
    _posSlotEncoder[k].Init();
  for (k = 0; k < kNumFullDistances - kEndPosModelIndex; k++)
    _posEncoders[k].Init();

  // Both length encoders are initialized for 2^_posStateBits position states.
  _lenEncoder.Init(1 << _posStateBits);
  _repMatchLenEncoder.Init(1 << _posStateBits);
  _posAlignEncoder.Init();

  // Optimizer bookkeeping.
  _longestMatchWasFound = false;
  _optimumEndIndex = 0;
  _optimumCurrentIndex = 0;
  _additionalOffset = 0;
  return S_OK;
}
HRESULT CEncoder::MovePos(UInt32 num)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -