📄 ppmz2.c
字号:
//-{------------------------------------------------------------------------
#include "ppmz2.h"
#include <crblib/arithc.h>
#include <crblib/tsc.h>
#include <stdio.h>
#include "ppmdet.h"
#include "ContextTrie.h"
#include "context.h"
#include "exclude.h"
#include "minusone.h"
#include "config.h"
#include "codecntx.h"
#include "loe.h"
#define DO_STATS
// All of the state one encode or decode run needs; built by PPMZ2_Create
// and released by PPMZ2_Destroy.
typedef struct
{
ContextTrie * ContextTrie; // from ContextTrie_Create; queried once per symbol with ContextTrie_GetNodes
arithInfo * ari; // arithmetic coder state from arithInit
Exclude * exclude; // from Exclude_Create(256); reset with Exclude_Clear before each symbol
See * see; // from See_Create; handed to LOE_ChooseOrder, the context coders and Context_Update
PPMDet * det; // from PPMDet_Create; tried first for each symbol via PPMDet_Encode / PPMDet_Decode
} PPMZ2;
// Allocate a PPMZ2 object and create all of its sub-objects.
//
// Returns the new object, or NULL if the object itself or its context trie
// could not be allocated (in builds with assertions enabled those failures
// abort instead).  Release the result with PPMZ2_Destroy.
PPMZ2 * PPMZ2_Create(void)
{
PPMZ2 * ppz;
uint alphabets[PPMZ2_Order];

	// The table below fills exactly eight slots by hand, so it is only
	// valid while the configured order is 8; any other value would write
	// outside 'alphabets' (or leave entries uninitialized).
	assert( PPMZ2_Order == 8 );

	ppz = new(*ppz);
	assert(ppz);
	if ( ! ppz )
		return NULL;

	// Alphabet size of each trie level, matching the index widths the
	// coding loops build : five single-byte levels, one three-byte level,
	// and two full-word levels.
	alphabets[0] = alphabets[1] = alphabets[2] = alphabets[3] = alphabets[4] = 256;
	alphabets[5] = 256*256*256;
	alphabets[6] = alphabets[7] = UINT_MAX;

	ppz->ContextTrie = ContextTrie_Create(alphabets,PPMZ2_Order,PPMZ2_TrieMegs);
	assert(ppz->ContextTrie);
	if ( ! ppz->ContextTrie )
	{
		// nothing else has been created yet, so only the shell needs freeing
		destroy(ppz);
		return NULL;
	}

	ppz->ari = arithInit();
	ppz->exclude = Exclude_Create(256);
	ppz->see = See_Create();
	ppz->det = PPMDet_Create(PPMZ2_DetMegs);

	return ppz;
}
// Release a PPMZ2 object made by PPMZ2_Create, along with its sub-objects.
// Passing NULL is a no-op.
void PPMZ2_Destroy(PPMZ2 * ppz)
{
	if ( ! ppz )
		return;

	Exclude_Destroy(ppz->exclude);
	arithFree(ppz->ari);
	See_Destroy(ppz->see);

	// doing it this way is much faster, but ruins any encapsulation
	// we might have had : per the original author's note, the contexts
	// are destroyed for ALL ppz objects, not just this one
	ContextTrie_Destroy(ppz->ContextTrie);
	Context_DestroyAllContexts();

	PPMDet_Destroy(ppz->det);
	destroy(ppz);
}
//-}{------------------------------------------------------------------------
// Code Array Functions
// Compress rawLen bytes starting at rawBuf into compBuf.
//
//  rawBuf  : input bytes.  The PPMZ2_MaxContextLen bytes immediately BEFORE
//            rawBuf are overwritten with PPMZ2_SeedByte, so the caller must
//            own writable memory there.
//  rawLen  : number of input bytes.
//  compBuf : output.  Its size is not checked here; it begins with
//            PPMZ2_SeedBytes bytes stored raw, followed by the arithmetic
//            coder's output.
//
// Returns PPMZ2_SeedBytes plus the value reported by arithEncodeDone, or 0
// if the coder state could not be created.  Progress goes to stderr and
// timing (plus per-order statistics when DO_STATS is defined) to stdout.
uint PPMZ2_EncodeArray(ubyte *rawBuf,uint rawLen,ubyte *compBuf)
{
PPMZ2 * ppz;
arithInfo * ari;
uint compLen;
uint seedLen;
int i;
ubyte *rawPtr,*rawBufEnd;
#ifdef DO_STATS
int numChoseLOE[PPMZ2_Order+1],
	numTriedByOrder[PPMZ2_Order+1],
	numCodedByOrder[PPMZ2_Order+1];
int numCodedDet=0;
#endif

	assert( PPMZ2_SeedBytes > 0 );

	ppz = PPMZ2_Create();
	if ( ! ppz )
		return 0;

	rawPtr = rawBuf;
	rawBufEnd = rawBuf + rawLen;

	pushTSC();

	// seed a preamble : the first bytes are stored uncompressed.
	// Never read more than the input actually holds; when the input is
	// shorter than the preamble the unused slots are zeroed so the
	// stream layout (and output) stays well defined.
	seedLen = ( rawLen < (uint)PPMZ2_SeedBytes ) ? rawLen : (uint)PPMZ2_SeedBytes;
	if ( seedLen > 0 )
		memcpy(compBuf,rawPtr,seedLen);
	memset(compBuf + seedLen,0,(uint)PPMZ2_SeedBytes - seedLen);

	// fill the history in front of the buffer so the negative-offset
	// context reads in the loop below see known values
	memset(rawPtr - PPMZ2_MaxContextLen,PPMZ2_SeedByte,PPMZ2_MaxContextLen);

	rawPtr += seedLen;

	ari = ppz->ari;
	arithEncodeInitNoStuff(ari,compBuf + PPMZ2_SeedBytes);

#ifdef DO_STATS
	memset(numChoseLOE,0,(PPMZ2_Order+1)*sizeof(int));
	memset(numTriedByOrder,0,(PPMZ2_Order+1)*sizeof(int));
	memset(numCodedByOrder,0,(PPMZ2_Order+1)*sizeof(int));
#endif

	while(rawPtr < rawBufEnd)
	{
	ulong cntx;
	ulong tindeces[PPMZ2_Order];
	Context * contexts[PPMZ2_Order+1];
	int sym,LOE_Order,order,codedOrder;
	bool useFull;

		// reset exclusions
		Exclude_Clear(ppz->exclude);

		sym = *rawPtr;

		// build the forward context
		cntx = getulong(rawPtr-4);

		// the context structure is determined by these indices;
		// we do 1-2-3-4-5-8-12-16 , all with the ContextTrie
		tindeces[0] = rawPtr[-1]; tindeces[1] = rawPtr[-2];
		tindeces[2] = rawPtr[-3]; tindeces[3] = rawPtr[-4];
		tindeces[4] = rawPtr[-5];
		tindeces[5] = rawPtr[-6] + (rawPtr[-7]<<8) + (rawPtr[-8]<<16);
		tindeces[6] = getulong(rawPtr-12);
		tindeces[7] = getulong(rawPtr-16);

		// get all the contexts from the ContextTrie
		// we must do this before the det, because the det points out of
		// the top context node (we don't need to do this in "follow" mode)
		ContextTrie_GetNodes(ppz->ContextTrie,tindeces,contexts);

		// note : including Det in the LOE helps compression on text-like files
		// (paper1,bib) but hurts on LZ-like files (trans,progs)
		// since it hurts speed so much, we don't do it

		useFull = true;

		if ( ! PPMDet_Encode(ppz->det,ari,rawPtr,rawBuf,sym,ppz->exclude,contexts[PPMZ2_Order],&useFull) )
		{
			// do the initial LOE to pick a start order
			LOE_Order = LOE_ChooseOrder(contexts,cntx,PPMZ2_Order,ppz->exclude,ppz->see,useFull);
#ifdef DO_STATS
			numChoseLOE[LOE_Order] ++;
#endif

			// go down the orders
			for(order=LOE_Order;;)
			{
				// try to code from this order
#ifdef DO_STATS
				numTriedByOrder[order] ++;
#endif
				if ( CodeContext_EncodeFromContext(ari,contexts[order],ppz->exclude,ppz->see,cntx,sym,&useFull) )
				{
#ifdef DO_STATS
					numCodedByOrder[order] ++;
#endif
					goto encoded;
				}

				if ( order == 0 )
					break;

				// maybe skip down a few :
				order = LOE_ChooseOrder(contexts,cntx,order-1,ppz->exclude,ppz->see,useFull);
			}

			// no context coded the symbol : encode raw with order -1
			if ( PPMZ2_TextMode )
				encodeOrderMinusOneText(sym,ari,ppz->exclude);
			else
				encodeOrderMinusOne(sym,256,ari,ppz->exclude);

			// did encode, now update the stats
		encoded:
			codedOrder = max(order,0);

			for(order=0;order<=PPMZ2_Order;order++)
			{
				Context_Update(contexts[order],sym,cntx,ppz->see,codedOrder);
			}
		}
		else
		{
#ifdef DO_STATS
			numCodedDet++;
#endif
		}

		PPMDet_Update(ppz->det,rawPtr,rawBuf,sym);

		rawPtr++;

		// progress meter
		i = (int)(rawPtr - rawBuf);
		if ( i % PPMZ2_PrintfInterval == 0 )
		{
			fprintf(stderr,"%d/%u\r",i,(unsigned)rawLen);
			fflush(stderr);
		}
	}

	fprintf(stderr,"%u/%u\n",(unsigned)rawLen,(unsigned)rawLen);

	showPopTSCper(stdout,"encode",rawLen,"byte");

	compLen = arithEncodeDone(ari) + PPMZ2_SeedBytes;

	PPMZ2_Destroy(ppz);

	// the arithc has stuffed 1 byte over the end of the preamble; put it
	// back (or restore the zero padding when the input was too short to
	// fill that slot)
	compBuf[PPMZ2_SeedBytes-1] = ( seedLen == (uint)PPMZ2_SeedBytes ) ? rawBuf[PPMZ2_SeedBytes-1] : 0;

#ifdef DO_STATS
	printf("o : %7s : %7s : %7s\n","loe","tried","coded");
	printf("d : %7u : %7u : %7d\n",
		(unsigned)rawLen,(unsigned)rawLen,numCodedDet);
	for(i=PPMZ2_Order;i>=0;i--)
	{
		printf("%d : %7d : %7d : %7d\n",
			i,numChoseLOE[i],numTriedByOrder[i],numCodedByOrder[i]);
	}
#endif

	return compLen;
}
// Decompress a stream produced by PPMZ2_EncodeArray.
//
//  rawBuf  : output; receives rawLen bytes.  The PPMZ2_MaxContextLen bytes
//            immediately BEFORE rawBuf are overwritten with PPMZ2_SeedByte,
//            so the caller must own writable memory there.
//  rawLen  : number of bytes to reconstruct (the original input length).
//  compBuf : compressed data : PPMZ2_SeedBytes raw preamble bytes followed
//            by the arithmetic coder's data.
//
// Returns true once rawLen bytes have been produced, or false if the coder
// state could not be created.  The stream itself is not validated here.
// Progress goes to stderr and timing to stdout.
bool PPMZ2_DecodeArray(ubyte *rawBuf,uint rawLen,ubyte *compBuf)
{
PPMZ2 * ppz;
arithInfo * ari;
uint seedLen;
int i;
ubyte *rawBufEnd,*rawPtr;

	ppz = PPMZ2_Create();
	if ( ! ppz )
		return false;

	rawPtr = rawBuf;
	rawBufEnd = rawBuf + rawLen;

	// copy the raw preamble out, but never write more than rawLen bytes :
	// an output shorter than the preamble only takes what fits
	seedLen = ( rawLen < (uint)PPMZ2_SeedBytes ) ? rawLen : (uint)PPMZ2_SeedBytes;
	if ( seedLen > 0 )
		memcpy(rawPtr,compBuf,seedLen);

	// fill the history in front of the buffer so the negative-offset
	// context reads in the loop below see the same values the encoder did
	memset(rawPtr - PPMZ2_MaxContextLen,PPMZ2_SeedByte,PPMZ2_MaxContextLen);

	rawPtr += seedLen;

	// the coded data always starts after the full-size preamble
	compBuf += PPMZ2_SeedBytes;

	ari = ppz->ari;
	arithDecodeInitNoStuff(ari,compBuf);

	pushTSC();

	while(rawPtr < rawBufEnd)
	{
	ulong cntx;
	ulong tindeces[PPMZ2_Order];
	Context * contexts[PPMZ2_Order+1];
	int sym,LOE_Order,order,codedOrder;
	bool useFull;

		// reset exclusions
		Exclude_Clear(ppz->exclude);

		// build the forward context
		cntx = getulong(rawPtr-4);

		// same index layout as the encoder : 1-2-3-4-5-8-12-16
		tindeces[0] = rawPtr[-1]; tindeces[1] = rawPtr[-2];
		tindeces[2] = rawPtr[-3]; tindeces[3] = rawPtr[-4];
		tindeces[4] = rawPtr[-5];
		tindeces[5] = rawPtr[-6] + (rawPtr[-7]<<8) + (rawPtr[-8]<<16);
		tindeces[6] = getulong(rawPtr-12);
		tindeces[7] = getulong(rawPtr-16);

		// get all the contexts from the ContextTrie
		ContextTrie_GetNodes(ppz->ContextTrie,tindeces,contexts);

		useFull = true;

		if ( ! PPMDet_Decode(ppz->det,ari,rawPtr,rawBuf,&sym,ppz->exclude,contexts[PPMZ2_Order],&useFull) )
		{
			// do the initial LOE to pick a start order
			LOE_Order = LOE_ChooseOrder(contexts,cntx,PPMZ2_Order,ppz->exclude,ppz->see,useFull);

			// go down the orders
			for(order=LOE_Order;;)
			{
				// try to decode from this order
				if ( CodeContext_DecodeFromContext(ari,contexts[order],ppz->exclude,ppz->see,cntx,&sym,&useFull) )
				{
					goto decoded;
				}

				if ( order == 0 )
					break;

				// maybe skip down a few :
				order = LOE_ChooseOrder(contexts,cntx,order-1,ppz->exclude,ppz->see,useFull);
			}

			// no context decoded the symbol : decode raw with order -1
			if ( PPMZ2_TextMode )
				sym = decodeOrderMinusOneText(ari,ppz->exclude);
			else
				sym = decodeOrderMinusOne(256,ari,ppz->exclude);

			// did decode, now update the stats
		decoded:
			codedOrder = max(order,0);

			for(order=0;order<=PPMZ2_Order;order++)
			{
				Context_Update(contexts[order],sym,cntx,ppz->see,codedOrder);
			}
		}

		PPMDet_Update(ppz->det,rawPtr,rawBuf,sym);

		*rawPtr++ = sym;

		// progress meter
		i = (int)(rawPtr - rawBuf);
		if ( i % PPMZ2_PrintfInterval == 0 )
		{
			fprintf(stderr,"%d/%u\r",i,(unsigned)rawLen);
			fflush(stderr);
		}
	}

	fprintf(stderr,"%u/%u\n",(unsigned)rawLen,(unsigned)rawLen);

	showPopTSCper(stdout,"decode",rawLen,"byte");

	arithDecodeDone(ari);

	PPMZ2_Destroy(ppz);

	return true;
}
//-}------------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -