📄 ppmdet.c
字号:
//-{-----------------------------------------------------------------------
#include "ppmdet.h"
#include "ppmzesc.h"
#include <crblib/inc.h>
#include <crblib/mempool.h>
#include <crblib/intmath.h>
#include <crblib/list.h>
#include "config.h"
/*******
@@ LRU the contexts!
-------------
with the matchPtr method :
trans : 93695 -> 14209 = 1.213 bpb
with the fast followNode method
currently -> 14213 , 1.214 bpb
*********/
//-}{----------------------------------------------------------------------
#define PPMDet_MaxMatchLen (1024)
#define PPMDet_MaxNodeWalk (100)
#define NodeArraySize (1<<18) // a 256k window
#define DO_ADDNODE_ON_SUCCESS
#define HASH_MASK (0xFFFF)
#define HASH_SIZE (HASH_MASK + 1 )
#define HASH(x) ((((x)>>11) ^ (x) )&HASH_MASK)
//-}{----------------------------------------------------------------------
typedef struct PPMDetNode PPMDetNode;
typedef struct PPMDetContext PPMDetContext;
struct PPMDetNode
{
LinkNode LN; // must be at head
uword detMinLen,sym;
ubyte * backPtrMinus13;
};
struct PPMDetContext
{
LinkNode Nodes;
uint match,escape;
};
struct PPMDet
{
MemPool * PPMDetContextPool;
PPMZEsc *zesc;
PPMDetNode NodeArray[NodeArraySize+1];
uint NodeArrayI;
// stuff saved from Enc/Dec to Update :
Context * gotContext;
PPMDetContext * gotDContext;
PPMDetNode *gotNode,*nextNode;
uint gotML,longestML;
};
PPMDetNode * PPMDet_GetNode(PPMDet * det,PPMDetContext *c,ubyte * backPtr,ubyte *backBase);
PPMDetNode * PPMDet_AddNodeToContext(PPMDet * det,Context * c,int sym,ubyte * backPtr,uint minLen);
static int matchBack(ubyte *p1,ubyte *p2,ubyte *backBase);
//-}{----------------------------------------------------------------------
PPMDet * PPMDet_Create(uint UseMegs)
{
PPMDet * det;
int i;
det = new(PPMDet);
det->PPMDetContextPool = MemPool_Create(sizeof(PPMDetContext),1024,256);
det->zesc = PPMZEsc_Create();
det->NodeArrayI = 0;
for(i=0;i<NodeArraySize;i++)
{
LN_Null( &(det->NodeArray[i]) );
}
return det;
}
void PPMDet_Destroy(PPMDet * det)
{
assert(det);
MemPool_Destroy(det->PPMDetContextPool);
PPMZEsc_Destroy(det->zesc);
destroy(det);
}
void PPMDet_Update(PPMDet * det,ubyte * backPtr,ubyte *backBase,int sym)
{
PPMDetNode * n;
det->nextNode = NULL;
// remembered from the last encode/decode call:
n = det->gotNode;
assert( det->gotContext );
if ( n )
{
assert(det->gotDContext);
if ( n->sym == sym )
{
uint i;
det->gotDContext->match ++;
i = ((ulong)n) - (ulong)(det->NodeArray);
if ( i == (NodeArraySize-1)*sizeof(PPMDetNode) )
det->nextNode = det->NodeArray;
else
det->nextNode = n + 1;
// nextNode may not be filled out yet until we do AddNode below :
}
else
{
det->gotDContext->escape ++;
assert( det->gotML >= n->detMinLen );
n->detMinLen = det->gotML + PPMDet_MinLen_Inc;
}
}
PPMDet_AddNodeToContext(det,det->gotContext,sym,backPtr,det->longestML + 1);
}
static void PPMDet_DoMatch(PPMDet * det,ubyte * backPtr,ubyte *backBase,Context *context)
{
det->gotContext = context;
if ( det->nextNode )
{
det->gotDContext = (PPMDetContext *)(context->child);
if ( det->gotDContext )
{
det->gotNode = det->nextNode;
det->gotML ++;
det->longestML = max(det->longestML,det->gotML);
if ( det->gotML >= 64 )
{
// force it to accept this match
det->gotNode->detMinLen = min(det->gotNode->detMinLen,det->gotML);
}
else
{
if ( det->gotML < det->gotNode->detMinLen )
{
PPMDet_GetNode(det,(PPMDetContext *)(context->child),backPtr,backBase);
}
}
}
else
{
PPMDet_GetNode(det,(PPMDetContext *)(context->child),backPtr,backBase);
}
}
else
{
det->gotDContext = NULL;
det->gotNode = NULL;
PPMDet_GetNode(det,(PPMDetContext *)(context->child),backPtr,backBase);
}
}
uint PPMDet_GetMPSP(PPMDet * det,ubyte * backPtr,ubyte *backBase,Context *context)
{
int esc,tot,count;
PPMDet_DoMatch(det,backPtr,backBase,context);
if ( ! det->gotNode )
return 0;
count = det->gotDContext->match;
PPMZEsc_GetStats(det->zesc,&esc,&tot, getulong(backPtr-4), 1, count, 8, context->upex.numSyms);
return ((tot - esc)<<PPMZ2_IntProb_Shift)/tot;
}
bool PPMDet_Encode(PPMDet * det,arithInfo * ari,ubyte * backPtr,ubyte *backBase,int sym,
Exclude *exc,Context *context,bool * pUseFull)
{
bool match;
int count,pred;
assert( *pUseFull == true );
PPMDet_DoMatch(det,backPtr,backBase,context);
if ( ! det->gotNode )
return false;
count = det->gotDContext->match;
pred = det->gotNode->sym;
if ( det->gotML >= 64 )
count = 99999;
assert( Exclude_IsEmpty(exc) );
match = ( sym == pred ) ? true : false;
*pUseFull = false; // @@ do this?
// @@ use upex or full for numsyms ?
PPMZEsc_Encode(det->zesc,ari, getulong(backPtr-4), 1, count, 8, context->upex.numSyms, !match);
Exclude_Set(exc,pred);
return match;
}
bool PPMDet_Decode(PPMDet * det,arithInfo * ari,ubyte * backPtr,ubyte *backBase,int *psym,
Exclude *exc,Context *context,bool * pUseFull)
{
bool match;
int count,pred;
assert( *pUseFull == true );
PPMDet_DoMatch(det,backPtr,backBase,context);
if ( ! det->gotNode )
return false;
count = det->gotDContext->match;
pred = det->gotNode->sym;
if ( det->gotML >= 64 )
count = 99999;
assert( Exclude_IsEmpty(exc) );
*pUseFull = false; // @@ do this?
match = ! PPMZEsc_Decode(det->zesc,ari, getulong(backPtr-4), 1, count, 8, context->upex.numSyms);
Exclude_Set(exc,pred);
*psym = pred;
return match;
}
//-}{---- GetNode -------------------------------------------------
static int matchBack(ubyte *p1,ubyte *p2,ubyte *backBase)
{
int len,maxLen;
len = 0;
maxLen = min( (p1 - backBase), (p2 - backBase) );
maxLen = min( maxLen, PPMDet_MaxMatchLen);
while( *p1-- == *p2-- )
{
len++;
if ( len >= maxLen )
break;
}
return len;
}
PPMDetNode * PPMDet_GetNode(PPMDet * det,PPMDetContext *dc,ubyte * backPtr,ubyte *backBase)
{
PPMDetNode *bestNode;
uint len,bestLen,longestLen;
ubyte * bm13;
PPMDetNode * n;
LinkNode * pList;
uint walk;
if ( ! dc )
{
// @@ cbloom bug fix 5-14-02
det->gotDContext = NULL;
det->gotNode = NULL;
det->longestML = 0;
det->gotML = 0;
return NULL;
}
if ( (uint)(backPtr - backBase) < PPMDet_MinOrder ) return NULL;
bm13 = backPtr - 13;
bestNode = NULL;
bestLen = longestLen = 0;
pList = &(dc->Nodes);
walk = 0;
for(n = LN_Next(pList);n != (PPMDetNode *)pList;n = LN_Next(n))
{
len = matchBack(bm13,n->backPtrMinus13,backBase) + 12;
longestLen = max(longestLen,len);
if ( len >= n->detMinLen && len > bestLen )
{
bestLen = len;
bestNode = n;
}
if ( ++walk == PPMDet_MaxNodeWalk )
break;
}
det->gotDContext = dc;
det->gotNode = bestNode;
det->longestML = longestLen;
det->gotML = bestLen;
assert( bestLen >= PPMDet_MinOrder || ! bestNode );
return bestNode;
}
PPMDetNode * PPMDet_AddNodeToContext(PPMDet * det,Context * c,int sym,ubyte * backPtr,uint minLen)
{
PPMDetNode * n;
PPMDetContext *dc;
if ( c->child )
{
dc = (PPMDetContext *)(c->child);
}
else
{
dc = MemPool_GetHunk(det->PPMDetContextPool);
dc->escape = dc->match = 1;
LN_Null(&(dc->Nodes));
c->child = (void *)dc;
}
n = &(det->NodeArray[det->NodeArrayI++]);
if ( det->NodeArrayI == NodeArraySize )
det->NodeArrayI = 0;
LN_Cut(n);
if ( minLen < PPMDet_MinOrder )
minLen = PPMDet_MinOrder;
n->sym = sym;
n->detMinLen = minLen;
n->backPtrMinus13 = backPtr - 13;
LN_Null(n);
LN_AddHead(&(dc->Nodes),n);
return n;
}
//-}-----------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -