📄 uda.h
字号:
{140,252, 0,40},{249,135,41, 0},{250, 69,40, 1},{ 80,251, 1,40},//248-51
{140,252, 0,41}}; // 252,253-255 are reserved
////////////////////////////////////////////////////////////////////////
// A StateMap maps a nonstationary counter state to a probability.
// After each mapping,the mapping is adjusted to improve future
// predictions. Methods:
// sm.p(cx) converts state cx (0-255) to a probability (0-4095).
// Counter state -> probability * 256
class StateMap
{
U32 cxt; // context
Array<U16> t; // 256 states -> probability * 64K
public:
StateMap():cxt(0),t(256)
{
for(U32 i=0;i<256;++i)
{
U32 n0=nex(i,2);
U32 n1=nex(i,3);
if(!n0) n1*=256; //128
if(!n1) n0*=256; //128
t[i]=65536*(n1+1)/(n0+n1+2);
}
}
U32 p(U32 cx)
{
t[cxt]+=((y<<16)-t[cxt]+128)>>8;
return t[cxt=cx]>>4;
}
};
////////////////////////////////////////////////////////////////////////
inline U32 hash(U32 a,U32 b,U32 c=~0,U32 d=~0,U32 e=~0) // ~0
{
U32 h=a*200002979u+b*30005491u+c*50004239u+d*70004807u+e*110002499u;
return h;//^h>>9^a>>2^b>>3^c>>4^d>>5^e>>6;
}
////////////////////////////////////////////////////////////////////////
// A ContextMap maps contexts to a bit histories and makes predictions
// to a Mixer. Methods common to all classes:
// ContextMap cm(M,C); creates using about M bytes of memory (a power
// of 2) for C contexts.
// cm.set(cx); sets the next context to cx,called up to C times
// cx is an arbitrary 32 bit value that identifies the context.
// It should be called before predicting the first bit of each byte.
// cm.mix(m) updates Mixer m with the next prediction. Returns 1
// ifcontext cx is found,else 0. Then it extends all the contexts with
// global bit y. It should be called for every bit:
// if(bpos==0)
// for(int i=0; i<C; ++i) cm.set(cxt[i]);
// cm.mix(m);
// The different types are as follows:
// - SmallStationaryContextMap. 0 <= cx < M/512.
// The state is a 16-bit probability that is adjusted after each
// prediction. C=1.
// - ContextMap. For large contexts,C >= 1. Context need not be hashed.
// Predict to mixer m from bit history state s,using sm to map s to
// a probability.
inline U32 mix2(U32 s,StateMap &sm)
{
U32 p1=sm.p(s);
const U32 n0=-!nex(s,2);
const U32 n1=-!nex(s,3);
const int st=stretch(p1)>>2;
m.add(st);
p1>>=4;
const U32 p0=255-p1;
m.add((p1-p0));
m.add((n1-n0)*st);
m.add((p1&n0)-(p0&n1));
m.add((p1&n1)-(p0&n0));
return s>0;
}
////////////////////////////////////////////////////////////////////////
// Context is looked up directly. m=size is power of 2 in bytes.
// Context should be < m/512. High bits are discarded.
class SmallStationaryContextMap
{
Array<U16> t;
U32 cxt;
U16 *cp;
public:
SmallStationaryContextMap(U32 n):t(n/2),cxt(0)
{
for(U32 i=0;i<t.size();++i) t[i]=32768;
cp=&t[0];
}
void set(U32 cx)
{
cxt=(cx<<8)&(t.size()-256);
}
void mix(U32 rate=7)
{
*cp += ((y<<16)-*cp+(1<<(rate-1))) >> rate;
cp=&t[cxt+c0];
m.add(stretch(*cp>>4));
}
};
////////////////////////////////////////////////////////////////////////
// Context map for large contexts.Most modeling use this type of context
// map.It includes a built in RunContextMap to predict last byte seen
// in the same context,and also bit-level contexts that map to a bit
// history state.
// Bit histories are stored in a hash table. The table is organized into
// 64-byte buckets alinged on cache page boundaries.Each bucket contains
// a hash chain of 7 elements,plus a 2 element queue(packed into 1 byte)
// of the last 2 elements accessed for LRU replacement. Each element has
// a 2 byte checksum for detecting collisions,and an array of 7b history
// states indexed by last 0~2 bits of context.The buckets are indexed
// by a context ending after 0,2,or 5 bits of current byte.Thus,each
// byte modeled results in 3 main memory accesses per context,with all
// other accesses to cache.
// On bits 0,2 and 5,the context is updated and a new bucket is selected
// The most recently accessed element is tried first,by comparing the
// 16 bit checksum,then the 7 elements are searched linearly.If no match
// is found,then the element with the lowest priority among 5 elements
// not in the LRU queue is replaced. After a replacement,the queue is
// emptied (so that consecutive misses favor a LFU replacement policy).
// In all cases,the found/replaced element is put in front of the queue.
// The priority is the state number of the first element (the one with 0
// additional bits of context).The states are sorted by increasing n0+n1
// (number of bits seen),implementing a LFU replacement policy.
// When the context ends on a byte boundary (bit 0),only 3 of the 7 bit
// history states are used. The remaining 4 bytes implement a run model
// as follows: <count:7,d:1> <b1> <b2> <b3> where <b1> is the last byte
// seen,possibly repeated,and <b2> and <b3> are the two bytes seen
// before the first <b1>. <count:7,d:1> is a 7 bit count and a 1 bit
// flag. If d=0 then <count> = 1..127 is the number of repeats of <b1>
// and no other bytes have been seen,and <b2><b3> are not used.
// If <d> = 1 then the history is <b3>,<b2>,and <count> - 2 repeats
// of <b1>. In this case,<b3> is valid only if<count> >= 3 and
// <b2> is valid only if<count> >= 2.
// As an optimization, last two hash elements of each byte(representing
// contexts with 2-7 bits) are not updated until a context is seen for
// a second time. This is indicated by <count,d> = <1,0>.After update,
// <count,d> is updated to <2,0> or <2,1>.
class ContextMap
{
class E // hash element,64 bytes
{
U16 chk[7]; // byte context checksums
U8 last; // last 2 accesses (0-6) in low,high nibble
public:
U8 bh[7][7];// byte context,3-bit context -> bit history state
// bh[][0] = 1st bit,bh[][1,2] = 2nd bit,bh[][3..6] = 3rd bit
// bh[][0] is also a replacement priority,0 = empty
U8* get(U16 chk); // Find element (0-6) matching checksum.
// If not found,insert or replace lowest priority (not last).
};
Array<E> t; // bit histories for bits 0-1,2-4,5-7
// For 0-1,also contains a run count in bh[][4] and value in bh[][5]
// and pending update count in bh[7]
Array<U8*> cp; // C pointers to current bit history
Array<U8*> cp0; // First element of 7 element array containing cp[i]
Array<U32> cxt; // C whole byte contexts (hashes)
Array<U8*> runp;// C [0..3] = count,value,unused,unused
StateMap *sm; // C maps of state -> p
U32 cn; // Next context to set by set()
public:
ContextMap(U32 n,U32 c=1); //m = memory in bytes,a power of 2,C = c
~ContextMap() {delete []sm;}
void set(U32 cx); // set next whole byte context
U32 mix();
};
ContextMap:: // Construct using m bytes of memory for c contexts
ContextMap(U32 n,U32 c):t(n>>6,64),cp(c),cp0(c),cxt(c),runp(c),cn(0)
{
sm=new StateMap[c];
for(U32 i=0;i<c;++i)
{
cp0 [i]=cp[i]=&t[0].bh[0][0];
runp[i]=cp[i]+3;
}
}
// Find or create hash element matching checksum ch
inline U8* ContextMap::E::get(U16 ch)
{
if(chk[last&15]==ch) return &bh[last&15][0];
U32 b=0xffff,bi=0;
for(U8 i=0;i<7;++i)
{
if(chk[i]==ch) return last=last<<4|i,&bh[i][0];
const U32 pri=bh[i][0];
if((last&15)!=i && last>>4!=i && pri<b) b=pri,bi=i;
}
return last=0xf0|bi,chk[bi]=ch,(U8*)memset(&bh[bi][0],0,7);
}
inline void ContextMap::set(U32 cx) // Set the i'th context to cx
{
const U32 i=cn++;
cx=cx*987654323+i; // permute (don't hash) cx to spread distribution
cx=cx<<16|cx>>16;
cxt[i]=cx+i;
}
// Update the model with bit y1,and predict next bit to mixer m.
U32 ContextMap::mix() // Update model with y
{
const U32 bp=bpos,cc=c0;
U32 ret=0;
for(U32 i=0;i<cn;++i)
{
if(cp[i]) *cp[i]=nex(*cp[i],y);
// Update context pointers
if(bp >1&&runp[i][0]==0) cp[i]=0;
else if(bp==1||bp==3||bp==6) cp[i]=cp0[i]+1+(cc&1);
else if(bp==4||bp==7) cp[i]=cp0[i]+3+(cc&3);
else
{
cp0[i]=cp[i]=t[cxt[i]+cc&t.size()-1].get(cxt[i]>>16);
if(bp==0) // Update pending bit histories for bits 2-7
{
if(cp0[i][3]==2)
{
const U32 c=cp0[i][4]+256; U8 *p;
p=t[cxt[i]+(c>>6)&t.size()-1].get(cxt[i]>>16);
p[0 ]=((c>>5)&1)+1;
p[1+((c>>5)&1)]=((c>>4)&1)+1;
p[3+((c>>4)&3)]=((c>>3)&1)+1;
p=t[cxt[i]+(c>>3)&t.size()-1].get(cxt[i]>>16);
p[0 ]=((c>>2)&1)+1;
p[1+((c>>2)&1)]=((c>>1)&1)+1;
p[3+((c>>1)&3)]=( c &1)+1;
cp0[i][6]=0;
}
// Update run count of previous context
if(!runp[i][0]) // new context
runp[i][0]=2,runp[i][1]=c1;
else if(runp[i][1]!=c1) // different byte in context
runp[i][0]=1,runp[i][1]=c1;
else if(runp[i][0]<254) // same byte in context
runp[i][0]+=2;
runp[i]=cp0[i]+3;
}
}
// predict from last byte in context
if(((U32)runp[i][1]+256)>>(8-bp)==cc)//predicted bit +for1,-for0
{
U32 rc=runp[i][0];// count*2,+1 if 2 different bytes seen
m.add(((runp[i][1]>>(7-bp)&1)*2-1)*ilog(rc+1)<<(2+(~rc&1)));
}
else
m.add(0);
// predict from bit context
ret+=mix2(cp[i]?*cp[i]:0, sm[i]);
}
if(bp==7) cn=0;
return ret;
}
////////////////////////////////////////////////////////////////////////
//matchModel() finds the longest matching context and returns its length
U32 matchModel()
{
const U32 MAXLEN=2047; // longest allowed match + 1
static Array<U32> t(MEM); // hash table of pointers to contexts
static U32 h =0; // hash of last 7 bytes
static U32 ptr=0; // points to next byte of match if any
static U32 len=0; // length of match,or 0 ifno match
static U32 ret=0;
if(!bpos)
{
h=h*997*8+c1+1&t.size()-1; // update context hash
if(len) ++len,++ptr;
else // find match
{
ptr=t[h];
if(ptr && pos-ptr<buf.size())
while(buf(len+1)==buf[ptr-len-1] && len<MAXLEN) ++len;
}
t[h]=pos; // update hash table
ret =len;
//if(ret>0&&!(ret&0xfff)) printf("pos=%d len=%d ptr=%d\n",pos,len,ptr);
}
// predict
if(len>MAXLEN) len=MAXLEN;
int sgn;
if(len && c1==buf[ptr-1] && c0==((U32)buf[ptr]+256)>>(8-bpos))
{
if(buf[ptr]>>(7-bpos)&1) sgn=1;
else sgn=-1;
}
else sgn=len=0;
m.add(sgn* 4*ilog(len));
m.add(sgn*64*(len<32?len:32)); //112
return ret;
}
////////////////////////////////////////////////////////////////////////
// Model 2-D data with fixed record length. Also order 1-2 models
// that include the distance to the last match.
void recordModel()
{ //buf(1)->last 3 pos
static Array<U32> cpos1(256),cpos2(256),cpos3(256),cpos4(256);
static U32 rlen=2,rlen1=3,rlen2=4; // run length and 2 candidates
static U32 rcount1=0,rcount2=0; // candidate counts
static ContextMap cm(MEM*4,4);
if(!bpos)
{
const U32 c=c1,r=pos-cpos1[c];
if(r>1 && r==cpos1[c]-cpos2[c]
&& r==cpos2[c]-cpos3[c]
&& r==cpos3[c]-cpos4[c]
&& (r>15 || (c==buf(r*5+1)) && c==buf(r*6+1)))
{
if(r==rlen1) ++rcount1;
else if(r==rlen2) ++rcount2;
else if(rcount1>rcount2) rlen2=r,rcount2=1;
else rlen1=r,rcount1=1;
}
if(rcount1>15 && rlen!=rlen1) rlen=rlen1,rcount1=rcount2=0;
if(rcount2>15 && rlen!=rlen2) rlen=rlen2,rcount1=rcount2=0;
const U32 col=pos%rlen;
cm.set(c1<<8|buf(rlen)); //27
cm.set(rlen|buf(rlen)<<10|buf(rlen*2)<<18); //25
cm.set(rlen|c1 <<10|col<<18); //34
cm.set( col|rlen<<12); //14
cpos4[c]=cpos3[c]; // update last context positions
cpos3[c]=cpos2[c];
cpos2[c]=cpos1[c];
cpos1[c]=pos;
}
cm.mix();
}
////////////////////////////////////////////////////////////////////////
// Model order 1-2 contexts with gaps.
void sparseModel()
{
static ContextMap cm(MEM*4,7),scm(MEM,4);
if(!bpos)
{
cm.set(c4&0x00ff00ff); //16
cm.set(c4&0xff0000ff); //18
cm.set(c4&0x00ffff00); //29
cm.set(c1|buf(5)<<8); //16
cm.set(c1|buf(6)<<8); //17
cm.set(c3|buf(6)<<8); //14
cm.set(c4>>24|buf(8)<<8); //18
scm.set(c1); //70
scm.set(c2); //39
scm.set(c3); //14
scm.set(buf(8)); //39
}
cm.mix();
scm.mix();
}
////////////////////////////////////////////////////////////////////////
// Model English text (words and columns/end of line)
void wordModel()
{
static U32 word0=0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -