📄 uda.h

📁 压缩率较高的小工具基于PAQ算法
💻 H
📖 第 1 页 / 共 3 页
字号:
{140,252, 0,40},{249,135,41, 0},{250, 69,40, 1},{ 80,251, 1,40},//248-51
{140,252, 0,41}}; // 252,253-255 are reserved
////////////////////////////////////////////////////////////////////////
// A StateMap maps a nonstationary counter state to a probability.
// After each mapping,the mapping is adjusted to improve future
// predictions.  Methods:
// sm.p(cx) converts state cx (0-255) to a probability (0-4095).
// Counter state -> probability * 256
class StateMap
{
	U32 cxt;		// context
	Array<U16> t;	// 256 states -> probability * 64K
public:
	StateMap():cxt(0),t(256)
	{
		for(U32 i=0;i<256;++i)
		{
			U32 n0=nex(i,2);
			U32 n1=nex(i,3);
			if(!n0) n1*=256;	//128
			if(!n1) n0*=256;	//128
			t[i]=65536*(n1+1)/(n0+n1+2);
		}
	}
	U32 p(U32 cx)
	{
		t[cxt]+=((y<<16)-t[cxt]+128)>>8;
		return t[cxt=cx]>>4;
	}
};
////////////////////////////////////////////////////////////////////////
inline U32 hash(U32 a,U32 b,U32 c=~0,U32 d=~0,U32 e=~0) // ~0
{
	U32 h=a*200002979u+b*30005491u+c*50004239u+d*70004807u+e*110002499u;
	return h;//^h>>9^a>>2^b>>3^c>>4^d>>5^e>>6;
}
////////////////////////////////////////////////////////////////////////
// A ContextMap maps contexts to a bit histories and makes predictions
// to a Mixer.	Methods common to all classes:
// ContextMap cm(M,C); creates using about M bytes of memory (a power
//	 of 2) for C contexts.
// cm.set(cx);	sets the next context to cx,called up to C times
//	 cx is an arbitrary 32 bit value that identifies the context.
//	 It should be called before predicting the first bit of each byte.
// cm.mix(m) updates Mixer m with the next prediction.	Returns 1
//	 ifcontext cx is found,else 0. Then it extends all the contexts with
//	 global bit y.	It should be called for every bit:
//	   if(bpos==0)
//		 for(int i=0; i<C; ++i) cm.set(cxt[i]);
//	   cm.mix(m);
// The different types are as follows:
// - SmallStationaryContextMap.  0 <= cx < M/512.
//	   The state is a 16-bit probability that is adjusted after each
//	   prediction.	C=1.
// - ContextMap.  For large contexts,C >= 1. Context need not be hashed.
// Predict to mixer m from bit history state s,using sm to map s to
// a probability.
inline U32 mix2(U32 s,StateMap &sm)
{
	U32 p1=sm.p(s);
	const U32 n0=-!nex(s,2);
	const U32 n1=-!nex(s,3);
	const int st=stretch(p1)>>2;
	m.add(st);
	p1>>=4;
	const U32 p0=255-p1;
	m.add((p1-p0));
	m.add((n1-n0)*st);
	m.add((p1&n0)-(p0&n1));
	m.add((p1&n1)-(p0&n0));
	return s>0;
}
////////////////////////////////////////////////////////////////////////
// Context is looked up directly.  m=size is power of 2 in bytes.
// Context should be < m/512.  High bits are discarded.
class SmallStationaryContextMap
{
	Array<U16> t;
	U32 cxt;
	U16 *cp;
public:
	SmallStationaryContextMap(U32 n):t(n/2),cxt(0)
	{
		for(U32 i=0;i<t.size();++i) t[i]=32768;
		cp=&t[0];
	}
	void set(U32 cx)
	{
		cxt=(cx<<8)&(t.size()-256);
	}
	void mix(U32 rate=7)
	{
		*cp += ((y<<16)-*cp+(1<<(rate-1))) >> rate;
		cp=&t[cxt+c0];
		m.add(stretch(*cp>>4));
	}
};
////////////////////////////////////////////////////////////////////////
// Context map for large contexts.Most modeling use this type of context
// map.It includes a built in RunContextMap to predict last byte seen
// in the same context,and also bit-level contexts that map to a bit
// history state.
// Bit histories are stored in a hash table. The table is organized into
// 64-byte buckets alinged on cache page boundaries.Each bucket contains
// a hash chain of 7 elements,plus a 2 element queue(packed into 1 byte)
// of the last 2 elements accessed for LRU replacement. Each element has
// a 2 byte checksum for detecting collisions,and an array of 7b history
// states indexed by last 0~2 bits of context.The buckets are indexed
// by a context ending after 0,2,or 5 bits of current byte.Thus,each
// byte modeled results in 3 main memory accesses per context,with all
// other accesses to cache.
// On bits 0,2 and 5,the context is updated and a new bucket is selected
// The most recently accessed element is tried first,by comparing the
// 16 bit checksum,then the 7 elements are searched linearly.If no match
// is found,then the element with the lowest priority among 5 elements
// not in the LRU queue is replaced.  After a replacement,the queue is
// emptied (so that consecutive misses favor a LFU replacement policy).
// In all cases,the found/replaced element is put in front of the queue.
// The priority is the state number of the first element (the one with 0
// additional bits of context).The states are sorted by increasing n0+n1
// (number of bits seen),implementing a LFU replacement policy.
// When the context ends on a byte boundary (bit 0),only 3 of the 7 bit
// history states are used.  The remaining 4 bytes implement a run model
// as follows: <count:7,d:1> <b1> <b2> <b3> where <b1> is the last byte
// seen,possibly repeated,and <b2> and <b3> are the two bytes seen
// before the first <b1>.  <count:7,d:1> is a 7 bit count and a 1 bit
// flag.  If d=0 then <count> = 1..127 is the number of repeats of <b1>
// and no other bytes have been seen,and <b2><b3> are not used.
// If <d> = 1 then the history is <b3>,<b2>,and <count> - 2 repeats
// of <b1>.  In this case,<b3> is valid only if<count> >= 3 and
// <b2> is valid only if<count> >= 2.
// As an optimization, last two hash elements of each byte(representing
// contexts with 2-7 bits) are not updated until a context is seen for
// a second time.  This is indicated by <count,d> = <1,0>.After update,
// <count,d> is updated to <2,0> or <2,1>.
class ContextMap
{
	class E 		// hash element,64 bytes
	{
		U16 chk[7]; // byte context checksums
		U8 last;	// last 2 accesses (0-6) in low,high nibble
	public:
		U8 bh[7][7];// byte context,3-bit context -> bit history state
		// bh[][0] = 1st bit,bh[][1,2] = 2nd bit,bh[][3..6] = 3rd bit
		// bh[][0] is also a replacement priority,0 = empty
		U8* get(U16 chk);  // Find element (0-6) matching checksum.
		// If not found,insert or replace lowest priority (not last).
	};
	Array<E> t; 	// bit histories for bits 0-1,2-4,5-7
	// For 0-1,also contains a run count in bh[][4] and value in bh[][5]
	// and pending update count in bh[7]
	Array<U8*> cp;	// C pointers to current bit history
	Array<U8*> cp0; // First element of 7 element array containing cp[i]
	Array<U32> cxt; // C whole byte contexts (hashes)
	Array<U8*> runp;// C [0..3] = count,value,unused,unused
	StateMap *sm;	// C maps of state -> p
	U32 cn; 		// Next context to set by set()
public:
	ContextMap(U32 n,U32 c=1);	//m = memory in bytes,a power of 2,C = c
	~ContextMap()		{delete []sm;}
	void set(U32 cx);	// set next whole byte context
	U32 mix();
};
ContextMap::	// Construct using m bytes of memory for c contexts
ContextMap(U32 n,U32 c):t(n>>6,64),cp(c),cp0(c),cxt(c),runp(c),cn(0)
{
	sm=new StateMap[c];
	for(U32 i=0;i<c;++i)
	{
		cp0 [i]=cp[i]=&t[0].bh[0][0];
		runp[i]=cp[i]+3;
	}
}
// Find or create hash element matching checksum ch
inline U8* ContextMap::E::get(U16 ch)
{
	if(chk[last&15]==ch) return &bh[last&15][0];
	U32 b=0xffff,bi=0;
	for(U8 i=0;i<7;++i)
	{
		if(chk[i]==ch) return last=last<<4|i,&bh[i][0];
		const U32 pri=bh[i][0];
		if((last&15)!=i && last>>4!=i && pri<b) b=pri,bi=i;
	}
	return last=0xf0|bi,chk[bi]=ch,(U8*)memset(&bh[bi][0],0,7);
}
inline void ContextMap::set(U32 cx) // Set the i'th context to cx
{
	const U32 i=cn++;
	cx=cx*987654323+i; // permute (don't hash) cx to spread distribution
	cx=cx<<16|cx>>16;
	cxt[i]=cx+i;
}
// Update the model with bit y1,and predict next bit to mixer m.
U32 ContextMap::mix()	// Update model with y
{
	const U32 bp=bpos,cc=c0;
	U32 ret=0;
	for(U32 i=0;i<cn;++i)
	{
		if(cp[i]) *cp[i]=nex(*cp[i],y);
		// Update context pointers
			 if(bp >1&&runp[i][0]==0)	cp[i]=0;
		else if(bp==1||bp==3||bp==6)	cp[i]=cp0[i]+1+(cc&1);
		else if(bp==4||bp==7)			cp[i]=cp0[i]+3+(cc&3);
		else
		{
			cp0[i]=cp[i]=t[cxt[i]+cc&t.size()-1].get(cxt[i]>>16);
			if(bp==0)	// Update pending bit histories for bits 2-7
			{
				if(cp0[i][3]==2)
				{
					const U32 c=cp0[i][4]+256; U8 *p;
					p=t[cxt[i]+(c>>6)&t.size()-1].get(cxt[i]>>16);
					p[0 		  ]=((c>>5)&1)+1;
					p[1+((c>>5)&1)]=((c>>4)&1)+1;
					p[3+((c>>4)&3)]=((c>>3)&1)+1;
					p=t[cxt[i]+(c>>3)&t.size()-1].get(cxt[i]>>16);
					p[0 		  ]=((c>>2)&1)+1;
					p[1+((c>>2)&1)]=((c>>1)&1)+1;
					p[3+((c>>1)&3)]=( c    &1)+1;
					cp0[i][6]=0;
				}
				// Update run count of previous context
				if(!runp[i][0]) 		// new context
					runp[i][0]=2,runp[i][1]=c1;
				else if(runp[i][1]!=c1) // different byte in context
					runp[i][0]=1,runp[i][1]=c1;
				else if(runp[i][0]<254) // same byte in context
					runp[i][0]+=2;
				runp[i]=cp0[i]+3;
			}
		}
		// predict from last byte in context
		if(((U32)runp[i][1]+256)>>(8-bp)==cc)//predicted bit +for1,-for0
		{
			U32 rc=runp[i][0];// count*2,+1 if 2 different bytes seen
			m.add(((runp[i][1]>>(7-bp)&1)*2-1)*ilog(rc+1)<<(2+(~rc&1)));
		}
		else
			m.add(0);
		// predict from bit context
		ret+=mix2(cp[i]?*cp[i]:0, sm[i]);
	}
	if(bp==7) cn=0;
	return ret;
}
////////////////////////////////////////////////////////////////////////
//matchModel() finds the longest matching context and returns its length
U32 matchModel()
{
	const  U32 MAXLEN=2047; 	// longest allowed match + 1
	static Array<U32> t(MEM);	// hash table of pointers to contexts
	static U32 h  =0;			// hash of last 7 bytes
	static U32 ptr=0;			// points to next byte of match if any
	static U32 len=0;			// length of match,or 0 ifno match
	static U32 ret=0;
	if(!bpos)
	{
		h=h*997*8+c1+1&t.size()-1; // update context hash
		if(len) ++len,++ptr;
		else		// find match
		{
			ptr=t[h];
			if(ptr && pos-ptr<buf.size())
				while(buf(len+1)==buf[ptr-len-1] && len<MAXLEN) ++len;
		}
		t[h]=pos;	// update hash table
		ret =len;
//if(ret>0&&!(ret&0xfff)) printf("pos=%d len=%d ptr=%d\n",pos,len,ptr);
	}
	// predict
	if(len>MAXLEN) len=MAXLEN;
	int sgn;
	if(len && c1==buf[ptr-1] && c0==((U32)buf[ptr]+256)>>(8-bpos))
	{
		if(buf[ptr]>>(7-bpos)&1) sgn=1;
		else sgn=-1;
	}
	else sgn=len=0;
	m.add(sgn* 4*ilog(len));
	m.add(sgn*64*(len<32?len:32));	//112
	return ret;
}
////////////////////////////////////////////////////////////////////////
// Model 2-D data with fixed record length. Also order 1-2 models
// that include the distance to the last match.
void recordModel()
{										//buf(1)->last 3 pos
	static Array<U32> cpos1(256),cpos2(256),cpos3(256),cpos4(256);
	static U32 rlen=2,rlen1=3,rlen2=4;	// run length and 2 candidates
	static U32 rcount1=0,rcount2=0; 	// candidate counts
	static ContextMap cm(MEM*4,4);
	if(!bpos)
	{
		const U32 c=c1,r=pos-cpos1[c];
		if(r>1 && r==cpos1[c]-cpos2[c]
			   && r==cpos2[c]-cpos3[c]
			   && r==cpos3[c]-cpos4[c]
			   && (r>15 || (c==buf(r*5+1)) && c==buf(r*6+1)))
		{
				 if(r==rlen1) ++rcount1;
			else if(r==rlen2) ++rcount2;
			else if(rcount1>rcount2) rlen2=r,rcount2=1;
			else rlen1=r,rcount1=1;
		}
		if(rcount1>15 && rlen!=rlen1) rlen=rlen1,rcount1=rcount2=0;
		if(rcount2>15 && rlen!=rlen2) rlen=rlen2,rcount1=rcount2=0;
		const U32 col=pos%rlen;
		cm.set(c1<<8|buf(rlen));					//27
		cm.set(rlen|buf(rlen)<<10|buf(rlen*2)<<18); //25
		cm.set(rlen|c1		 <<10|col<<18); 		//34
		cm.set( col|rlen<<12);						//14
		cpos4[c]=cpos3[c];	// update last context positions
		cpos3[c]=cpos2[c];
		cpos2[c]=cpos1[c];
		cpos1[c]=pos;
	}
	cm.mix();
}
////////////////////////////////////////////////////////////////////////
// Model order 1-2 contexts with gaps.
void sparseModel()
{
	static ContextMap cm(MEM*4,7),scm(MEM,4);
	if(!bpos)
	{
		 cm.set(c4&0x00ff00ff); 	//16
		 cm.set(c4&0xff0000ff); 	//18
		 cm.set(c4&0x00ffff00); 	//29
		 cm.set(c1|buf(5)<<8);		//16
		 cm.set(c1|buf(6)<<8);		//17
		 cm.set(c3|buf(6)<<8);		//14
		 cm.set(c4>>24|buf(8)<<8);	//18
		scm.set(c1);				//70
		scm.set(c2);				//39
		scm.set(c3);				//14
		scm.set(buf(8));			//39
	}
	 cm.mix();
	scm.mix();
}
////////////////////////////////////////////////////////////////////////
// Model English text (words and columns/end of line)
void wordModel()
{
	static U32 word0=0;
💿 文件大小 28 K
👤 上传用户 bt9
📂 所属分类压缩解压
🏷️ 相关标签

#PAQ #算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -