📄 uda.h
字号:
static U32 text0=0; // hash stream of letters
static ContextMap cm(MEM*16,4);
if(!bpos)
{
U32 c=c1;
if(c-'A'<='Z'-'A') c+='a'-'A';
if(c-'a'<='z'-'a')
{
word0=word0*263* 4+c;
text0=text0*997*16+c;
}
else word0=0;
cm.set(word0); //102
cm.set(text0&0xffffff); //23
cm.set(c4&0xffff0000); //25
cm.set(c4&0xff00ff00|buf(6)); //22
}
cm.mix();
}
////////////////////////////////////////////////////////////////////////
// The context is a byte-string history that occurs within a 1- or 2-byte context.
void indirectModel()
{
static ContextMap cm(MEM*4,2);
static Array<U32> t1(256);
if(!bpos)
{
U32 &r1=t1[c2]; r1=r1<<8|c1;
const U32 t=c1|t1[c1]<<8;
cm.set(t&0xffff); //13
cm.set(t&0xffffff); //12
}
cm.mix();
}
////////////////////////////////////////////////////////////////////////
// Model a 24-bit color uncompressed .bmp or .tif file.
// Return the row width in bytes if an image file is detected, else 0.
// 32-bit little-endian number at buf(i)..buf(i-3): buf(i) supplies the
// low byte and buf(i-3) the high byte.
// Each byte is widened to U32 before it is scaled. buf's return type is
// not visible here; if it is int or narrower, the unwidened product
// 16777216*buf(i-3) would be computed in signed int and overflow for any
// high byte >= 0x80. The unsigned form yields the same value modulo 2^32.
inline U32 i4(int i)
{
    return static_cast<U32>(buf(i))
          +static_cast<U32>(buf(i-1))*256u
          +static_cast<U32>(buf(i-2))*65536u
          +static_cast<U32>(buf(i-3))*16777216u;
}
// 16-bit little-endian number: buf(i) is the low byte, buf(i-1) the high byte.
inline U32 i2(int i)
{
    const U32 lo=buf(i);
    const U32 hi=buf(i-1);
    return lo+(hi<<8);
}
// Return buf(i) multiplied by itself.
inline U32 sqrbuf(int i)
{
    const U32 v=buf(i);
    return v*v;
}
// Model uncompressed BMP/TIFF image data.
// Each call does, in order:
//  1. At a byte boundary (bpos==0), test the recent history for a BMP
//     header, then for a TIFF header; on a match record the row width w
//     and the end-of-image position eoi.
//  2. If pos is already past eoi, clear w and return 0 without touching
//     any context map.
//  3. Otherwise, at a byte boundary, set contexts built from nearby bytes
//     (c1, c2, c3, and bytes w-3, w, w+3, 2*w and 6 positions back), then
//     mix every map.
// Returns w (row width in bytes) while inside image data, else 0.
int bmpModel()
{
static U32 w =0; // width of image in bytes (pixels * 3)
static U32 eoi =0; // end of image (compared against pos)
static U32 tiff=0; // value of pos just after the TIFF signature, 0 if none
const U32 SC =0x20000;
static SmallStationaryContextMap scm1(SC),scm2(SC),
scm3(SC),scm4(SC),scm5(SC),scm6(SC*2);
static ContextMap cm(MEM*4,8);
// Detect .bmp file header (24 bit color, not compressed).
// Checks: 'B','M' at 54 and 53 bytes back, the 32-bit fields at 44 and 40
// back equal 54 and 40, the 16-bit field at 26 back equals 24 and the
// 32-bit field at 24 back equals 0.
// NOTE(review): presumably data offset, info-header size, bits per pixel
// and compression of a standard BMP header -- confirm against buf()'s
// indexing convention.
if(!bpos && buf(54)=='B' && buf(53)=='M' &&
i4(44)==54 && i4(40)==40 &&
i2(26)==24 && i4(24)==0 )
{
// '+' binds tighter than '&': the 32-bit field at 36 back is rounded up
// to a multiple of 4 first and then multiplied by 3.
// NOTE(review): this pads in pixels rather than bytes; changing it would
// alter the contexts and break compatibility with existing archives.
w=(i4(36)+3&-4)*3; // image width
const U32 height=i4(32);
// Only accept plausible dimensions; otherwise eoi=pos, so the model
// switches itself off on the next byte.
if(w<0x30000 && height<0x10000)
{
eoi=pos+w*height; // image size in bytes
// printf("BMP %dx%d\n",w/3,height);
}
else eoi=pos;
}
// Detect .tif file header (24 bit color, not compressed).
// Parsing is crude, won't work with weird formats.
if(!bpos)
{
if(c4==0x49492a00) tiff=pos; // "II*\0" signature: Intel format only
if(pos-tiff==4&&c4!=0x08000000) tiff=0;// 8 = normal offset to directory
if(tiff && pos-tiff==200)// most of the directory should be read by now
{
int dirsize=i2(pos-tiff-4); // number of 12-byte directory entries
// Note: width=w=0 also clears the stored row width w.
int bpp=0,compression=0,width=w=0,height=0;
// Walk the entries in 12-byte steps. The pre-decrement test
// (--dirsize>0) ends the loop after at most dirsize-1 entries.
for(U32 i=tiff+6; i<pos-12 && --dirsize>0; i+=12)
{
int tag=i2(pos-i);// 256=width, 257=height, 259: 1=no compression,
// 277=3 samples/pixel
int tagfmt=i2(pos-i-2); // 3=short, 4=long
int taglen=i4(pos-i-4); // number of elements in tagval
int tagval=i4(pos-i-8); // 1 long, 1-2 shorts, or pointer to an array
// Only single-element short/long tags are interpreted.
if((tagfmt==3||tagfmt==4) && taglen==1)
{
if(tag==256) width=tagval;
if(tag==257) height=tagval;
if(tag==259) compression=tagval; // 1=no compression
if(tag==277) bpp=tagval; // should be 3
}
}
// Accept only uncompressed images with 1 or 3 samples per pixel and
// more than 50 pixels; the comma expression sets both eoi and w.
if(width>0 && height>0 && width*height>50 && compression==1
&& (bpp==1||bpp==3))
eoi=tiff+width*height*bpp,w=width*bpp;
// Runs whether or not the header was accepted: if no image data lies
// ahead of pos, forget the TIFF header and the row width.
if(eoi<=pos) tiff=w=0;
// else printf("TIFF %dx%dx%d\n",width,height,bpp);
}
}
// Outside image data: clear w and report "no image" to the caller.
if(pos>eoi) return w=0;
if(!bpos) // Select nearby pixels as context
{
const U32 color=pos%3; // position modulo 3, mixed into every hash
// mean first holds the SUM of four nearby bytes (c3 and the bytes w-3,
// w and w+3 back); it becomes the average only after the >>=2 below.
U32 mean=c3+buf(w-3)+buf(w)+buf(w+3);
// (sum of squares - sum*sum/4) / 4. sqrbuf(3) squares buf(3), the same
// byte the sum takes from c3.
const U32 var=(sqrbuf(3)+sqrbuf(w-3)+sqrbuf(w)+sqrbuf(w+3)-
mean*mean/4)>>2;
mean>>=2;
const U32 logvar=ilog(var);
// i is pre-incremented in each call so every context gets a distinct
// first hash argument (1..8).
U32 i=0;
cm .set(hash(++i,c3>>2,buf(w)>>2,color));
cm .set(hash(++i,c3>>2, c1>>2,color));
cm .set(hash(++i,c3>>2, c2>>2,color));
cm .set(hash(++i,buf(w)>>2,c1>>2,color));
cm .set(hash(++i,buf(w)>>2,c2>>2,color));
cm .set(hash(++i, (c3+buf(w))>>1,color));
cm .set(hash(++i, (c3+buf(w))>>3,c1>>5,c2>>5,color));
cm .set(hash(++i,mean, logvar>>5,color));
// Direct (unhashed) contexts: halved sums and differences of c3 and the
// bytes w, w+3, w-3, 2*w and 6 positions back.
scm1.set((c3 +buf(w))>>1);
scm2.set((c3 +buf(w)-buf(w+3))>>1);
scm3.set((c3*2-buf(6))>>1);
scm4.set((buf(w)*2-buf(w*2))>>1);
scm5.set((c3 +buf(w)-buf(w-3))>>1);
// Precedence: (mean>>1) | ((logvar<<1) & 0x180).
scm6.set(mean>>1|logvar<<1&0x180);
}
// Every map is mixed on every call (each bit), not just at byte boundaries.
cm .mix();
scm1.mix();
scm2.mix();
scm3.mix();
scm4.mix();
scm5.mix();
scm6.mix();
return w;
}
////////////////////////////////////////////////////////////////////////
// Model x86 code. The contexts are sparse, containing only those bits
// relevant to parsing (2 prefixes, opcode, and the mod and r/m fields of
// the ModR/M byte). Get context at buf(i) relevant to parsing 32-bit x86 code.
__declspec(naked) U32 __stdcall bswap(U32 x)
{
__asm mov eax,[esp+4]
__asm bswap eax
__asm ret 4
}
void exeModel()
{
const U32 N=12;
static ContextMap cm(MEM*2,N);
static U32 state=0,base=0,aim=0,len=0;
if(!bpos)
{
U32 last4=bswap(c4);
switch(state)
{
case 0: if((last4&0xffff)==0x5a4d)
{
state=1;
base=pos-4;
aim=base+0x40;
}return;
case 1: if(pos>=aim)
if(pos>aim||last4+4<pos-base||last4>0x1000) state=0;
else {state=2;aim=base+last4+4;}
return;
case 2: if(pos>=aim)
if(pos>aim||last4!=0x4550) state=0;
else {state=3;aim+=0x108;}
return;
case 3: if(pos>=aim)
if(pos>aim||!last4||last4>MEM*8) state=0;
else {state=4;aim+=4;len=last4;}
return;
case 4: if(pos>=aim)
if(pos>aim||last4<=pos-base||last4>0x10000) state=0;
else {state=5;aim=base+last4;}
return;
case 5: if(pos>=aim) {state=6;aim+=len;type=1;}
//printf("pos=%p,aim=%p,len=%p\n",pos,aim,len);
return;
case 6: if(pos>=aim) {state=0;type=0;return;}
else for(U32 i=0;i<N;++i)
{
U32 prefix=(buf(i+2)==0x0f) +(buf(i+2)==0x66)* 2+
(buf(i+2)==0x67)*3+(buf(i+3)==0x0f)* 4+
(buf(i+3)==0x66)*8+(buf(i+3)==0x67)*12;
U32 opcode=buf(i+1);
U32 modrm =i ? buf(i)&0xc7 : 0;
cm.set(prefix|opcode<<4|modrm<<12|c1*(i>4)<<20);
}
}
}
if(state==6) cm.mix();
}
////////////////////////////////////////////////////////////////////////
// File types (order is important: the last will be sorted by file type
// detection as the first). This combines all context models with a Mixer.
// Top-level model: feeds every sub-model's prediction into the mixer m
// and returns the mixer's output.
// Two shortcuts return early and skip all remaining models:
//   - matchModel() reports a match length of 100 or more;
//   - bmpModel() reports a non-zero row width (inside image data).
U32 contextModel()
{
    static ContextMap cm(MEM*32,5);
    static U32 cxt[9];  // cxt[k]: hash of the last k bytes; cxt[0] stays 0
    m.update();
    m.add(256);

    // Length of longest matching context
    const U32 matchLen=matchModel();
    if(matchLen>=100)
    {
        m.set(0,8);
        return m.p();
    }

    // Image width (B) if BMP or TIFF detected, or 0
    const U32 rowBytes=bmpModel();
    if(rowBytes>0)
    {
        static U32 col=0;  // counts calls (bits) modulo 24
        ++col;
        if(col>=24) col=0;
        m.set(2,8);
        m.set(col,24);
        m.set((buf(rowBytes)+c3)>>4,32);
        m.set(c0,256);
        return m.p();
    }

    if(bpos==0)
    {
        // Extend the hashes from the highest index down so each slot is
        // built from the not-yet-updated slot below it.
        for(U32 k=8;k>0;--k)
            cxt[k]=cxt[k-1]*257+c1+1;
        cm.set(cxt[0]); //41
        cm.set(cxt[2]); //33
        cm.set(cxt[3]); //18
        cm.set(cxt[5]); //11
        if(type!=1) cm.set(cxt[8]); //13
    }
    const U32 order=cm.mix();

    sparseModel(); //396
    if(type!=1)
    {
        recordModel(); //177
        wordModel(); //190
    }
    indirectModel(); //45
    exeModel();

    // Mixer weight-set selectors.
    m.set(8+order+(c4&0xe0)+(c1==c2)*16,256);
    m.set(c0*4+(c1/64),1024); //37
    m.set(c1/32+(c2/32)*8+(c3&192),256);//48
    return m.p();
}
////////////////////////////////////////////////////////////////////////
class Predictor
{
U32 p;
public:
Predictor():p(2048) {}
U32 predict() const {return p;} // 0 ~ 4095
void update();
};
////////////////////////////////////////////////////////////////////////
void Predictor::update()
{
static APM a1(256),a2(65536);
c0+=c0+y;
if(c0>=256)
{
buf[pos++]=c0;
c3=c2; c2=c1; c1=c0-256;
c4=(c4<<8)+c1; c0=1;
}
bpos=(bpos+1)&7;
p=contextModel();
p=(a1.p(p,c0)*3+p+2)>>2;
p= a2.p(p,c0+c1*256);
}
////////////////////////////////////////////////////////////////////////
// Binary arithmetic coder driven by a Predictor.
// The coding interval is [x0,x1]. Each bit splits it at a point
// proportional to the predictor's 12-bit output: a 1 keeps the lower part
// [x0,m], a 0 keeps the upper part [m+1,x1]. Whenever x0 and x1 share
// their top byte, that byte is shifted out (encoder) or a new byte is
// shifted in (decoder).
class Coder
{
public:
    enum Mode{ENC,DEC};
private:
    Predictor pred;
    U32 x0,x1,x;            // interval bounds; x = decoder's code window
    FILE *fp;               // archive being written (ENC) or read (DEC)
    Mode mode;
    enum {OUTBUFSIZE=8192};
    U32 outpos;             // number of pending bytes in outbuf
    U8 outbuf[OUTBUFSIZE];  // encoder output buffer

    // Split point of [x0,x1] for the current prediction. Shared by
    // encode() and decode() so both sides compute it identically.
    U32 split() const
    {
        U32 p=pred.predict(); p+=(p<2048); // 1 ~ fff
        const U32 range=x1-x0;
        // Split into high and low 12-bit parts so the product cannot overflow.
        return x0+(range>>12)*p+((range&0xfff)*p>>12);
    }

    // Code one bit b (0 or 1). Publishes it in the global y, which
    // Predictor::update() reads.
    void encode(U32 b)
    {
        const U32 m=split();
        y=b;
        if(y) x1=m; else x0=m+1;
        pred.update();
        // Emit every leading byte on which both bounds agree.
        while(!((x0^x1)&0xff000000))
        {
            outbuf[outpos++]=x1>>24;
            if(outpos==OUTBUFSIZE)
            {
                fwrite(outbuf,1,OUTBUFSIZE,fp);
                outpos=0;
            }
            x0<<=8;
            x1=(x1<<8)+255;
        }
    }

    // Decode and return one bit; also stores it in the global y.
    U32 decode()
    {
        const U32 m=split();
        y=x<=m;
        if(y) x1=m; else x0=m+1;
        pred.update();
        while(!((x0^x1)&0xff000000))
        {
            // fgetc's result is masked to 8 bits, so an EOF return
            // (typically -1) is consumed as 0xff rather than as an error.
            x=(x<<8)+(fgetc(fp)&255);
            x0<<=8;
            x1=(x1<<8)+255;
        }
        return y;
    }
public:
    // m selects encoding or decoding; f is the archive stream. In DEC
    // mode the first four bytes are read into the code window.
    // Initializers are listed in member declaration order, which is the
    // order they actually run in.
    Coder(Mode m,FILE *f)
        :x0(0),x1(static_cast<U32>(-1)),x(0),fp(f),mode(m),outpos(0)
    {
        if(mode==DEC)
            for(U32 i=4;i;--i)
                x=(x<<8)+(fgetc(fp)&255);
    }

    // Reset the global data-type flag.
    void reset() {type=0;}

    // Encode one byte, most significant bit first.
    void enc(U8 c)
    {
        for(U32 i=8;i;) encode((c>>--i)&1);
    }

    // Decode one byte, most significant bit first.
    U8 dec()
    {
        U8 c=0;
        for(U32 i=8;i;--i) c+=c+decode();
        return c;
    }

    // ENC mode only: append the top byte of x0 and write out whatever is
    // buffered. outpos is always below OUTBUFSIZE here because encode()
    // resets it as soon as the buffer fills.
    void flush()
    {
        if(mode==ENC)
        {
            outbuf[outpos++]=x0>>24;
            fwrite(outbuf,1,outpos,fp);
            outpos=0;
        }
    }
};
////////////////////////////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -