📄 quant1_h263.asm
字号:
[i1]=r3;
//pcount: position of the last non-zero block coefficient in the scanning sequence after
//quantization -- this value is not written
rets=[sp++];
l3=[sp++];
i3=[sp++];
l2=[sp++];
i2=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantIntra_H263_C1I.end:
rts;
#else
/***********************************************************
*****************quant_h263_intra, rewritten by gary********
performance:
ASM C
cycle count:
Ipp32u xhQuantIntra_H263_C1I(Ipp16s* pSrcDst,Ipp32s QP,Ipp32s* pCount);
************************************************************/
#if 0
// Scalar variant of the H.263 intra quantizer. It is excluded from the build
// by the enclosing "#if 0"; the packed variant in the "#else" branch is the one
// that gets assembled.
//
// Register use on entry, as read by the code below:
//   r0 - address of the 16-bit coefficient buffer, quantized in place
//   r1 - index into _multipliers_H263 (scaled by 4 to form a byte offset)
//   r2 - address that receives the 32-bit result flag
// On exit:
//   [r2 on entry] = 1 when any quantized value was non-zero, otherwise -1
//   r0            = -1 (left over from building the flag)
// The first 16-bit coefficient is skipped; the loop handles the 63 that follow.
_xhQuantIntra_H263_C1I:
[--sp]=(r7:4,p5:3);
// NOTE(review): no call is visible in this routine, so saving rets looks
// unnecessary - confirm before removing.
[--sp]=rets;
// Zero the length registers used with i0/i1/i2.
// presumably this disables circular addressing - confirm.
l0=0;
l1=0;
l2=0;
i0=r0; // read pointer
i1=r0; // write pointer (same buffer, in-place update)
i2.l=_multipliers_H263;
i2.h=_multipliers_H263;
m0=2;
r4=r1<<2;
m2=r4;
i2+=m2; // i2 = &_multipliers_H263 + r1*4
i0+=m0;
i1+=m0;//to start from ac coeff
p2 = r2; // keep the result address; r2 is reused as an accumulator
r2 = 0; // OR-accumulator of all quantized magnitudes
p0=63; // loop trip count
r0=[i2];//mult
r4.l=w[i0++]; // preload the first coefficient for the loop
lsetup (ac_loop_h263_intra_start,ac_loop_h263_intra_end)lc0=p0;
ac_loop_h263_intra_start:
r4=r4.l(x); // sign-extend the 16-bit coefficient
r5=abs r4;
r5*=r0;
r5>>=16;//_SCALEBITS_H263
r2 = r2 | r5;
cc=bittst(r4,31); // cc = 1 when the original coefficient was negative
r6=-r5;
if !cc r6=r5; // r6 = quantized magnitude carrying the original sign
w[i1++]=r6.l;
ac_loop_h263_intra_end:
// Load the coefficient for the next iteration. On the final iteration this
// reads the halfword following the last processed coefficient; that value is
// never used.
r4.l=w[i0++];
cc = r2; // cc = 1 when any quantized value was non-zero
r2 = 1;
r0 = -1;
if !cc r2 = r0;
[p2] = r2; // store the flag: 1 or -1
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantIntra_H263_C1I.end:
rts;
#else
// Packed (two 16-bit coefficients per iteration) H.263 intra quantizer.
//
// Register use on entry, as read by the code below:
//   r0 - address of the 16-bit coefficient buffer, quantized in place;
//        accessed with 32-bit loads/stores
//   r1 - index into _multipliers_H263_P (scaled by 4 to form a byte offset)
//   r2 - address that receives the 32-bit result flag
// On exit:
//   [r2 on entry] = 1 when any quantized value was non-zero, otherwise -1
//   r0            = -1 (left over from building the flag)
// The loop runs 32 times, i.e. over 64 16-bit values. The first value (DC) is
// not quantized: it is saved, replaced by zero for the loop, and written back
// unchanged afterwards.
_xhQuantIntra_H263_C1I:
[--sp]=(r7:4,p5:3);
// NOTE(review): no call is visible in this routine, so saving rets looks
// unnecessary - confirm before removing.
[--sp]=rets;
// Zero the length registers used with i0/i1/i2.
// presumably this disables circular addressing - confirm.
l0=0;
l1=0;
l2=0;
i0=r0; // read pointer
b0=r0; //store base addr;
i1=r0; // write pointer (same buffer, in-place update)
r4=r1<<2;
m2=r4;
i2.l=_multipliers_H263_P;
i2.h=_multipliers_H263_P;
p5=r2;//the address of pcount
r2 = 0; // OR-accumulator of all quantized magnitudes
i2+=m2; // i2 = &_multipliers_H263_P + r1*4
// r0.l and r0.h are used as the multipliers for the low and high halves.
// presumably each table entry holds the same multiplier in both halves - confirm.
r0=[i2];//mult
p0=32; // loop trip count: 32 words = 64 halfwords
r4=[i0++]; // preload the first pair of coefficients
r7=r4.l(z); //store dc;
r4.l = 0; // DC goes through the loop as zero, so it does not affect r2
lsetup (ac_loop_h263_intra_start,ac_loop_h263_intra_end) lc0=p0;
ac_loop_h263_intra_start:
r5=abs r4(v); // per-half absolute value
// Per-half multiply by the quantizer multiplier.
// presumably (tfu) keeps the truncated upper 16 bits of each unsigned product,
// matching the ">>16" of the scalar variant - confirm against the ISA reference.
r5.l=r5.l*r0.l, r5.h=r5.h*r0.h(tfu);
r2 = r2 | r5;
// Split the two magnitudes so each sign-multiply below has one zero operand:
//   r1 = { 0, low magnitude },  r5 = { high magnitude, 0 }
r1 = r5.l(z);
r5.l = 0;
// r1.l = low magnitude with the sign of r4.l
r1.h=r1.l=sign(r4.h)*r1.h+sign(r4.l)*r1.l;
// r5.l = high magnitude with the sign of r4.h
r5.h=r5.l=sign(r4.h)*r5.h+sign(r4.l)*r5.l;
r6 = pack(r5.l,r1.l); // r6 = { signed high result, signed low result }
ac_loop_h263_intra_end:
// Store the finished pair and, in parallel, load the next pair. On the final
// iteration the load reads the word following the last processed pair; that
// value is never used.
[i1++]=r6 || r4=[i0++];
i0 = b0;
w[i0] = r7.l; //restore dc;
cc = r2; // cc = 1 when any quantized value was non-zero
r2 = 1;
r0 = -1;
if !cc r2=r0;
[p5]=r2; // returns nzCount; the caller only needs to know whether it is positive
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantIntra_H263_C1I.end:
rts;
#endif
#endif //end of intra quant func selection;
/***********************************************************
*****************quant_h263_inter***************************
performance:
ASM C
cycle count:1864 7249
************************************************************/
/*********************
_xhQuant_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i2;
[--sp]=l2;
[--sp]=i3;
[--sp]=l3;
[--sp]=rets;
l0=0;
i0=r0;
b0=r0;
l1=0;
i1=r0;
l2=0;
r4=r1<<2;
m2=r4;
i2.l=_multipliers_H263;
i2.h=_multipliers_H263;
i3.l=_mZigZagScan;
i3.h=_mZigZagScan;
p5=r2;//the address of pcount
r2 = -1;
i2+=m2;
r0=[i2];//mult
r7=r1<<1;
r3=r1>>1;
p0=64;
r4.l=w[i0++];
lsetup (acdc_loop_h263_inter_start,acdc_loop_h263_inter_end) lc0=p0;
acdc_loop_h263_inter_start:
r4=r4.l(x);
r5=abs r4;
r6=0;
r5=r5-r3;
cc=r5<r7;
if cc jump acdc_inter_zero;
r5*=r0;
r5>>=16;//_SCALEBITS_H263
r2=r2+r5;//sum
cc=bittst(r4,31);
r6=-r5;
if !cc r6=r5;
acdc_inter_zero:
w[i1++]=r6.l;
acdc_loop_h263_inter_end:
r4.l=w[i0++];
i1=p5; **************/
/*
p3=i3;
//p3+=1; inter from the dc
p0=64;
r7=b[p3++](z);
r3=-1;//in order to conform with the ippi
r6=b0;
lsetup(pcount_inter_start,pcount_inter_end)lc0=p0;
pcount_inter_start:
r4=r7<<1;
r4=r6+r4;
i0=r4;
r5.l=w[i0];
r5=r5.l(x);
cc=r5;
if cc r3=r7;
pcount_inter_end:
r7=b[p3++](z);
*/
/***********************************
[i1]=r2;
// r0=r2;//return(sum)
rets=[sp++];
l3=[sp++];
i3=[sp++];
l2=[sp++];
i2=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuant_H263_C1I.end:
rts;
*******************************/
/******************************
//modified by gary, 2007-06-08
_xhQuant_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=rets;
l0=0;
l1=0;
l2=0;
// l3=0;
i0=r0;
// b0=r0;
i1=r0;
r4=r1<<2;
m2=r4;
i2.l=_multipliers_H263;
i2.h=_multipliers_H263;
// i3.l=_mZigZagScan;
// i3.h=_mZigZagScan;
p5=r2;//the address of pcount
r2 = -1;
i2+=m2;
r0=[i2];//mult
// r7=r1<<1;
r3=r1>>1;
p0=64;
r4.l=w[i0++];
lsetup (acdc_loop_h263_inter_start,acdc_loop_h263_inter_end) lc0=p0;
acdc_loop_h263_inter_start:
r4=r4.l(x);
r5=abs r4;
// r6=0;
r5=r5-r3;
// cc=r5<r7; //note: r5 may be negative here.
// if cc jump acdc_inter_zero;
r5=abs r5;
r5*=r0;
r5>>=16;//_SCALEBITS_H263
r2=r2+r5;//nzCount: only its sign is needed, so this simplified accumulation is sufficient.
cc=bittst(r4,31);
r6=-r5;
if !cc r6=r5;
// acdc_inter_zero:
acdc_loop_h263_inter_end:
w[i1++]=r6.l || r4.l=w[i0++];
// i1=p5;
// [i1]=r2;
[p5]=r2;
// r0=r2;//return(sum)
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuant_H263_C1I.end:
rts;
******************************/
//modified by gary, 2007-06-14
//553 -- 756 cycles
//
// Packed (two 16-bit coefficients per iteration) H.263 inter quantizer.
//
// Register use on entry, as read by the code below:
//   r0 - address of the 16-bit coefficient buffer, quantized in place;
//        accessed with 32-bit loads/stores
//   r1 - used both as the index into _multipliers_H263_P (scaled by 4) and,
//        shifted right by one, as the value subtracted from each magnitude
//   r2 - address that receives the 32-bit result flag
// On exit:
//   [r2 on entry] = 0 when any quantized value was non-zero, otherwise -1
//   r0            = -1 (left over from building the flag)
// The loop runs 32 times, i.e. over 64 16-bit values, including the first one.
_xhQuant_H263_C1I:
[--sp]=(r7:4,p5:3);
// NOTE(review): no call is visible in this routine, so saving rets looks
// unnecessary - confirm before removing.
[--sp]=rets;
// Zero the length registers used with i0/i1/i2.
// presumably this disables circular addressing - confirm.
l0=0;
l1=0;
l2=0;
i0=r0; // read pointer
i1=r0; // write pointer (same buffer, in-place update)
r4=r1<<2;
m2=r4;
i2.l=_multipliers_H263_P;
i2.h=_multipliers_H263_P;
p5=r2;//the address of pcount
// r2.l = -1;
r2 = 0; // OR-accumulator of all quantized magnitudes
i2+=m2; // i2 = &_multipliers_H263_P + r1*4
// r0.l and r0.h are used as the multipliers for the low and high halves.
// presumably each table entry holds the same multiplier in both halves - confirm.
r0=[i2];//mult
// r7=r1<<1;
// Build r3 = { r1>>1, r1>>1 }: the same offset in both 16-bit halves.
r7=r1>>1;
r3=r7<<16;
r3=r3+r7;
p0=32; // loop trip count: 32 words = 64 halfwords
r4=[i0++]; // preload the first pair of coefficients
lsetup (acdc_loop_h263_inter_start,acdc_loop_h263_inter_end) lc0=p0;
acdc_loop_h263_inter_start:
r5=abs r4(v); // per-half absolute value
r5=r5-|-r3; // per-half: magnitude - (r1>>1)
// The difference is negative when the magnitude is below r1>>1; taking abs
// again makes it non-negative before the unsigned multiply.
// NOTE(review): this presumably relies on such small values scaling to zero
// in the multiply below - confirm against the contents of _multipliers_H263_P.
r5=abs r5(v);
// Per-half multiply by the quantizer multiplier.
// presumably (tfu) keeps the truncated upper 16 bits of each unsigned product,
// replacing the explicit ">>16" commented out below - confirm against the ISA reference.
r5.l=r5.l*r0.l, r5.h=r5.h*r0.h(tfu);
// r5>>=16;//_SCALEBITS_H263
// r2.l=r2.l+r5.l(s);//nzCount: only its sign is needed, so this simplified accumulation is sufficient.
// r2.l=r2.l+r5.h(s);
r2 = r2 | r5;
// Split the two magnitudes so each sign-multiply below has one zero operand:
//   r1 = { 0, low magnitude },  r5 = { high magnitude, 0 }
r1 = r5.l(z);
r5.l = 0;
// r1.l = low magnitude with the sign of r4.l
r1.h=r1.l=sign(r4.h)*r1.h+sign(r4.l)*r1.l;
// r5.l = high magnitude with the sign of r4.h
r5.h=r5.l=sign(r4.h)*r5.h+sign(r4.l)*r5.l;
r6 = pack(r5.l,r1.l); // r6 = { signed high result, signed low result }
acdc_loop_h263_inter_end:
// Store the finished pair and, in parallel, load the next pair. On the final
// iteration the load reads the word following the last processed pair; that
// value is never used.
[i1++]=r6 || r4=[i0++];
cc = r2; // cc = 1 when any quantized value was non-zero
r2 = 0;
r0 = -1;
if !cc r2=r0;
// r2 = r2.l(x);
[p5]=r2; // returns nzCount; the caller only needs to know whether it is negative
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuant_H263_C1I.end:
rts;
/****************************
_xhQuant_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=rets;
l0=0;
l1=0;
l2=0;
l3=0;
i0=r0;
i1=r0;
r4=r1<<2;
m2=r4;
r0+=4;
i3=r0;
m0=8;
i2.l=_multipliers_H263_P;
i2.h=_multipliers_H263_P;
p5=r2;//the address of pcount
// r2.l = -1;
r2 = 0;
i2+=m2;
r0=[i2];//mult
i2=i3;
// r7=r1<<1;
r7=r1>>1;
r3=r7<<16;
r3=r3+r7;
p0=16;
r4=[i0++m0] || r6=[i2++m0];
lsetup (acdc_loop_h263_inter_start,acdc_loop_h263_inter_end) lc0=p0;
acdc_loop_h263_inter_start:
r5=abs r4(v);
r7=abs r6(v);
r5=r5-|-r3;
r7=r7-|-r3;
r5=abs r5(v);
r7=abs r7(v);
r5.l=r5.l*r0.l, r5.h=r5.h*r0.h(tfu);
r7.l=r7.l*r0.l, r7.h=r7.h*r0.h(tfu);
// r5>>=16;//_SCALEBITS_H263
// r2.l=r2.l+r5.l(s);//nzCount: only its sign is needed, so this simplified accumulation is sufficient.
// r2.l=r2.l+r5.h(s);
r2 = r2 | r5;
r1 = r5.l(z);
r5.l = 0;
r1.h=r1.l=sign(r4.h)*r1.h+sign(r4.l)*r1.l;
r5.h=r5.l=sign(r4.h)*r5.h+sign(r4.l)*r5.l;
r2 = r2 | r7;
r5 = pack(r5.l,r1.l);
r1 = r7.l(z);
r7.l = 0;
r1.h=r1.l=sign(r6.h)*r1.h+sign(r6.l)*r1.l;
r7.h=r7.l=sign(r6.h)*r7.h+sign(r6.l)*r7.l || [i1++m0]=r5 || r4=[i0++m0];
r7 = pack(r7.l,r1.l);
acdc_loop_h263_inter_end:
[i3++m0]=r7 || r6=[i2++m0];
cc = r2;
r2 = 0;
r0 = -1;
if !cc r2=r0;
// r2 = r2.l(x);
[p5]=r2; //returns nzCount; the caller only needs to know whether it is negative
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuant_H263_C1I.end:
rts;
*************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -