📄 quant_mpeg4.asm
字号:
// Entry points exported by this file (one routine per quantiser direction).
.global _xhQuantInvIntra_MPEG4_16s_C1I;
.global _xhQuantInv_MPEG4_16s_C1I;
.global _xhQuantIntra_MPEG4_16s_C1I;
.global _xhQuant_MPEG4_16s_C1I;
// Scan-order table defined in another module; the two forward quantisers
// read it one byte per entry when they compute *pCount.
.extern _mZigZagScan;
.section L1_data_b;
.global _multipliers_MPEG;
/*
 * Reciprocal table used by the forward quantisers, indexed by QP (0..31).
 * The code reads it with a 4-byte stride (QP << 2), multiplies by the entry
 * and shifts the product right by 17.
 * For q = 1..31 every entry equals floor(65536 / q) + 1, so
 * (x * entry) >> 17 approximates x / (2 * q).
 * Entry 0 is zero.
 */
.var _multipliers_MPEG[32] =
{
0x0, 0x010001,0x8001, 0x5556,
0x4001, 0x3334, 0x2aab, 0x2493,
0x2001, 0x1c72, 0x199a, 0x1746,
0x1556, 0x13b2, 0x124a, 0x1112,
0x1001, 0x0f10, 0x0e39, 0x0d7a,
0x0ccd, 0x0c31, 0x0ba3, 0x0b22,
0x0aab, 0x0a3e, 0x09d9, 0x097c,
0x0925, 0x08d4, 0x0889, 0x0843
};
.section L1_code;
/*************************************************************************
 * _xhQuantInvIntra_MPEG4_16s_C1I(Ipp16s* pSrcDst, int QP, Ipp16s* pQPMatrix)
 *
 * In-place intra inverse quantisation of coefficients 1..63 of one block.
 * Element 0 (the DC term) is neither read nor written by this routine.
 *
 * In:   r0 = pSrcDst    16-bit coefficients, rewritten in place
 *       r1 = QP
 *       r2 = pQPMatrix  16-bit entries here (Ipp16s), not bytes
 *
 * For every non-zero coefficient c at index i:
 *       level = (|c| * pQPMatrix[i] * QP) >> 3
 *       c > 0 :  c =  min(level, 2047)
 *       c < 0 :  c = -min(level, 2048)
 * Zero coefficients are stored back unchanged.
 *
 * r0 is not given a defined return value.
 * m0, p0, r0-r3 and loop unit 0 are overwritten and not restored.
 *
 * NOTE(review): the loop is software pipelined, so after the last iteration
 * one extra element is fetched from both pSrcDst and pQPMatrix (index 64).
 * The value is discarded, but the read itself happens.
 *
 * Performance (figures from the original author):
 *                 ASM     C
 *   cycle count:  1497    5467
 *************************************************************************/
_xhQuantInvIntra_MPEG4_16s_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i2;
[--sp]=l2;
[--sp]=rets;
i0=r0;                         // i0 = store pointer into pSrcDst
m0=2;                          // size of one 16-bit element
l2=0;                          // linear (non-circular) addressing for i2
i2=r2;
i2+=m0;                        // skip matrix entry 0 (DC)
p3=i2;                         // p3 = &pQPMatrix[1]
l0=0;                          // linear addressing for i0
l1=0;                          // linear addressing for i1: it is post-modified
                               // below, so a stale non-zero L1 would wrap it
i0+=m0;                        // skip coefficient 0 (DC)
i1=i0;                         // i1 = load pointer, runs one element ahead of i0
mnop||r0.l=w[i1++]||r3=w[p3++](x);   // prefetch coefficient 1 and its matrix entry
p0=63;                         // 63 AC coefficients
lsetup (ac_loop_mpeg4_intra_inv_start,ac_loop_mpeg4_intra_inv_end)lc0=p0;
ac_loop_mpeg4_intra_inv_start:
r0=r0.l(x);                    // sign-extend the coefficient
cc=r0;
if !cc jump ac_loop_store;     // zero stays zero
r7=abs r0;
r7*=r3;                        // * pQPMatrix[i]
r7*=r1;                        // * QP
r7>>=3;                        // r7 = level
cc=bittst(r0,31);              // remember the original sign
r0=r7;
if cc jump negativevalue;
r6=2047;
cc=r7<r6;
if !cc r0=r6;                  // positive result saturates at 2047
jump ac_loop_store;
negativevalue:
r0=-r7;
r6=-2048;
r5=2048;
cc=r7<r5;
if !cc r0=r6;                  // negative result saturates at -2048
ac_loop_store:
w[i0++]=r0.l;                  // write the result over the input coefficient
ac_loop_mpeg4_intra_inv_end:
mnop||r0.l=w[i1++]||r3=w[p3++](x);   // fetch next coefficient / matrix entry
rets=[sp++];
l2=[sp++];
i2=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInvIntra_MPEG4_16s_C1I.end:
rts;
/**********************************************************************
 * _xhQuantInv_MPEG4_16s_C1I(Ipp16s* pSrcDst, int QP, Ipp16s* pQPMatrix)
 *
 * In-place inverse quantisation of all 64 coefficients of one block
 * (the loop starts at element 0, so the DC term is treated like the rest),
 * followed by a parity correction on the last coefficient.
 *
 * In:   r0 = pSrcDst    16-bit coefficients, rewritten in place
 *       r1 = QP
 *       r2 = pQPMatrix  16-bit entries here (Ipp16s), not bytes
 *
 * For every non-zero coefficient c at index i:
 *       level = ((2 * |c| + 1) * QP * pQPMatrix[i]) >> 4
 *       c > 0 :  c =  min(level, 2047)
 *       c < 0 :  c = -min(level, 2048)
 * Zero coefficients are stored back unchanged.
 *
 * All stored values are XOR-ed together in r4. If bit 0 of that XOR is
 * clear, bit 0 of coefficient 63 is toggled.
 *
 * r0 is not given a defined return value.
 * m0, p0, r0-r3 and loop unit 0 are overwritten and not restored.
 * i2/l2 are saved and restored but not otherwise used here.
 *
 * NOTE(review): the loop is software pipelined, so after the last iteration
 * one extra element is fetched from both pSrcDst and pQPMatrix (index 64).
 *
 * Performance (figures from the original author):
 *                 ASM     C
 *   cycle count:  1034    3701
 ***********************************************************************/
_xhQuantInv_MPEG4_16s_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i2;
[--sp]=l2;
[--sp]=rets;
p3=r2;                         // p3 = pQPMatrix
l0=0;                          // linear addressing for i0
i0=r0;                         // i0 = load pointer
l1=0;                          // linear addressing for i1
i1=r0;                         // i1 = store pointer
r4=0;// XOR accumulator over all stored coefficients (called "sum" originally)
r0.l=w[i0++]||r3=w[p3++](x);   // prefetch coefficient 0 and its matrix entry
p0=64;                         // all 64 coefficients
lsetup(acdc_loop_mpeg4_inter_inv_start,acdc_loop_mpeg4_inter_inv_end)lc0=p0;
acdc_loop_mpeg4_inter_inv_start:
r0=r0.l(x);                    // sign-extend the coefficient
cc=r0;
if !cc jump acdc_zero;         // zero stays zero
r2=abs r0;
r2<<=1;
r2+=1;                         // 2*|c| + 1
r2*=r1;                        // * QP
r2*=r3;                        // * pQPMatrix[i]
r2>>=4;                        // r2 = level
cc=bittst(r0,31);              // remember the original sign
r0=r2;
if cc jump acdc_value_neg;
r7=2047;
cc=r2<r7;
if !cc r0=r7;                  // positive result saturates at 2047
jump acdc_zero;
acdc_value_neg:
r0=-r2;
r7=2048;
r6=-2048;
cc=r2<r7;
if !cc r0=r6;                  // negative result saturates at -2048
acdc_zero:
w[i1++]=r0.l;                  // write the result over the input coefficient
r4=r4^r0;                      // fold the stored value into the parity accumulator
acdc_loop_mpeg4_inter_inv_end:
r0.l=w[i0++]||r3=w[p3++](x);   // fetch next coefficient / matrix entry
cc=bittst(r4,0);
if cc jump undo_mismatch;      // parity bit set: leave the block as it is
m0=2;
r5=1;
i1-=m0;                        // i1 is one past the end; step back to element 63
r0.l=w[i1];
r0=r0.l(x);
r0=r0^r5;                      // toggle bit 0 of the last coefficient
w[i1]=r0.l;
undo_mismatch:
rets=[sp++];
l2=[sp++];
i2=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInv_MPEG4_16s_C1I.end:
rts;
/*****************************************************************
 * _xhQuantIntra_MPEG4_16s_C1I(
 *         Ipp16s*       pSrcDst,
 *         Ipp32s        QP,
 *         const Ipp32f* pQPMatrix,
 *         Ipp32s*       pCount)
 *
 * In-place intra quantisation of coefficients 1..63 of one block.
 * Element 0 (the DC term) is not touched.
 *
 * Original author's note: although the prototype says Ipp32f, the matrix
 * is read as an Ipp8u table (mDefaultIntraQuantMatrix), because multiplying
 * a fraction by an integer was considered difficult here. The code below
 * does read pQPMatrix one unsigned byte per entry.
 *
 * In:   r0 = pSrcDst     16-bit coefficients, rewritten in place
 *       r1 = QP          also used as index into _multipliers_MPEG
 *       r2 = pQPMatrix   read as bytes, starting at entry 1
 *       [sp+12] on entry = pCount (fetched as [sp+12+56] after the
 *                          prologue has pushed 14 words = 56 bytes)
 *
 * With quantd = (3 * QP + 2) >> 2 and mult = _multipliers_MPEG[QP],
 * for every non-zero coefficient c at index i, m = pQPMatrix[i]:
 *       level = ((|c| << 4) + (m >> 1)) / m
 *       level = ((level + quantd) * mult) >> 17
 *       c     = level with the sign of the original c
 * Zero coefficients are stored back unchanged.
 *
 * Afterwards _mZigZagScan[1..63] is walked in order; *pCount receives the
 * table value (not the position in the table) of the last entry whose
 * coefficient is non-zero, or -1 if none is.
 *
 * Out:  r0 = 0
 * b0, m0, p0-p2, r1-r3 and both loop units are overwritten and not restored.
 *
 * NOTE(review): both loops prefetch one element past the last one they
 * process (pSrcDst[64], pQPMatrix[64], _mZigZagScan[64]).
 *
 * Performance (figures from the original author):
 *                 ASM     C
 *   cycle count:  3231    10359
 ****************************************************************/
_xhQuantIntra_MPEG4_16s_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i3;
[--sp]=l3;
[--sp]=rets;
i0=r0;
m0=2;                          // size of one 16-bit element
r2+=1;                         // skip matrix byte 0 (DC)
l0=0;                          // linear addressing for i0
i0+=m0;// start at the first AC coefficient
b0=r0;// keep the block base address for the pCount scan
l1=0;                          // linear addressing for i1
i1=i0;                         // i1 = store pointer
p3=r2;// p3 = &pQPMatrix[1] (byte pointer)
l3=0;
i3.l=_mZigZagScan;
i3.h=_mZigZagScan;             // i3 just holds the table address until the scan
r7.l=_multipliers_MPEG;
r7.h=_multipliers_MPEG;
r2=r1<<2;                      // QP * 4: byte offset of the 32-bit table entry
r7=r2+r7;
p4=r7;
r2=[p4];// r2 = mult = _multipliers_MPEG[QP]
r5=3;
p5=[sp+12+56];// p5 = pCount: 4th argument, read past the 56 bytes pushed above
r1*=r5;// 3 * QP (original note: "multiply the VM18P")
r1+=2;
r1>>=2;// r1 = quantd = (3*QP + 2) >> 2
p1=16;                         // number of divq steps per division
r0.l=w[i0++]||r5=b[p3++](z);// prefetch; the matrix entry is one byte, so p3 steps by 1
p0=63;                         // 63 AC coefficients
lsetup(ac_loop_mpeg4_intra_start,ac_loop_mpeg4_intra_end)lc0=p0;
ac_loop_mpeg4_intra_start:
r0=r0.l(x);                    // sign-extend the coefficient
cc=r0;
if !cc jump ac_zero;           // zero stays zero
r4=abs r0;
r4<<=4;                        // |c| << 4
r6=r5>>1;
r4=r4+r6;                      // + m/2 for rounding
divs(r4,r5);                   // start the divide primitive: r4 / r5
lsetup(div_start,div_end)lc1=p1;
div_start:
div_end:
divq(r4,r5);                   // single-instruction loop, executed p1 times
r4=r4.l(x);                    // quotient is taken from the low half of r4
r4=r4+r1;                      // + quantd
//r4*=r5;//(level << 4) * pQPMatrix[i]
// (original note) "0.5 is missing"
r4*=r2;                        // * mult
r4>>=17;// SCALEBITS_MPEG
cc=bittst(r0,31);              // original sign
r0=r4;
r4=-r4;
if cc r0=r4;                   // restore the sign
ac_zero:
w[i1++]=r0.l;                  // write the result over the input coefficient
ac_loop_mpeg4_intra_end:
r0.l=w[i0++]||r5=b[p3++](z);   // fetch next coefficient / matrix byte
// (original note) "pcount computation missing" - the scan below computes it
i1=p5;// i1 = pCount
//w[i1]=-1;
p3=i3;                         // p3 = _mZigZagScan
p3+=1;                         // skip table entry 0
p0=63;
r7=b[p3++](z);                 // prefetch table entry 1
r3=-1;// result when no scanned coefficient is non-zero (original note: to conform with ippi)
r6=b0;                         // block base address
lsetup(pcount_start,pcount_end)lc0=p0;
pcount_start:
r4=r7<<1;                      // table value * 2 = byte offset of that coefficient
r4=r6+r4;
i0=r4;
r5.l=w[i0];
r5=r5.l(x);
cc=r5;
if cc r3=r7;                   // remember the table value of the latest non-zero hit
pcount_end:
r7=b[p3++](z);                 // fetch next table entry
[i1]=r3;                       // *pCount = r3 (32-bit store)
r0=0;                          // return value
rets=[sp++];
l3=[sp++];
i3=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantIntra_MPEG4_16s_C1I.end:
rts;
/************************************************************************
 * _xhQuant_MPEG4_16s_C1I(
 *         Ipp16s*       pSrcDst,
 *         Ipp32s        QP,
 *         const Ipp32f* pQPMatrix,
 *         Ipp32s*       pCount)
 *
 * In-place quantisation of all 64 coefficients of one block (the loop
 * starts at element 0, so the DC term is treated like the rest).
 *
 * Original author's note: although the prototype says Ipp32f, an Ipp8u
 * matrix is used instead, because multiplying a fraction by an integer was
 * considered difficult here. The code below does read pQPMatrix one
 * unsigned byte per entry.
 *
 * In:   r0 = pSrcDst     16-bit coefficients, rewritten in place
 *       r1 = QP          used as index into _multipliers_MPEG
 *       r2 = pQPMatrix   read as bytes, starting at entry 0
 *       [sp+12] on entry = pCount (fetched as [sp+12+56] after the
 *                          prologue has pushed 14 words = 56 bytes)
 *
 * With mult = _multipliers_MPEG[QP], for every non-zero coefficient c at
 * index i, m = pQPMatrix[i]:
 *       level = ((|c| << 4) + (m >> 1)) / m
 *       level = (level * mult) >> 17
 *       sum  += level
 *       c     = level with the sign of the original c
 * Zero coefficients are stored back unchanged.
 *
 * Afterwards _mZigZagScan[0..63] is walked in order; *pCount receives the
 * table value (not the position in the table) of the last entry whose
 * coefficient is non-zero, or -1 if none is.
 *
 * Out:  r0 = sum of the quantised magnitudes
 * b0, p0-p2, r1-r3 and both loop units are overwritten and not restored.
 *
 * NOTE(review): both loops prefetch one element past the last one they
 * process (pSrcDst[64], pQPMatrix[64], _mZigZagScan[64]).
 *
 * Performance (figures from the original author):
 *                 ASM     C
 *   cycle count:  2193    7760
 **************************************************************************/
_xhQuant_MPEG4_16s_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i3;
[--sp]=l3;
[--sp]=rets;
i0=r0;                         // i0 = load pointer
l0=0;                          // linear addressing for i0
b0=r0;                         // keep the block base address for the pCount scan
l1=0;                          // linear addressing for i1
i1=i0;                         // i1 = store pointer
p3=r2;// p3 = pQPMatrix (byte pointer)
l3=0;
i3.l=_mZigZagScan;
i3.h=_mZigZagScan;             // i3 just holds the table address until the scan
r7.l=_multipliers_MPEG;
r7.h=_multipliers_MPEG;
r2=r1<<2;                      // QP * 4: byte offset of the 32-bit table entry
r7=r2+r7;
p4=r7;
p5=[sp+12+56];// p5 = pCount: 4th argument, read past the 56 bytes pushed above
r1=0;// r1 = sum of quantised magnitudes
r2=[p4];// r2 = mult = _multipliers_MPEG[QP]
r0.l=w[i0++]||r5=b[p3++](z);   // prefetch coefficient 0 and matrix byte 0
p0=64;                         // all 64 coefficients
p1=16;                         // number of divq steps per division
lsetup(acdc_loop_mpeg4_inter_start,acdc_loop_mpeg4_inter_end)lc0=p0;
acdc_loop_mpeg4_inter_start:
r0=r0.l(x);                    // sign-extend the coefficient
cc=r0;
if !cc jump acdc_inter_zero;   // zero stays zero
r4=abs r0;
r4<<=4;                        // |c| << 4
r6=r5>>1;
r4=r4+r6;                      // + m/2 for rounding
divs(r4,r5);                   // start the divide primitive: r4 / r5
lsetup(div_inter_start,div_inter_end)lc1=p1;
div_inter_start:
div_inter_end:
divq(r4,r5);                   // single-instruction loop, executed p1 times
r4=r4.l(x);                    // quotient is taken from the low half of r4
// r4*=r5;//(level << 4)*pQPMatrix[i]  (original note) "0.5 is missing"
r4*=r2;                        // * mult
r4>>=17; // SCALEBITS_MPEG
r1=r1+r4;// accumulate the magnitude before the sign is applied
cc=bittst(r0,31);              // original sign
r0=r4;
r4=-r4;
if cc r0=r4;                   // restore the sign
/* if !cc jump acdc_inter_zero;
r0=-r4; */
acdc_inter_zero:
w[i1++]=r0.l;                  // write the result over the input coefficient
acdc_loop_mpeg4_inter_end:
r0.l=w[i0++]||r5=b[p3++](z);   // fetch next coefficient / matrix byte
// (original note) "pcount computation missing" - the scan below computes it
i1=p5;// i1 = pCount
//w[i1]=-1;
p3=i3;                         // p3 = _mZigZagScan
p0=64;
r7=b[p3++](z);                 // prefetch table entry 0
r3=-1;// result when no scanned coefficient is non-zero (original note: to conform with ippi)
r6=b0;                         // block base address
lsetup(pcount_inter_start,pcount_inter_end)lc0=p0;
pcount_inter_start:
r4=r7<<1;                      // table value * 2 = byte offset of that coefficient
r4=r6+r4;
i0=r4;
r5.l=w[i0];
r5=r5.l(x);
cc=r5;
if cc r3=r7;                   // remember the table value of the latest non-zero hit
pcount_inter_end:
r7=b[p3++](z);                 // fetch next table entry
[i1]=r3;                       // *pCount = r3 (32-bit store)
r0=r1;                         // return the accumulated sum
rets=[sp++];
l3=[sp++];
i3=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuant_MPEG4_16s_C1I.end:
rts;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -