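// File: get_interpolat_uv.asm
// (The original second line was a "font size" control label left over from the web code viewer; it is not part of the source.)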
// mcType selector values. _get_interpolate_uv compares its mcType argument
// against FF, FH and HF in turn; IPPVC_MC_APX_HH is never compared explicitly
// because the HH path is simply the fall-through when none of the others match.
#define IPPVC_MC_APX_FF 0
#define IPPVC_MC_APX_FH 4
#define IPPVC_MC_APX_HF 8
#define IPPVC_MC_APX_HH 12
// Export the entry point and place the code in the section named L1_code.
.global _get_interpolate_uv;
.section L1_code;
/**************************************************************************
void get_interpolate_uv(Ipp8u *pSrc,int srcStep,Ipp8u *pDst,Ipp32s mcType);

What the code below does:
  Dispatches on mcType to one of four loops. Every loop runs 8 iterations
  and performs two 32-bit stores through I3 per iteration, so 64 bytes are
  written contiguously starting at pDst (one 8-byte output row per
  iteration, no destination stride).

    IPPVC_MC_APX_FF (0) : BYTEOP1P of each source row with itself.
    IPPVC_MC_APX_FH (4) : BYTEOP1P of the row at pSrc and the row at
                          pSrc + srcStep.
    IPPVC_MC_APX_HF (8) : BYTEOP1P of each row with the same row taken one
                          byte further on.
    any other value     : falls through to the BYTEOP2P-based HH path, which
                          combines the row at pSrc with the row at
                          pSrc + srcStep at two byte alignments.

  BYTEOP1P / BYTEOP2P are the Blackfin quad 8-bit averaging instructions;
  their byte alignment is taken from the low two bits of I0 (and I1 for
  BYTEOP1P) -- see the Blackfin programming reference for exact rounding.

Arguments as used here:
  R0 = pSrc, R1 = srcStep, R2 = pDst.
  mcType is read from [sp+44] after 32 bytes of register pushes
  (presumably [sp+12] at entry -- confirm against the toolchain's calling
  convention).

Registers written:
  R0-R3, R5-R7, P0, P2, P5, I0-I3, M0, M1, M3, B3, L0-L3 (set to 0), CC and
  the loop-0 registers. R4-R7, P3-P5 and RETS are pushed on entry and
  restored on exit.

NOTE(review): an mcType other than 0/4/8 is not rejected; it silently takes
  the HH path. Confirm callers only pass the four IPPVC_MC_APX_* values.
NOTE(review): every loop preloads the first word of the following row at the
  end of each iteration, so the last iteration reads memory beyond the rows
  actually used (the value is discarded).
NOTE(review): the alignment selectors (low bits of I0/I1) are set up from the
  first row only in the HF path, which looks like it assumes srcStep is a
  multiple of 4 -- TODO confirm.
***************************************************************************/
_get_interpolate_uv:
// Save R4-R7 and P3-P5 (7 words) plus RETS (1 word): 32 bytes pushed in total.
[--sp]=(r7:4,p5:3);
[--sp]=rets;
// Zero all four L (circular-buffer length) registers before using I0-I3.
l3=0;
L2=0;
l1=0;
L0=0;
I3=R2;// I3 = pDst, the store pointer used by every path
i0=r0;// I0 = pSrc
m3=r0;// NOTE(review): M3 is not read before the HH path reloads it with -3
b3=r0;// NOTE(review): B3 is not read anywhere in this routine
p5=r1;// NOTE(review): P5 is not read again before it is restored on exit
r6=[sp+44];// R6 = mcType (stack argument; offset includes the 32 bytes pushed above)
m1=r1;// NOTE(review): every path that uses M1 rewrites it first
R7=R1;// R7 = working copy of srcStep
p0=8;// iteration count for the FF/FH/HF loops
//M2=12;// in order to match Step =16;
/***********************IPPVC_MC_APX_FF**********************/
cc=r6==IPPVC_MC_APX_FF;
if !cc jump FH_8x8;
R7+=-8;
m0=R7;// M0 = srcStep-8: takes a pointer that is 8 bytes into a row to the start of the next row
i1=i0;// second operand stream reads the same row as the first
lsetup (ff_8x8_start,ff_8x8_end)lc0=p0;
disalgnexcpt||r0=[i0++]||r2=[i1++];// preload word 0 of the first row into both streams
ff_8x8_start:
disalgnexcpt||r1=[i0++]||r3=[i1++];// word 1 of the row
r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];// R6 = first 4 output bytes; load word 2 and step both pointers to the next row
r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];// R7 = last 4 output bytes ((r) = reversed pair order); store R6; preload next row
ff_8x8_end:
disalgnexcpt||[i3++]=r7||r2=[i1++];// store R7; preload next row for the second stream
jump interpolate_8x8_end;
/**********************IPPVC_MC_APX_FH***********************/
FH_8x8:
r5=IPPVC_MC_APX_FH;
cc=r6==r5;
if !cc jump HF_8x8;
m1=r7;// M1 = srcStep (R7 has not been modified on this path yet)
r7+=-8;
m0=r7;// M0 = srcStep-8, same row-advance trick as the FF path
lsetup (fh_8x8_start,fh_8x8_end)lc0=p0;
i1=i0;
i1+=m1;// I1 = pSrc + srcStep: second stream reads the row below
disalgnexcpt||r0=[i0++]||r2=[i1++];// preload word 0 of both rows
fh_8x8_start:
disalgnexcpt||r1=[i0++]||r3=[i1++];// word 1 of both rows
r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];// R6 = first 4 output bytes; load word 2, step both pointers to their next rows
r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];// R7 = last 4 output bytes; store R6; preload next row (stream 0)
fh_8x8_end:
disalgnexcpt||[i3++]=r7||r2=[i1++];// store R7; preload next row (stream 1)
jump interpolate_8x8_end;
/************************IPPVC_MC_APX_HF************************/
HF_8x8:
r5=IPPVC_MC_APX_HF;
cc=r6==r5;
if !cc jump HH_8x8;
m1=1;// one-byte offset used to form I1 = I0 + 1
r7+=-8;
m0=r7;// M0 = srcStep-8, same row-advance trick as the FF path
i1=i0;
i1+=m1;// I1 = pSrc + 1. I1 is not used for loads and is not modified in the loops of this path; data for the second stream is fetched through I2
// The check below handles the case where the low two bits of I0 are both 1.
// In that case the low two bits of I0+1 are both 0, so the register selected
// for the second operand is R2 rather than R3; it is therefore handled
// separately (byte_align_HF starts the second stream one word later).
r6=3; // mask for the low two address bits
r7=i0; // R7 = current source address
r7=r7&r6; // R7 = address & 3
cc=r7==r6; // CC set when (address & 3) == 3
if cc jump byte_align_HF;
lsetup(hf_8x8_start,hf_8x8_end)lc0=p0;
i2=i0;// second stream loads the same words as the first
disalgnexcpt||r0=[i0++]||r2=[i2++];// preload word 0 into both streams
hf_8x8_start:
disalgnexcpt||r1=[i0++]||r3=[i2++];// word 1
r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];// R6 = first 4 output bytes; load word 2, step both pointers to the next row
r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];// R7 = last 4 output bytes; store R6; preload next row (stream 0)
hf_8x8_end:
disalgnexcpt||[i3++]=r7||r2=[i2++];// store R7; preload next row (stream 1)
jump interpolate_8x8_end;
byte_align_HF: // special case: (I0 & 3) == 3
lsetup(bytealign_start,bytealign_end) lc0=p0;
i2=i0;
i2+=4;// second stream starts one 32-bit word after the first
disalgnexcpt||r0=[i0++]||r2=[i2++];// preload: R0 = word 0, R2 = word 1
bytealign_start:
disalgnexcpt||r1=[i0++]||r3=[i2++];// R1 = word 1, R3 = word 2
r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];// R6 = first 4 output bytes; load the following words, step both pointers to the next row
r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];// R7 = last 4 output bytes; store R6; preload next row (stream 0)
bytealign_end:
disalgnexcpt||[i3++]=r7||r2=[i2++];// store R7; preload next row (stream 1)
jump interpolate_8x8_end;
/************************IPPVC_MC_APX_HH************************/
HH_8x8:
/*
Disabled earlier HH implementation, kept for reference only. The live HH
code follows this comment block.
m1=1;
m2=r7;//src step
r7+=-8;
m0=r7;
i1=i0;
r6=3; // This check handles the case where the low two bits of I0 are
r7=i0; // both 1: after I0+1 the low two bits are both 0, so the
r7=r7&r6; // register selected is R2 rather than R3, and the case is
cc=r7==r6; // therefore handled separately.
if cc jump byte_align_HH;
// p1=2;
lsetup(hh_8x8_start,hh_8x8_end)lc0=p0;
i2=i0;
i2+=m2;
disalgnexcpt||r0=[i0++]||r2=[i2++];
hh_8x8_start:
disalgnexcpt||r1=[i0++]||r3=[i2++];
r6=byteop2p(r1:0,r3:2)(rndl);
i0+=m1;
i1+=m1;
r7=byteop2p(r1:0,r3:2)(rndh);
i0-=m1;
i1-=m1;
r7=r7+r6;
disalgnexcpt||r0=[i0++m0];
disalgnexcpt||[i3++]=r7||r2=[i2++m0];
r6=byteop2p(r1:0,r3:2)(rndl,r);
i1+=m1;
i0+=m1;
r7=byteop2p(r1:0,r3:2)(rndh,r);
i1-=m1;
i0-=m1;
disalgnexcpt||r0=[i0++];
r7=r7+r6;
hh_8x8_end:
disalgnexcpt||[i3++]=r7||r2=[i2++];
jump interpolate_8x8_end;
byte_align_HH:// special case
lsetup(bytealign_hh_start,bytealign_hh_end)lc0=p0;
i2=i0;
i2+=m2;
disalgnexcpt||r0=[i0++]||r2=[i2++];
bytealign_hh_start:
disalgnexcpt||r1=[i0++]||r3=[i2++];
r6=byteop2p(r1:0,r3:2)(rndl);
i0+=m1;
i1+=m1;
r7=byteop2p(r1:0,r3:2)(rndh,r);
i0-=m1;
i1-=m1;
r7=r7+r6;
disalgnexcpt||r0=[i0++m0];
disalgnexcpt||[i3++]=r7||r2=[i2++m0];
r6=byteop2p(r1:0,r3:2)(rndl,r);
i0+=m1;
i1+=m1;
r7=byteop2p(r1:0,r3:2)(rndh);
i0-=m1;
i1-=m1;
disalgnexcpt||r0=[i0++];
r7=r7+r6;
bytealign_hh_end:
disalgnexcpt||[i3++]=r7||r2=[i2++];
//jump interpolate_8x8_end;
*/
// Live HH path. R0/R1 still hold pSrc/srcStep here (no earlier path that
// reaches this label modifies them).
// Pointer bookkeeping: I1 walks the row at pSrc+srcStep and I2 the row at
// pSrc. Both are moved by the same sequence of post-modifies; per loop
// iteration the net advance is M3 + M1 + 4 + M3 + M0
// = -3 + (srcStep-5) + 4 - 3 + 7 = srcStep, i.e. exactly one row.
// I0 is never used for a load on this path; it is only nudged by M3 and
// back around each RNDH operation (presumably so that BYTEOP2P sees the
// alignment of the neighbouring byte -- confirm against the ISA reference).
M0 = 7;
M3 = -3 (X);
I2 = R0; // I2 = pSrc: address of the top-left pel in the best-match reference block
R0 = R0 + R1 (S); // R0 = pSrc + srcStep (next row), saturating add
I1 = R0; // I1 = pSrc + srcStep (the row below the one I2 points at)
I0 = R0; // I0 = same address as I1; modified only via += / -= M3 below
R1 += -5; //-9
M1 = R1; // M1 = srcStep - 5
P2 = 8; //9
LSETUP(AVGHV_RND_ST, AVGHV_RND_END) LC0 = P2;
// Pipeline prologue: load the first two words of both rows and start the
// first RNDL result before entering the loop.
DISALGNEXCPT || R0 = [I1++] || R2 = [I2++];
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || R0 = [I1++M0];
AVGHV_RND_ST:
DISALGNEXCPT || I0 += M3 || R2 = [I2++M0]; // shift I0 by M3 for the RNDH operation on the next line
R6 = BYTEOP2P(R1:0,R3:2) (RNDH) || R0 = [I1++M3] || R2 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3; // merge the RNDL and RNDH results into one output word; restore I0
R7 = BYTEOP2P(R1:0,R3:2) (RNDL, R) || [I3++] = R7 || R1 = [I1++M1]; // store first output word; start second half of the row (reversed pairs)
DISALGNEXCPT || I0 += M3 || R3 = [I2++M1]; // I1 (previous line) and I2 have now moved on by M1 = srcStep-5
R6 = BYTEOP2P(R1:0,R3:2) (RNDH, R) || R0 = [I1++] || R2 = [I2++]; // begin loading the next row pair
R7 = R6 + R7 (NS) || I0 -= M3; // merge into the second output word; restore I0
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
AVGHV_RND_END:
R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || [I3++] = R7 || R0 = [I1++M0]; // store second output word; start RNDL for the next row
// Common exit: restore RETS, R4-R7 and P3-P5 in the reverse order of the pushes.
interpolate_8x8_end:
RETS=[SP++];
(R7:4,P5:3)=[SP++];
_get_interpolate_uv.end:
rts;
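// ---------------------------------------------------------------------------
// The remaining text is residue from the web code viewer this file was copied
// from; it is not part of the assembly source.
// Keyboard shortcuts:
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Toggle theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------------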