⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reconblock.asm

📁 adi bf533视频编码程序
💻 ASM
字号:
.section L1_code;
//.global _xhReconBlockHalfpel_MPEG4_8u;
.global _xhReconBlockHalfpel_MPEG4_8u_MOD;
/*****************************************************************************************
ippiReconBlockHalfpel_MPEG4_8u (const Ipp8u* pSrc, int srcStep,
       Ipp16s pResidue[64], Ipp8u* pDst, int dstStep, const IppMotionVector* pMV,
       int roundControl)
       
       performance:
                            ASM        C
                  cycle:    728       10750  
*******************************************************************************************/
/*
_xhReconBlockHalfpel_MPEG4_8u:

                [--sp]=(r7:4,p5:3);
                [--sp]=i0;
                [--sp]=l0;
                [--sp]=i1;
                [--sp]=l1;
                [--sp]=i2;
                [--sp]=l2; 
                [--sp]=i3;
                [--sp]=l3;
                [--sp]=rets;
                
                b0=r1;//srcstep
//liu move here
                r7=[sp+20+64];//PMV
                i3=r7;
 
                
                l2=0;
                l1=0;
                l0=0;
                l3=0;
//liu                r7=[sp+20+64];//PMV
//                i3=r7;
                
                r7.l=w[i3++];//pMV->dx
                r6.l=w[i3++];//pMV->dy
                r7=r7.l(x);
                r6=r6.l(x);
                r5=r7>>>1;//mv_off_x
                r4=r6>>>1;//mv_off_y
                r4*=r1;//mv_off_y*srcstep
                r4=r4+r5;//mv_off_y*srcstep+mv_off_x
                r0=r0+r4;//point to pstart
                
                
                
                
                r1=[sp+64+12]; //pDst
                i3=r1;
                i0=r0;
                i1=r0;//psrc
                r4=b0;//srcstep
                r5=[sp+16+64];//dststep
                b1=r5;
                p5=r2;// point to the Presidual
                p0=8;
                
                
                cc=bittst(r7,0);//mv_off_x
                if cc jump HF_HH8;
                cc=bittst(r6,0);
                if cc jump FH8;
                   
         
         
              
              r5+=-4;
              r4+=-8;
              m0=r4;//src step modify
              m1=r5;//dst step  modify
              i1=i0;
             lsetup (ff_8x8_start,ff_8x8_end)lc0=p0;
                disalgnexcpt||r0=[i0++]||r2=[i1++];
      
              ff_8x8_start:
                   disalgnexcpt||r1=[i0++]||r3=[i1++];
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];     
              ff_8x8_end:
                     disalgnexcpt||[i3++m1]=r7||r2=[i1++];
                            
             jump halfcopyblock_end;
             
            FH8:
               m1=r4;//src step       
               r4+=-8;
               m0=r4;//src modify
               r5+=-4;
               m2=r5;//dst modify
               lsetup (fh_8x8_start,fh_8x8_end)lc0=p0;
                   i1=i0;
                   i1+=m1;
                  disalgnexcpt||r0=[i0++]||r2=[i1++];
               fh_8x8_start:
                   disalgnexcpt||r1=[i0++]||r3=[i1++]; 
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];       
               fh_8x8_end:
                   disalgnexcpt||[i3++m2]=r7||r2=[i1++];         
               jump  halfcopyblock_end;
                
             HF_HH8:
                 cc=bittst(r6,0);
                 if cc jump HH8;
                 m1=1;//to make the align8    
                 r4+=-8;
                 m0=r4;
                 i1=i0;
                 i1+=m1;
                 r5+=-4;
                 m3=r5;
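                 r6=3;       //Special-case I0 with both low address bits set:
                 r7=i0;      //after I0+1 the two low bits become 00, so the
                 r7=r7&r6;   //register selected is R2 rather than R3, and
                 cc=r7==r6;  //this case is therefore handled separately.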
                 if cc jump byte_align_HF;
                lsetup(hf_8x8_start,hf_8x8_end)lc0=p0;
                i2=i0;
                disalgnexcpt||r0=[i0++]||r2=[i2++];
                hf_8x8_start:
 
                   
                   disalgnexcpt||r1=[i0++]||r3=[i2++];
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];
                   
              hf_8x8_end:     
                   disalgnexcpt||[i3++m3]=r7||r2=[i2++];                 
                jump halfcopyblock_end;
                
             byte_align_HF: //special case
                 lsetup(bytealign_start,bytealign_end) lc0=p0;
                 i2=i0;
                 i2+=4;
                  disalgnexcpt||r0=[i0++]||r2=[i2++];
                
                bytealign_start:
                    disalgnexcpt||r1=[i0++]||r3=[i2++];
                    r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];       
                    r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];
                bytealign_end:
                    disalgnexcpt||[i3++m3]=r7||r2=[i2++];
                    jump halfcopyblock_end;
                    
              HH8:
               r5+=-4;
               m3=r5; 
               m1=1;
               m2=r4;//src step
               r4+=-8;
               m0=r4;
               i1=i0;
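                 r6=3;       //Special-case I0 with both low address bits set:
                 r7=i0;      //after I0+1 the two low bits become 00, so the
                 r7=r7&r6;   //register selected is R2 rather than R3, and
                 cc=r7==r6;  //this case is therefore handled separately.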
               if cc jump byte_align_HH;

              // p1=2;
               lsetup(hh_8x8_start,hh_8x8_end)lc0=p0;          
                    i2=i0;
                    i2+=m2;
                    disalgnexcpt||r0=[i0++]||r2=[i2++];
               hh_8x8_start:
                    
                    disalgnexcpt||r1=[i0++]||r3=[i2++];
            
                   
                    r6=byteop2p(r1:0,r3:2)(rndl);
                    i0+=m1;
                    i1+=m1;
                    r7=byteop2p(r1:0,r3:2)(rndh);
                    i0-=m1;
                    i1-=m1;
                    r7=r7+r6;
                    disalgnexcpt||r0=[i0++m0];
                    disalgnexcpt||[i3++]=r7||r2=[i2++m0];
                    r6=byteop2p(r1:0,r3:2)(rndl,r);
                    i1+=m1;
                    i0+=m1;
                    r7=byteop2p(r1:0,r3:2)(rndh,r);
                    i1-=m1;
                    i0-=m1;                    
                    disalgnexcpt||r0=[i0++];
                    r7=r7+r6;
              hh_8x8_end:
                   disalgnexcpt||[i3++m3]=r7||r2=[i2++];
                   jump  halfcopyblock_end;
             byte_align_HH://special case
                   lsetup(bytealign_hh_start,bytealign_hh_end)lc0=p0;
                    i2=i0;
                    i2+=m2;
                    disalgnexcpt||r0=[i0++]||r2=[i2++];
                  bytealign_hh_start:
                     disalgnexcpt||r1=[i0++]||r3=[i2++];  
                     r6=byteop2p(r1:0,r3:2)(rndl);
                     i0+=m1;
                     i1+=m1;
                     
                     r7=byteop2p(r1:0,r3:2)(rndh,r);
                     i0-=m1;
                     i1-=m1;
                     r7=r7+r6;
                     disalgnexcpt||r0=[i0++m0];
                     disalgnexcpt||[i3++]=r7||r2=[i2++m0];
                     r6=byteop2p(r1:0,r3:2)(rndl,r);
                     i0+=m1;
                     i1+=m1;
                     r7=byteop2p(r1:0,r3:2)(rndh);
                     i0-=m1;
                     i1-=m1;
                     disalgnexcpt||r0=[i0++];
                     r7=r7+r6;
                  bytealign_hh_end:
                   disalgnexcpt||[i3++m3]=r7||r2=[i2++];
                   //jump  halfcopyblock_end;                          
                       
                 
  halfcopyblock_end:
                  r1=[sp+64+12]; //pDst
                  i0=r1;
 //liu                 p3=i0;
 //                 p4=i0;
                  i1=r1;
                 // r2=p5;//residual
                  i2=p5;
                  r7=b1;//the step of pDst
                  r7+=-9;
//liu move here
                  p3=i0;
                  p4=i0;
 
                  p2=r7;
                  p0=8;
                  p1=8;
                  lsetup(reconstruct_start,reconstruct_end)lc0=p0;
                      r0=b[p3++](z)||r2.l=w[i2++];
                  reconstruct_start:
                       lsetup(reconstruct1_start,reconstruct1_end)lc1=p1;
                      reconstruct1_start:
                        r2=r2.l(x);
                        r0=r0<<8;
                        r5=byteop3p(r3:2,r1:0)(lo);
                        r0=b[p3++](z)||r2.l=w[i2++];
                      reconstruct1_end:
                        b[p4++]=r5; 
                        p3=p3+p2;
                        p4=p4+p2;
                        r6=p4;
                        r6+=1;
                        p4=r6;
                 reconstruct_end:      
                       r0=b[p3++](z);
                       
                       
                  rets=[sp++];
                  l3=[sp++];
                  i3=[sp++];
                  l2=[sp++];
                  i2=[sp++];
                  l1=[sp++];
                  i1=[sp++];
                  l0=[sp++];
                  i0=[sp++];
                  (r7:4,p5:3)=[sp++];
                  rts;
_xhReconBlockHalfpel_MPEG4_8u.end:   
*/


#if 0
/*****************************************************************************
ippiReconBlockHalfpel_MPEG4_8u (const Ipp8u* pSrc, int srcStep,
       Ipp16s pResidue[64], Ipp8u* pDst, int dstStep, int roundControl)

Variant 1 of _xhReconBlockHalfpel_MPEG4_8u_MOD. It is compiled out by the
enclosing "#if 0".

For each of 8 rows it unpacks 8 source bytes with BYTEUNPACK, adds the
16-bit residue words with the dual 16-bit add "+|+ (S)", repacks the sums
with BYTEPACK and writes two 32-bit words to pDst.

Inputs as used below:
  R0      = pSrc
  R1      = srcStep
  R2      = pResidue
  [SP+76] = pDst     (offset measured after the 64 bytes pushed below)
  [SP+80] = dstStep
roundControl is never read.

NOTE(review): "+|+ (S)" presumably saturates at 16 bits and BYTEPACK
presumably keeps only the low byte of each half-word, so sums outside
0..255 would not be clamped -- confirm against the instruction set
reference.
NOTE(review): the .END label below sits before RTS, unlike the commented-out
routine earlier in this file, which places it after rts.
*****************************************************************************/
_xhReconBlockHalfpel_MPEG4_8u_MOD:
                // Prologue: 7 registers (R7:4, P5:3) + 9 single pushes = 64 bytes.
                [--sp]=(r7:4,p5:3);
                [--sp]=i0;
                [--sp]=l0;
                [--sp]=i1;
                [--sp]=l1;
                [--sp]=i2;
                [--sp]=l2; 
                [--sp]=i3;
                [--sp]=l3;
                [--sp]=rets;
                
                // Zero the length registers paired with I0 and I1.
                // NOTE(review): L2 is not cleared here although I2 is used below.
                L0=0;
                L1=0;
                I1=R0;// I1 = pSrc (source read pointer)
                P5=R1;// P5 = srcStep; P5 is not referenced again in this routine
                R1+=-4;
                M1=R1;// M1 = srcStep - 4: moves I1 from byte 4 of a row to the next row
                I2=R2;// I2 = pResidue (residue read pointer)
                R2=[SP+76];
                I0=R2;// I0 = pDst (destination write pointer)
                R3=[SP+80];// R3 = dstStep
                R3+=-4;
                M0=R3;// M0 = dstStep - 4: moves I0 from byte 4 of a row to the next row
                P0=8;// loop count: 8 rows
                
                // The three instructions between LSETUP and the start label run
                // once, before the loop body: they preload row 0.
                LSETUP(reconstruct_mod_start,reconstruct_mod_end)LC0=P0;
                  R0=[I1++]||R4=[I2++];                 // R0 = source bytes 0..3, R4 = first residue word
                  R1=[I1++M1];                          // R1 = source bytes 4..7, I1 -> next row
                  (R7,R6) = BYTEUNPACK R1:0||R5=[I2++]; // unpack source bytes, R5 = second residue word
                reconstruct_mod_start:
                    
                    // First half of the row: add residue to unpacked source,
                    // while loading the residue words for the second half.
                    R2=R4+|+R6(S)||R4=[I2++];
                    R3=R5+|+R7(S)||R5=[I2++];
                    R2 = BYTEPACK (R2,R3);
                    
                    // Store the first 4 result bytes; unpack again with the (R) option.
                    (R7,R6) = BYTEUNPACK R1:0(R)||[I0++]=R2;
                    // Second half of the row, while preloading the next row's
                    // source and residue words.
                    R2=R4+|+R6(S)||R4=[I2++]||R0=[I1++];
                    R3=R5+|+R7(S)||R5=[I2++]||R1=[I1++M1];
                    R2 = BYTEPACK (R2,R3);
                reconstruct_mod_end:
                    // Store the last 4 result bytes and step I0 to the next row.
                    (R7,R6) = BYTEUNPACK R1:0||[I0++M0]=R2;
                  
                 // Epilogue: pop in the reverse order of the prologue pushes.
                 rets=[sp++];
                  l3=[sp++];
                  i3=[sp++];
                  l2=[sp++];
                  i2=[sp++];
                  l1=[sp++];
                  i1=[sp++];
                  l0=[sp++];
                  i0=[sp++];
                  (r7:4,p5:3)=[sp++];
_xhReconBlockHalfpel_MPEG4_8u_MOD.END:
RTS; 

#else

#if 0          
/*****************************************************************************
ippiReconBlockHalfpel_MPEG4_8u (const Ipp8u* pSrc, int srcStep,
       Ipp16s pResidue[64], Ipp8u* pDst, int dstStep, int roundControl)

Variant 2 of _xhReconBlockHalfpel_MPEG4_8u_MOD. It is compiled out by the
enclosing "#if 0".

It walks the residue with two pointers (I2 at pResidue, I3 at pResidue+4,
both stepping 8 bytes), regroups half-words with PACK, combines them with
source words through BYTEOP3P in LO and HI forms, and merges the two
results with shifts and an OR before each 32-bit store to pDst.

Inputs as used below:
  R0      = pSrc
  R1      = srcStep
  R2      = pResidue
  [SP+44] = pDst     (offset measured after the 32 bytes pushed below)
  [SP+48] = dstStep
roundControl is never read.

NOTE(review): this is a disabled experiment; its data flow has not been
verified in this review.
*****************************************************************************/
_xhReconBlockHalfpel_MPEG4_8u_MOD:
	// Prologue: 7 registers (R7:4, P5:3) + RETS = 32 bytes.
	[--sp]=(r7:4,p5:3);
	[--sp]=rets;
	
	// Zero all four length registers.
	R7=0;
	L0=R7;
	L1=R7;
	L2=R7;
	L3=R7;
	
	I1=R0;// I1 = pSrc (source read pointer)
	R1+=-4;	// R1 = srcStep - 4
	M1=R1;	// M1 moves I1 from byte 4 of a row to the next row
	I2=R2;// I2 = pResidue
	R2+=4;
	I3=R2;	// I3 = pResidue + 4 bytes
	M2=8;	// both residue pointers advance 8 bytes per load
	
	R2=[SP+44];
	I0=R2;// I0 = pDst (destination write pointer)
	R3=[SP+48];// R3 = dstStep
	R3+=-4;
	M0=R3;	// M0 = dstStep - 4: moves I0 from byte 4 of a row to the next row
	P0=8;	// loop count: 8 rows

	// Preload before the loop: two residue words and the first source word.
	R4=[I2++M2] || R5=[I3++M2];
	R2=PACK(R5.L, R4.L) || R0=[I1++];
	LSETUP(reconstruct_mod_start,reconstruct_mod_end)LC0=P0;
	reconstruct_mod_start:
		R6=BYTEOP3P(R3:2, R1:0)(LO) || R1=[I1++M1];
		R2=PACK(R5.H, R4.H) || R4=[I2++M2] || R5=[I3++M2];
		R6<<=8;
		R7=BYTEOP3P(R3:2, R1:0)(HI) || R0=[I1++];
		R3=PACK(R5.L, R4.L);
		R7>>=8;
		R6=R6|R7;	// merge the LO and HI results into one word
		// Store the first 4 result bytes of the row.
		R6=BYTEOP3P(R3:2, R1:0)(LO,R) || [I0++]=R6;
		R3=PACK(R5.H, R4.H) || R4=[I2++M2] || R5=[I3++M2];
		R6<<=8;
		R7=BYTEOP3P(R3:2, R1:0)(HI,R) || R0=[I1++];
		R7>>=8;
		R6=R6|R7;
	reconstruct_mod_end:
		// Store the last 4 result bytes and step I0 to the next row.
		R2=PACK(R5.L, R4.L) || [I0++M0]=R6;
	
_xhReconBlockHalfpel_MPEG4_8u_MOD.END:
	// Epilogue: pop in the reverse order of the prologue pushes.
	rets=[sp++];
	(r7:4,p5:3)=[sp++];
	RTS;
#endif

/*****************************************************************************
ippiReconBlockHalfpel_MPEG4_8u (const Ipp8u* pSrc, int srcStep,
       Ipp16s pResidue[64], Ipp8u* pDst, int dstStep, int roundControl)

_xhReconBlockHalfpel_MPEG4_8u_MOD -- active variant.

For each of 8 rows: reads 8 source bytes from pSrc, adds the 8 matching
16-bit residue values from pResidue and writes 8 result bytes to pDst.
The add uses BYTEOP3P, which per the Blackfin instruction set reference is
a dual 16-bit add whose result is clipped to an unsigned byte.

In:     R0 = pSrc, R1 = srcStep, R2 = pResidue
        [SP+44] = pDst, [SP+48] = dstStep (after the 32 bytes pushed here,
        i.e. entry SP+12 and entry SP+16)
        roundControl is never read.
Saved:  R7:4, P5:3, RETS.
Used without saving: R0-R3, P0, I0-I2, M0-M1, the loop-0 registers.
        L0-L2 are written with 0 and left at 0.

Assumptions (TODO confirm with callers): every memory access is a 32-bit
load/store through an I register with no DISALGNEXCPT, so pSrc, pResidue and
pDst presumably must be 4-byte aligned and srcStep/dstStep multiples of 4.

NOTE(review): the loop is software pipelined -- loads for the next group are
issued next to the arithmetic for the current one. In total 34 residue words
and 17 source words are read, so the final iteration also reads 8 bytes past
pResidue[63] and 4 bytes at pSrc + 8*srcStep. Those values are never used.
*****************************************************************************/
_xhReconBlockHalfpel_MPEG4_8u_MOD:
	// Prologue: 7 registers (R7:4, P5:3) + RETS = 32 bytes.
	[--sp]=(r7:4,p5:3);
	[--sp]=rets;
	
	// Zero the length registers paired with the three I registers used here.
	R7=0;
	L0=R7;
	L1=R7;
	L2=R7;
	
	I1=R0;		// I1 = pSrc (source read pointer)
	R1+=-4;		// R1 = srcStep - 4
	M1=R1;		// M1 moves I1 from byte 4 of a row to the next row
	I2=R2;		// I2 = pResidue (residue read pointer)
	
	R2=[SP+44];
	I0=R2;		// I0 = pDst (destination write pointer)
	R3=[SP+48];	// R3 = dstStep
	R3+=-4;
	M0=R3;		// M0 = dstStep - 4: moves I0 from byte 4 of a row to the next row
	P0=8;		// loop count: 8 rows

	// Pipeline fill: residue for pixels 0..3 of row 0 and source bytes 0..3,
	// with the source bytes unpacked into the half-words of R1/R0.
	R2=[I2++] || R0=[I1++];
	(R1, R0)=BYTEUNPACK R1:0 || R3=[I2++];
	LSETUP(reconstruct_mod_start,reconstruct_mod_end)LC0=P0;
	reconstruct_mod_start:
		// Move each unpacked byte from bits 7:0 to bits 15:8 of its
		// half-word -- presumably the byte lane the (LO) form of
		// BYTEOP3P reads; confirm against the instruction set reference.
		R0<<=8;
		R1<<=8;
		// Pixels 0..3 of the row: residue + source. The parallel loads
		// fetch the residue for pixels 4..7 and source bytes 4..7, and
		// step I1 to the next row.
		R6=BYTEOP3P(R3:2, R1:0)(LO) || R2=[I2++];
		R7=BYTEOP3P(R3:2, R1:0)(LO,R) || R3=[I2++] || R0=[I1++M1];
		(R1, R0)=BYTEUNPACK R1:0;
		R0<<=8;
		R1<<=8;
		R5=BYTEPACK(R6, R7);	// four result bytes for pixels 0..3
		// Pixels 4..7: store the first word while computing the second
		// and preloading the next row's residue and source.
		R6=BYTEOP3P(R3:2, R1:0)(LO) || [I0++]=R5 || R2=[I2++];
		R7=BYTEOP3P(R3:2, R1:0)(LO,R) || R3=[I2++] || R0=[I1++];
		R5=BYTEPACK(R6, R7);	// four result bytes for pixels 4..7
		
	reconstruct_mod_end:
		// Store the second word, step I0 to the next row, and unpack the
		// source bytes just loaded for the next iteration.
		(R1, R0)=BYTEUNPACK R1:0 || [I0++M0]=R5;
	
	// Epilogue: pop in the reverse order of the prologue pushes.
	rets=[sp++];
	(r7:4,p5:3)=[sp++];
	RTS;
// End-of-function label placed after RTS so the labelled extent covers the
// epilogue, matching the placement used by the commented-out routine earlier
// in this file.
_xhReconBlockHalfpel_MPEG4_8u_MOD.END:

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -