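/*
 * Listing captured from the web code viewer of the "Chongchong" download site
 * (虫虫下载站). Site banner: "Welcome to the Chongchong download site" |
 * Resource downloads | Resource albums | About us.
 *
 * File:     getdiff.asm
 * Package:  ADI BF533 video encoder program
 * Language: ASM
 * (The viewer's "font size" control followed here; it is not part of the source.)
 */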

// Entry points exported from this file.
.global _xhGetDiff8x8_8u16s_C1;
.global _GetDiff8x8_8u16s_C1;

// mcType values tested by _xhGetDiff8x8_8u16s_C1 to pick how the 8x8
// predictor is built from the reference block:
//   FF - reference pixels are used as they are
//   FH - each pixel is averaged with the pixel one reference row below
//   HF - each pixel is averaged with the pixel one byte to its right
//   HH - taken for every other value; combines two adjacent rows and
//        two adjacent columns
#define  IPPVC_MC_APX_FF   0
#define  IPPVC_MC_APX_FH   4
#define  IPPVC_MC_APX_HF   8
#define  IPPVC_MC_APX_HH   12

.section L1_data_b;
// 8x8 predictor scratch buffer (8 rows of 8 bytes). It is written and read
// exclusively with 32-bit accesses through I3 ([i3++]=r6, r2=[i3++]), so it
// has to start on a 4-byte boundary; a bare .byte declaration does not
// request that by itself.
.align 4;
.byte _interpolate_8x8[64];

.section L1_code;
/***************************************************************************
_xhGetDiff8x8_8u16s_C1

Hand-written replacement modelled on the prototype

ippiGetDiff8x8_8u16s_C1 (const Ipp8u* pSrcCur, Ipp32s srcCurStep,
          const Ipp8u* pSrcRef, Ipp32s srcRefStep, Ipp16s* pDstDiff, Ipp32s
    dstDiffStep, Ipp16s* pDstPredictor, Ipp32s dstPredictorStep, Ipp32s mcType,
       Ipp32s roundControl);

Arguments as they are actually read by the code:
       R0              pSrcCur
       R1              srcCurStep
       R2              pSrcRef
       [sp+12+64]      srcRefStep
       [sp+16+64]      pDstDiff
       [sp+32+64]      mcType
   (+64 is the size of the register save area pushed on entry.)
   dstDiffStep, pDstPredictor, dstPredictorStep and roundControl are never
   read by this routine.

Operation:
   1. An 8x8 predictor is built from the reference block according to
      mcType (FF / FH / HF, anything else is handled as HH) and stored as
      sixteen 32-bit words in _interpolate_8x8.
   2. The predictor and the current block are fed to BYTEOP16M and the
      resulting sixty-four 16-bit values are stored back to back starting
      at pDstDiff (32 bytes advance per two rows; no destination step is
      applied).
   3. R0 is loaded with the address of _interpolate_8x8 before returning.

Registers written and NOT restored: R0-R3, P0, P2 (HH path), M0, M1, M3, B3
and the loop-0 registers set by LSETUP. R7:4, P5:3, I0-I3, L0-L3 and RETS are
saved on entry and restored on exit.

NOTE(review): the difference stage loads only two words per current row and
uses plain (non-DISALGNEXCPT) loads, so pSrcCur and srcCurStep are presumably
expected to be multiples of 4 -- confirm with the callers.
       
       performance:
                            ASM      C
             cycle count :  169     4752    :FF
                            203     6977    :FH
                            233     6982    :HF
                            286     8969    :HH
*****************************************************************************/
_xhGetDiff8x8_8u16s_C1:

    
// NOTE: the roundControl argument is not taken into account.

             // Register save area: 7 registers (R7:4, P5:3) plus the 9
             // single pushes below = 16 words = 64 bytes.
             [--sp]=(r7:4,p5:3);
             [--sp]=i0;
             [--sp]=l0;
             [--sp]=i1;
             [--sp]=l1;
             [--sp]=i2;
             [--sp]=l2;
             [--sp]=i3;
             [--sp]=l3;
             [--sp]=rets;
             
             // L0-L3 are cleared so that the post-modified I0-I3 accesses
             // below are linear (no circular-buffer wrap).
             // I3 -> start of the predictor scratch buffer.
             l3=0;
             i3.l=_interpolate_8x8;
             i3.h=_interpolate_8x8;
             l2=0;
             
             l0=0;
             i0=r2;// I0 = pSrcRef
             
             l1=0;
             i1=r2;// I1 = pSrcRef as well
             
             m3=r0;// copy of pSrcCur; M3 is not read again before the HH path overwrites it
             b3=r0;// B3 keeps pSrcCur for the difference stage (i0=b3)
             p5=r1;// P5 keeps srcCurStep for the difference stage
             
             r7=[sp+12+64];// R7 = srcRefStep
             r6=[sp+32+64];// R6 = mcType
             
             m1=r7;// M1 = srcRefStep
             p0=8;         // row count for the FF/FH/HF loops
/***********************IPPVC_MC_APX_FF**********************/
            cc=r6==IPPVC_MC_APX_FF;
            if !cc jump FH_8x8;
          
              // M0 = srcRefStep - 8: after the first two word loads of a row
              // the third load post-modifies by M0, which lands on the start
              // of the next row.
              r7+=-8;
              m0=r7;//refstep modify
            
              // Both operands of BYTEOP1P come from the same address, so the
              // byte average of a pixel with itself reproduces the pixel.
              i1=i0;
             lsetup (ff_8x8_start,ff_8x8_end)lc0=p0;
                // Executed once before the loop: first word of row 0.
                disalgnexcpt||r0=[i0++]||r2=[i1++];
      
              // One iteration = one row: R6 receives the first four result
              // bytes, R7 (reversed register-pair order) the next four, and
              // both are appended to the scratch buffer. The third word of
              // the row is loaded for the second BYTEOP1P.
              ff_8x8_start:
                   disalgnexcpt||r1=[i0++]||r3=[i1++];
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];     
              ff_8x8_end:
                     disalgnexcpt||[i3++]=r7||r2=[i1++]; 
             jump interpolate_8x8_end;
/**********************IPPVC_MC_APX_FH***********************/
           FH_8x8:
               // The immediate compare form was used for FF (value 0); 4 is
               // compared through a register here.
               r5=IPPVC_MC_APX_FH;
               cc=r6==r5;
               if !cc jump HF_8x8;
            
               m1=r7;//ref step       
               r7+=-8;
               m0=r7;// M0 = srcRefStep - 8 (row advance, as in the FF path)
              
               lsetup (fh_8x8_start,fh_8x8_end)lc0=p0;
                   // I1 = I0 + srcRefStep: the second operand is the row
                   // directly below the first one.
                   i1=i0;
                   i1+=m1;
                  disalgnexcpt||r0=[i0++]||r2=[i1++];
               // Same pipeline as the FF loop, averaging row n with row n+1.
               fh_8x8_start:
                   disalgnexcpt||r1=[i0++]||r3=[i1++]; 
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i1++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];       
               fh_8x8_end:
                   disalgnexcpt||[i3++]=r7||r2=[i1++];      
                   jump interpolate_8x8_end;
/************************IPPVC_MC_APX_HF************************/
           HF_8x8:
              r5=IPPVC_MC_APX_HF;
              cc=r6==r5;
              if !cc jump HH_8x8;
                 // I1 = I0 + 1. I1 is not used for loads in this path (the
                 // second operand is fetched through I2); it is only set up
                 // here and left untouched.
                 // NOTE(review): presumably BYTEOP1P takes the byte alignment
                 // of its second operand from the low bits of I1 -- confirm
                 // against the Blackfin instruction reference.
                 m1=1;//to make the align8    
                 r7+=-8;
                 m0=r7;// M0 = srcRefStep - 8 (row advance)
                 i1=i0;
                 i1+=m1;
                
                 // Original author's note (translated): this handles the case
                 // where the two low bits of I0 are both 1. After adding 1
                 // those bits become 0, so the register selected is no longer
                 // R3 but R2; that case is therefore treated separately.
                 r6=3;
                 r7=i0;
                 r7=r7&r6;
                 cc=r7==r6;
                 if cc jump byte_align_HF;
                // General case: I2 starts at the same address as I0.
                lsetup(hf_8x8_start,hf_8x8_end)lc0=p0;
                i2=i0;
                disalgnexcpt||r0=[i0++]||r2=[i2++];
                hf_8x8_start:
 
                   
                   disalgnexcpt||r1=[i0++]||r3=[i2++];
                   r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];
                   r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];
                   
              hf_8x8_end:     
                   disalgnexcpt||[i3++]=r7||r2=[i2++];                 
                jump interpolate_8x8_end;
                
             byte_align_HF: // special case: (I0 & 3) == 3
                 // Same loop as above, except that I2 starts one word (4
                 // bytes) further on, so the second operand is read from the
                 // following words.
                 lsetup(bytealign_start,bytealign_end) lc0=p0;
                 i2=i0;
                 i2+=4;
                  disalgnexcpt||r0=[i0++]||r2=[i2++];
                
                bytealign_start:
                    disalgnexcpt||r1=[i0++]||r3=[i2++];
                    r6=byteop1p(r1:0,r3:2)||r0=[i0++m0]||r2=[i2++m0];       
                    r7=byteop1p(r1:0,r3:2)(r)||[i3++]=r6||r0=[i0++];
                bytealign_end:
                    disalgnexcpt||[i3++]=r7||r2=[i2++];
                    
                    
              jump interpolate_8x8_end;
/************************IPPVC_MC_APX_HH************************/
           HH_8x8:
              // Reached for every mcType other than FF, FH and HF.
              
             
           /*
               Disabled earlier HH implementation, kept for reference only.

               m1=1;
               m2=r7;//src step
               r7+=-8;
               m0=r7;
               i1=i0;
                 r6=3;       //this part handles the case where the two low
                 r7=i0;      //bits of I0 are both 1: after I0+1 they become
                 r7=r7&r6;   //0, so the register selected is R2 instead of
                 cc=r7==r6;  //R3, hence the separate treatment.
               if cc jump byte_align_HH;

              // p1=2;
               lsetup(hh_8x8_start,hh_8x8_end)lc0=p0;          
                    i2=i0;
                    i2+=m2;
                    disalgnexcpt||r0=[i0++]||r2=[i2++];
               hh_8x8_start:
                    disalgnexcpt||r1=[i0++]||r3=[i2++];
                    r6=byteop2p(r1:0,r3:2)(rndl);
                    i0+=m1;
                    i1+=m1;
                    r7=byteop2p(r1:0,r3:2)(rndh);
                    i0-=m1;
                    i1-=m1;
                    r7=r7+r6;
                    disalgnexcpt||r0=[i0++m0];
                    disalgnexcpt||[i3++]=r7||r2=[i2++m0];
                    r6=byteop2p(r1:0,r3:2)(rndl,r);
                    i1+=m1;
                    i0+=m1;
                    r7=byteop2p(r1:0,r3:2)(rndh,r);
                    i1-=m1;
                    i0-=m1;                    
                    disalgnexcpt||r0=[i0++];
                    r7=r7+r6;
              hh_8x8_end:
                   disalgnexcpt||[i3++]=r7||r2=[i2++];
                   jump  interpolate_8x8_end;
             byte_align_HH://special case
                   lsetup(bytealign_hh_start,bytealign_hh_end)lc0=p0;
                    i2=i0;
                    i2+=m2;
                    disalgnexcpt||r0=[i0++]||r2=[i2++];
                  bytealign_hh_start:
                     disalgnexcpt||r1=[i0++]||r3=[i2++];  
                     r6=byteop2p(r1:0,r3:2)(rndl);
                     i0+=m1;
                     i1+=m1;
                     
                     r7=byteop2p(r1:0,r3:2)(rndh,r);
                     i0-=m1;
                     i1-=m1;
                     r7=r7+r6;
                     disalgnexcpt||r0=[i0++m0];
                     disalgnexcpt||[i3++]=r7||r2=[i2++m0];
                     r6=byteop2p(r1:0,r3:2)(rndl,r);
                     i0+=m1;
                     i1+=m1;
                     r7=byteop2p(r1:0,r3:2)(rndh);
                     i0-=m1;
                     i1-=m1;
                     disalgnexcpt||r0=[i0++];
                     r7=r7+r6;
                  bytealign_hh_end:
                   disalgnexcpt||[i3++]=r7||r2=[i2++];  
                   //jump  interpolate_8x8_end;   
                   
       */
                  
    // Active HH implementation. On entry R2 = pSrcRef and R7 = srcRefStep.
    // Pointer stepping used below: +4 (plain ++), M3 = -3, M0 = +7 and
    // M1 = srcRefStep - 5.
    M0 = 7;
    M3 = -3 (X);  
    I2 = R2;                // I2 = pSrcRef: top-left pel of the reference block
    R2 = R2 + R7 (S);       // R2 = pSrcRef + srcRefStep (next row)
    I1 = R2;                // I1 = pSrcRef + srcRefStep
    I0 = R2;                // I0 = same address; only modified by +/-M3 below
    R7 += -5;        // author's note of an alternative value: -9
    M1 = R7;                // M1 = srcRefStep - 5
    P2 = 8;   // loop count; author's note of an alternative value: 9
    
    
    // One loop iteration produces one predictor row (two 32-bit stores to
    // the scratch buffer). The three instructions after LSETUP run once and
    // prime the pipeline with the first RNDL result.
    // NOTE(review): "I0 += M3" / "I0 -= M3" temporarily moves I0 by -3, which
    // advances its two low bits by one; presumably this shifts the byte
    // alignment BYTEOP2P uses for the RNDH half of each word -- confirm
    // against the Blackfin instruction reference.
    LSETUP(AVGHV_RND_ST, AVGHV_RND_END) LC0 = P2;
    DISALGNEXCPT || R0 = [I1++] || R2 = [I2++];
    DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
    R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || R0 = [I1++M0];
AVGHV_RND_ST:    
        DISALGNEXCPT || I0 += M3 || R2 = [I2++M0];
        R6 = BYTEOP2P(R1:0,R3:2) (RNDH) || R0 = [I1++M3] || R2 = [I2++M3];
        // Merge the RNDL and RNDH results into one word; it is stored by the
        // next instruction while R7 is reloaded with the next RNDL result.
        R7 = R6 + R7 (NS) || I0 -= M3;
        R7 = BYTEOP2P(R1:0,R3:2) (RNDL, R) || [I3++] = R7 || R1 = [I1++M1];
        DISALGNEXCPT || I0 += M3 || R3 = [I2++M1];
        R6 = BYTEOP2P(R1:0,R3:2) (RNDH, R) || R0 = [I1++] || R2 = [I2++];
        R7 = R6 + R7 (NS) || I0 -= M3;        
        DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
AVGHV_RND_END:
        R7 = BYTEOP2P(R1:0,R3:2) (RNDL) || [I3++] = R7 || R0 = [I1++M0];
               
        
      // Common tail for all four paths: the predictor is complete in
      // _interpolate_8x8; now form the 16-bit differences.
      interpolate_8x8_end:
                         
      
                   // Rewind I3 to the start of the predictor; R6 keeps that
                   // address for the return value.
                   i3.l=_interpolate_8x8;
                   i3.h=_interpolate_8x8;
                   r6=i3;
                   i0=b3;// I0 = pSrcCur (saved in B3 on entry)
                   r7=p5;// R7 = srcCurStep (saved in P5 on entry)
                   i1=i0;// I1 is given the same address as I0
                   p0=8;
                   p4=[sp+16+64];// P4 = pDstDiff
                   // M0 = srcCurStep - 4: applied on the second word load of
                   // a row, it moves I0 to the start of the next row.
                   r7+=-4;
                   m0=r7;
                   lsetup(getdiff_start,getdiff_end)lc0=p0;
                         // Executed once: first word of the current row and
                         // of the predictor row.
                         r0=[i0++]||r2=[i3++];
                   // One iteration = one row = four 32-bit stores (eight
                   // 16-bit values). After each BYTEOP16M, R5 is stored
                   // before R4.
                   getdiff_start:
                       
                       (r4,r5)=byteop16m(r1:0,r3:2)||r1=[i0++m0]||r3=[i3++];
                         [p4++]=r5||r0=[i0++];
                       // The parallel store below still writes R4 from the
                       // first BYTEOP16M.
                       (r4,r5)=byteop16m(r1:0,r3:2)(r)||r2=[i3++]||[p4++]=r4;
                          [p4++]=r5;
                   getdiff_end:              
                          [p4++]=r4;
                
                        
                          
                         // Return value: address of the predictor buffer.
                         r0=r6;  
                         // Restore in exact reverse order of the pushes.
                         rets=[sp++];
                         l3=[sp++];
                         i3=[sp++]; 
                         l2=[sp++];
                         i2=[sp++];
                         l1=[sp++];
                         i1=[sp++];                 
                 
                         l0=[sp++];
                         i0=[sp++];
                        (r7:4,p5:3)=[sp++];

_xhGetDiff8x8_8u16s_C1.end:
                        rts;
                        
                        
/**********************************************************************************
GetDiff8x8_8u16s_C1(Ipp8u *pCur,int step,Ipp8u *_match_16x16,Ipp16s* pDstDiff)

Feeds an 8x8 block at pCur and an 8x8 block at _match_16x16 to BYTEOP16M and
stores the sixty-four 16-bit results back to back starting at pDstDiff.

Arguments as read by the code:
       R0              pCur
       R1              step
       R2              _match_16x16
       [sp+12+64]      pDstDiff   (+64 = register save area pushed on entry)

The caller must guarantee that pCur and the match block use the same row step:
both pointers are advanced with the same modifier M0 = step - 4.

NOTE(review): only two words are loaded per row and the loads are not issued
with DISALGNEXCPT, so both source pointers and step are presumably expected to
be multiples of 4 -- confirm with the callers.

No return value is set up. Registers written and NOT restored: R0-R3, P0, M0
and the loop-0 registers set by LSETUP.
**********************************************************************************/
_GetDiff8x8_8u16s_C1:

             // Register save area: 7 registers (R7:4, P5:3) plus 9 single
             // pushes = 64 bytes.
             [--sp]=(r7:4,p5:3);
             [--sp]=i0;
             [--sp]=l0;
             [--sp]=i1;
             [--sp]=l1;
             [--sp]=i2;
             [--sp]=l2;
             [--sp]=i3;
             [--sp]=l3;
             [--sp]=rets;

             // Force linear addressing for every index register used below,
             // exactly as _xhGetDiff8x8_8u16s_C1 does. Without this a
             // non-zero L register left by the caller would make the
             // post-modified accesses wrap as a circular buffer. The caller's
             // values are restored from the stack on exit.
             l0=0;
             l1=0;
             l2=0;
             l3=0;

             i0=r0;// I0 = pCur
             I1=I0;// I1 is given the same address as I0; it is not used for loads
             i2=r2;// I2 = match block
             r6=r1;// step
             r6+=-4;
             m0=r6;// M0 = step - 4: second word load of a row jumps to the next row
             r7=[sp+12+64];
             i3=r7;// I3 = pDstDiff
             p0=8;// one loop iteration per row
             // First word of row 0 from both blocks.
             r0=[i0++]||r2=[i2++];
             
             lsetup(getdiff_st,getdiff_ed)LC0=p0;
                // One iteration = one row = four 32-bit stores (eight 16-bit
                // values). After each BYTEOP16M, R5 is stored before R4; the
                // store issued in parallel with the second BYTEOP16M still
                // writes R4 from the first one.
                getdiff_st:
                     (r4,r5)=byteop16m(r1:0,r3:2)||r1=[i0++m0]||r3=[i2++m0];
                     [i3++]=r5||r0=[i0++];
                     (r4,r5)=byteop16m(r1:0,r3:2)(r)||r2=[i2++]||[i3++]=r4;
                     [i3++]=r5;
                getdiff_ed:
                     [i3++]=r4;
             
             
                 // Restore in exact reverse order of the pushes.
                 rets=[sp++];
                 l3=[sp++];
                 i3=[sp++]; 
                 l2=[sp++];
                 i2=[sp++];
                 l1=[sp++];
                 i1=[sp++];                 
                 
                 l0=[sp++];
                 i0=[sp++];
                 (r7:4,p5:3)=[sp++];
_GetDiff8x8_8u16s_C1.end:
rts;
                                

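/*
 * Web viewer keyboard shortcuts (page footer, not part of the program):
 *
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Switch theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */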