⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 meanabsdev.asm

📁 adi bf533视频编码程序
💻 ASM
字号:
// Assemble everything that follows into the section named L1_code.
.section L1_code;
// Export the fixed-pitch entry point defined later in this file.
.global _xhMeanAbsDev16x16_8u32s_C1R_MOD;
// Export of the original (srcStep-taking) entry point; disabled because that
// routine's body is commented out in this file.
//.global  _xhMeanAbsDev16x16_8u32s_C1R;

/*******************************************************************
     ippiMeanAbsDev16x16_8u32s_C1R (const Ipp8u *pSrc, int srcStep,
                               Ipp32s *pDst);
     I0 and I1 must be multiples of 4 (i.e. pSrc must be 4-byte aligned).
     performance:
                        ASM     C
                 cycle: 274    16813                     
*********************************************************************/
/*_xhMeanAbsDev16x16_8u32s_C1R:
         
                      [--sp]=(r7:4,p5:3);
                      [--sp]=i0;
                      [--sp]=l0;
                      [--sp]=i1;
                      [--sp]=l1;
                      [--sp]=i2;
                      [--sp]=l2;
                      [--sp]=i3;
                      [--sp]=l3;
                      [--sp]=rets;
                      
                      l0=0;
                      l1=0;
                      a0=0;
                      i0=r0;//psrc
                      i1=r0;//psrc
                      b0=r0;//psrc
                      p5=r1;//srcstep
                      m1=r1;
                      i2=r2;//pdst
                      i1+=m1;//下一行
                      r7=0;//sum
                      r1+=-16;
                      r2=p5;
                      r1=r2+r1;
                      m0=r1;
                      p0=8;
                      lsetup(sum_start,sum_end)lc0=p0;
                      r0=[i0++]||r2=[i1++];
                     sum_start:
                       (r5,r4)=byteop16p(r1:0,r3:2)||r1=[i0++]||r3=[i1++];
                        r6=r5+|+r4;
                       (r5,r4)=byteop16p(r1:0,r3:2)(r)||r0=[i0++]||r2=[i1++];
                        r1=r5+|+r4(s);
                        r6=r6+|+r1(s);
                        r7=r7+|+r6(s);
                        (r5,r4)=byteop16p(r1:0,r3:2)||r1=[i0++]||r3=[i1++];
                        r6=r5+|+r4(s); 
                        (r5,r4)=byteop16p(r1:0,r3:2)(r)||r0=[i0++m0]||r2=[i1++m0];
                        r1=r5+|+r4(s);
                        r6=r1+|+r6(s);
                        r7=r7+|+r6(s);
                      sum_end:
                        r0=[i0++]||r2=[i1++]; 
                        r6.l=r7.h+r7.l(s);
                        r7=r6.l(x);
                        
                        r6=128;
                        r7=r6+r7;
                        r7=r7>>8;//the value is in [0,255]
                        
                        r6=r7<<8;
                        r2=r7|r6;
                        r6=r6<<8;
                        r2=r2|r6;
                        r6=r6<<8;
                        r2=r2|r6;//in order to use saa,the all bytes must be the mean
                        r3=r2;
                        i0=b0;//psrc 
                        i1=b0;
                        r0=p5;//srcstep
                        r0+=-12;
                        m0=r0;
                        p0=16;
                        a1=a0=0;
                      lsetup(dev_start,dev_end)lc0=p0;                        
                        r0=[i0++];
                       dev_start:
                        saa(r1:0,r3:2)||r1=[i0++];
                        saa(r1:0,r3:2)(r)||r0=[i0++];
                        saa(r1:0,r3:2)||r1=[i0++m0];   
                        dev_end:
                        saa(r1:0,r3:2)(r)||r0=[i0++];
                        
                        r6=a1.l+a1.h,r7=a0.l+a0.h;
                        r3=r6+r7(s); 
                        [i2]=r3;//put the dev in pdst
                         
                       rets=[sp++];
                       l3=[sp++];
                       i3=[sp++];
                       l2=[sp++];
                       i2=[sp++];
                       l1=[sp++];
                       i1=[sp++];
                       l0=[sp++];
                       i0=[sp++];
                   (r7:4,p5:3)=[sp++];  
_xhMeanAbsDev16x16_8u32s_C1R.end:
                   rts;    
                         */  
                         
 // _xhMeanAbsDev16x16_8u32s_C1R_MOD
 //
 // Sum of absolute deviations from the mean for a 16x16 block of unsigned
 // bytes whose rows are stored back to back (row pitch fixed at 16 bytes).
 //
 // In:   R0 = pSrc  start of the 256-byte block. Must be 4-byte aligned:
 //                  BYTEOP16P and SAA take their byte alignment from the
 //                  low bits of I0/I1, which are loaded from pSrc.
 //       R1 = pDst  address of the 32-bit result word.
 // Out:  [pDst] = sum over the 256 pixels of |pixel - mean|,
 //                with mean = (sum of pixels + 128) >> 8.
 //
 // Saved and restored here: R7:4, P5:3, I0-I3, L0-L3, RETS.
 // Modified and NOT restored: R0-R3, P0, A0, A1, M0, M1, B0 and the
 // hardware-loop-0 registers.
 //
 // NOTE: the loads are software-pipelined one word ahead of the arithmetic,
 // so the last pass of the first loop reads the words at pSrc+256 and
 // pSrc+272, and the last pass of the second loop reads the word at
 // pSrc+256. Those values are never used, but the addresses must be readable.
 _xhMeanAbsDev16x16_8u32s_C1R_MOD:

                      [--sp]=(r7:4,p5:3);
                      [--sp]=i0;
                      [--sp]=l0;
                      [--sp]=i1;
                      [--sp]=l1;
                      [--sp]=i2;
                      [--sp]=l2;
                      [--sp]=i3;
                      [--sp]=l3;
                      [--sp]=rets;

                      l0=0;                 // linear (non-circular) addressing for I0
                      l1=0;                 // linear (non-circular) addressing for I1
                      a0=0;
                      i0=r0;                // I0 -> even rows, starts at pSrc
                      i1=r0;                // I1 -> odd rows, starts at pSrc (advanced below)
                      b0=r0;                // keep pSrc so the second pass can rewind
                      m1=16;                // fixed row pitch in bytes
                      i2=r1;                // I2 -> pDst
                      i1+=m1;               // I1 now points at the next row (pSrc + 16)
                      r7=0;                 // packed pair of 16-bit running pixel sums


                      m0=20;                // 4 (last word of this row) + 16 (skip the partner row)
                      p0=8;                 // 8 passes, two rows per pass
                      lsetup(sum_start,sum_end)lc0=p0;
                      r0=[i0++]||r2=[i1++]; // prime the pipeline with word 0 of rows 0 and 1
                     // ---- Pass 1: sum all 256 pixels --------------------------------
                     // Each BYTEOP16P adds four bytes of the even row to the four bytes
                     // of the odd row below them, giving four 16-bit sums in (r5,r4).
                     // The plain form consumes r0/r2, the (r) form consumes r1/r3, so
                     // the loads issued in parallel always fill the register pair that
                     // the *next* BYTEOP16P will read.
                     sum_start:
                       (r5,r4)=byteop16p(r1:0,r3:2)||r1=[i0++]||r3=[i1++];
                        r6=r5+|+r4;
                       (r5,r4)=byteop16p(r1:0,r3:2)(r)||r0=[i0++]||r2=[i1++];
                        r1=r5+|+r4(s);
                        r6=r6+|+r1(s);
                        r7=r7+|+r6(s);
                        // Fetch word 3 of both rows, then jump both pointers to the
                        // start of the next row pair.
                        (r5,r4)=byteop16p(r1:0,r3:2)||r1=[i0++m0]||r3=[i1++m0];
                        r6=r5+|+r4(s);
                        // Pre-load word 0 of the next row pair (one word past the
                        // block on the final pass).
                        (r5,r4)=byteop16p(r1:0,r3:2)(r)||r0=[i0++]||r2=[i1++];
                        r1=r5+|+r4(s);
                        r6=r1+|+r6(s);

                      sum_end:
                        r7=r7+|+r6(s);      // last instruction of the hardware loop

                        // Fold the two 16-bit partial sums into one 32-bit total.
                        // Each half covers 128 pixels and can reach 128*255 = 32640,
                        // so the total can reach 65280. A saturating 16-bit add would
                        // clamp that at 32767 and pin the mean at 128 for bright
                        // blocks, so the fold is done in 32 bits instead.
                        r6=r7>>16;          // upper partial sum, zero-filled
                        r7=r7.l(z);         // lower partial sum, zero-extended
                        r7=r7+r6;           // total pixel sum, 0..65280

                        r6=128;
                        r7=r6+r7;           // +128 rounds to nearest before dividing by 256
                        r7=r7>>8;           // mean, in [0,255]

                        // Replicate the mean into all four byte lanes of r2 and r3,
                        // because SAA compares source bytes against r3:2 lane by lane.
                        r6=r7<<8;
                        r2=r7|r6;
                        r6=r6<<8;
                        r2=r2|r6;
                        r6=r6<<8;
                        r2=r2|r6;
                        r3=r2;
                        i0=b0;              // rewind I0 to pSrc
                        i1=b0;              // I1 = pSrc too, so both SAA operands see the same alignment

                        p0=16;              // 16 passes, one row per pass
                        a1=a0=0;            // clear the four 16-bit SAA accumulators
                      lsetup(dev_start,dev_end)lc0=p0;
                        r0=[i0++];          // prime the pipeline with word 0 of row 0
                       // ---- Pass 2: accumulate |pixel - mean| -----------------------
                       // Rows are contiguous, so I0 simply walks the block word by word.
                       // As in pass 1, the plain form reads r0 and the (r) form reads r1.
                       dev_start:
                        saa(r1:0,r3:2)||r1=[i0++];
                        saa(r1:0,r3:2)(r)||r0=[i0++];
                        saa(r1:0,r3:2)||r1=[i0++];
                        dev_end:
                        saa(r1:0,r3:2)(r)||r0=[i0++]; // also pre-loads word 0 of the next row

                        // Each accumulator half holds at most 64*255 = 16320; add the
                        // four of them to get the final result.
                        r6=a1.l+a1.h,r7=a0.l+a0.h;
                        r3=r6+r7(s);
                        [i2]=r3;            // store the result at pDst

                       rets=[sp++];
                       l3=[sp++];
                       i3=[sp++];
                       l2=[sp++];
                       i2=[sp++];
                       l1=[sp++];
                       i1=[sp++];
                       l0=[sp++];
                       i0=[sp++];
                   (r7:4,p5:3)=[sp++];
 _xhMeanAbsDev16x16_8u32s_C1R_MOD.end:
 rts;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -