⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quant_mpeg4.asm

📁 adi bf533视频编码程序
💻 ASM
字号:

// Entry points exported by this file.
.global _xhQuantInvIntra_MPEG4_16s_C1I;
.global _xhQuantInv_MPEG4_16s_C1I;
.global _xhQuantIntra_MPEG4_16s_C1I;
.global _xhQuant_MPEG4_16s_C1I;
// Scan table defined in another module; the forward quantisers in this file
// read it one byte at a time while computing *pCount.
.extern _mZigZagScan;
.section L1_data_b;
.global _multipliers_MPEG;
// Reciprocal multipliers used by the forward quantisers in this file.
// The table is indexed by QP and read as 32-bit words (address = base + QP*4);
// the loaded value is multiplied into the level and the product is shifted
// right by 17.
// For every index i >= 1 the stored value equals (1 << 17) / (2 * i) + 1
// (integer division); index 0 holds 0.
// The table has 32 entries, so only indices 0..31 are backed by data.
.var _multipliers_MPEG[32] =
{
	0x0,     0x010001,0x8001, 0x5556,
	0x4001,  0x3334, 0x2aab, 0x2493,
	0x2001, 0x1c72,  0x199a, 0x1746,
	0x1556, 0x13b2,  0x124a, 0x1112,
	0x1001, 0x0f10,  0x0e39, 0x0d7a,
	0x0ccd, 0x0c31,  0x0ba3, 0x0b22,
	0x0aab, 0x0a3e,  0x09d9, 0x097c,
	0x0925, 0x08d4,  0x0889, 0x0843
 };

// All routines that follow are placed in the L1_code section.
.section L1_code;
/*************************************************************************
 * _xhQuantInvIntra_MPEG4_16s_C1I
 *
 * C-level prototype (as given by the original author):
 *   QuantInvIntra_MPEG4_16s_C1I(Ipp16s* pSrcDst, int QP, Ipp16s* pQPMatrix)
 * Note: pQPMatrix holds 16-bit (Ipp16s) entries here, not bytes.
 *
 * Purpose:
 *   In-place inverse quantisation of coefficients 1..63 of a 64-entry
 *   16-bit block. Coefficient 0 is neither read nor written.
 *   For every non-zero coefficient c with matrix weight w = pQPMatrix[i]:
 *       v = (|c| * w * QP) >> 3
 *       c > 0 : result = v if v < 2047, otherwise  2047
 *       c < 0 : result = -v if v < 2048, otherwise -2048
 *   Zero coefficients are written back as zero.
 *
 * Inputs:   R0 = pSrcDst, R1 = QP, R2 = pQPMatrix
 * Output:   the block is updated in place; R0 is not set to a defined
 *           return value.
 * Preserved (saved/restored here): R4-R7, P3-P5, I0-I2, L0-L2, RETS.
 * Modified and not restored: R0, R3, P0, M0, LC0/LT0/LB0, CC.
 *
 * NOTE(review): the loop prefetches the next operands as its last
 * instruction, so one halfword past coefficient 63 and one halfword past
 * matrix entry 63 are read (and discarded) on the final iteration.
 *
 * Performance reported by the original author:
 *                        ASM       C
 *          cycle count: 1497      5467
 *************************************************************************/
_xhQuantInvIntra_MPEG4_16s_C1I:
    // Prologue: save every register this routine restores on exit.
    [--sp]=(r7:4,p5:3);
    [--sp]=i0;
    [--sp]=l0;
    [--sp]=i1;
    [--sp]=l1;
    [--sp]=i2;
    [--sp]=l2;
    [--sp]=rets;

    i0=r0;                  // I0 = pSrcDst (becomes the store pointer)
    m0=2;                   // size of one 16-bit element in bytes
    l2=0;                   // linear (non-circular) addressing for I2
    i2=r2;
    i2+=m0;
    p3=i2;                  // P3 -> pQPMatrix[1]
    l0=0;                   // linear addressing for I0
    l1=0;                   // linear addressing for I1: it is post-incremented
                            // below, so it must not wrap as a circular buffer

    i0+=m0;                 // I0 -> pSrcDst[1]  (store pointer)
    i1=i0;                  // I1 -> pSrcDst[1]  (load pointer)

    // Prefetch the first coefficient (R0.L) and its matrix weight (R3).
    mnop||r0.l=w[i1++]||r3=w[p3++](x);
    p0=63;                  // 63 coefficients: indices 1..63
    lsetup (ac_loop_mpeg4_intra_inv_start,ac_loop_mpeg4_intra_inv_end)lc0=p0;
ac_loop_mpeg4_intra_inv_start:
    r0=r0.l(x);             // sign-extend the coefficient to 32 bits
    cc=r0;
    if !cc jump ac_loop_store;      // zero stays zero
    r7=abs r0;
    r7*=r3;                 // |c| * weight
    r7*=r1;                 // ... * QP
    r7>>=3;
    cc=bittst(r0,31);       // CC = 1 when the original coefficient is negative
    r0=r7;
    if cc jump negativevalue;
    r6=2047;                // positive saturation limit
    cc=r7<r6;
    if !cc r0=r6;
    jump ac_loop_store;
negativevalue:
    r0=-r7;
    r6=-2048;               // negative saturation limit
    r5=2048;
    cc=r7<r5;
    if !cc r0=r6;
ac_loop_store:
    w[i0++]=r0.l;           // write the result back in place
ac_loop_mpeg4_intra_inv_end:
    // Last loop instruction: prefetch the next coefficient and weight.
    mnop||r0.l=w[i1++]||r3=w[p3++](x);

    // Epilogue: restore in reverse order of the prologue.
    rets=[sp++];
    l2=[sp++];
    i2=[sp++];
    l1=[sp++];
    i1=[sp++];
    l0=[sp++];
    i0=[sp++];
    (r7:4,p5:3)=[sp++];

    rts;
    // End-of-function marker, placed after the return so that the RTS is
    // inside the function's extent.
_xhQuantInvIntra_MPEG4_16s_C1I.end:

/**********************************************************************
 * _xhQuantInv_MPEG4_16s_C1I
 *
 * C-level prototype (as given by the original author):
 *   QuantInv_MPEG4_16s_C1I(Ipp16s* pSrcDst, int QP, Ipp16s* pQPMatrix)
 * Note: pQPMatrix holds 16-bit (Ipp16s) entries here, not bytes.
 *
 * Purpose:
 *   In-place inverse quantisation of all 64 coefficients of a 16-bit
 *   block, followed by a parity adjustment of the last coefficient.
 *   For every non-zero coefficient c with matrix weight w = pQPMatrix[i]:
 *       v = ((2*|c| + 1) * QP * w) >> 4
 *       c > 0 : result = v if v < 2047, otherwise  2047
 *       c < 0 : result = -v if v < 2048, otherwise -2048
 *   Zero coefficients are written back as zero.
 *   All results are XOR-ed together; if bit 0 of that XOR is clear,
 *   bit 0 of coefficient 63 is toggled.
 *
 * Inputs:   R0 = pSrcDst, R1 = QP, R2 = pQPMatrix
 * Output:   the block is updated in place; R0 is not set to a defined
 *           return value.
 * Preserved (saved/restored here): R4-R7, P3-P5, I0-I2, L0-L2, RETS.
 * Modified and not restored: R0, R2, R3, P0, M0 (only on the parity
 *           adjustment path), LC0/LT0/LB0, CC.
 *
 * NOTE(review): the loop prefetches the next operands as its last
 * instruction, so one halfword past coefficient 63 and one halfword past
 * matrix entry 63 are read (and discarded) on the final iteration.
 *
 * Performance reported by the original author:
 *                        ASM       C
 *          cycle count: 1034      3701
 **********************************************************************/
_xhQuantInv_MPEG4_16s_C1I:
    // Prologue: save every register this routine restores on exit.
    [--sp]=(r7:4,p5:3);
    [--sp]=i0;
    [--sp]=l0;
    [--sp]=i1;
    [--sp]=l1;
    [--sp]=i2;
    [--sp]=l2;
    [--sp]=rets;

    p3=r2;                  // P3 -> pQPMatrix[0]
    l0=0;                   // linear addressing for I0
    i0=r0;                  // I0 = load pointer  -> pSrcDst[0]
    l1=0;                   // linear addressing for I1
    i1=r0;                  // I1 = store pointer -> pSrcDst[0]
    r4=0;                   // running XOR of all results (parity tracker)

    // Prefetch the first coefficient (R0.L) and its matrix weight (R3).
    r0.l=w[i0++]||r3=w[p3++](x);
    p0=64;                  // all 64 coefficients
    lsetup(acdc_loop_mpeg4_inter_inv_start,acdc_loop_mpeg4_inter_inv_end)lc0=p0;

acdc_loop_mpeg4_inter_inv_start:
    r0=r0.l(x);             // sign-extend the coefficient to 32 bits
    cc=r0;
    if !cc jump acdc_zero;  // zero stays zero
    r2=abs r0;
    r2<<=1;
    r2+=1;                  // 2*|c| + 1
    r2*=r1;                 // ... * QP
    r2*=r3;                 // ... * weight
    r2>>=4;
    cc=bittst(r0,31);       // CC = 1 when the original coefficient is negative
    r0=r2;
    if cc jump acdc_value_neg;
    r7=2047;                // positive saturation limit
    cc=r2<r7;
    if !cc r0=r7;
    jump acdc_zero;
acdc_value_neg:
    r0=-r2;
    r7=2048;
    r6=-2048;               // negative saturation limit
    cc=r2<r7;
    if !cc r0=r6;
acdc_zero:
    w[i1++]=r0.l;           // write the result back in place
    r4=r4^r0;               // accumulate parity
acdc_loop_mpeg4_inter_inv_end:
    // Last loop instruction: prefetch the next coefficient and weight.
    r0.l=w[i0++]||r3=w[p3++](x);

    // Parity adjustment: skipped when bit 0 of the accumulated XOR is set.
    cc=bittst(r4,0);
    if cc jump undo_mismatch;
    m0=2;
    r5=1;
    i1-=m0;                 // I1 was one past the block; step back to coefficient 63
    r0.l=w[i1];
    r0=r0.l(x);
    r0=r0^r5;               // toggle bit 0
    w[i1]=r0.l;
undo_mismatch:
    // Common exit (reached with or without the adjustment above).
    // Epilogue: restore in reverse order of the prologue.
    rets=[sp++];
    l2=[sp++];
    i2=[sp++];
    l1=[sp++];
    i1=[sp++];
    l0=[sp++];
    i0=[sp++];
    (r7:4,p5:3)=[sp++];

    rts;
    // End-of-function marker, placed after the return so that the RTS is
    // inside the function's extent.
_xhQuantInv_MPEG4_16s_C1I.end:
             
            
/*****************************************************************
 * _xhQuantIntra_MPEG4_16s_C1I
 *
 * C-level prototype (as given by the original author):
 *   QuantIntra_MPEG4_16s_C1I(Ipp16s* pSrcDst,
 *                            Ipp32s  QP,
 *                      const Ipp32f* pQPMatrix,
 *                            Ipp32s* pCount)
 *
 * Original author's note: the Ipp8u mDefaultIntraQuantMatrix is used
 * instead of an Ipp32f pQPMatrix, because multiplying an integer by a
 * fraction is difficult here. The code accordingly reads pQPMatrix one
 * unsigned byte per coefficient.
 *
 * Purpose:
 *   In-place forward quantisation of coefficients 1..63 of a 64-entry
 *   16-bit block. Coefficient 0 is neither read nor written by the
 *   quantisation loop. With w = pQPMatrix[i] (byte),
 *   quantd = (3*QP + 2) >> 2 and mult = _multipliers_MPEG[QP], every
 *   non-zero coefficient c becomes:
 *       level = ((|c| << 4) + (w >> 1)) / w    (DIVS/DIVQ sequence,
 *                                               low 16 bits kept)
 *       q     = ((level + quantd) * mult) >> 17
 *       result = q for c > 0, -q for c < 0
 *   Zero coefficients are written back as zero.
 *
 *   Afterwards bytes 1..63 of _mZigZagScan are walked in order; each
 *   byte value t is used as an element index into the block, and
 *   *pCount receives the t of the last visited non-zero element, or -1
 *   when none is non-zero.
 *
 * Inputs:   R0 = pSrcDst, R1 = QP, R2 = pQPMatrix,
 *           pCount is loaded from the stack at [SP + 12 + 56] after the
 *           prologue (56 bytes = the 14 registers pushed below;
 *           presumably the extra 12 bytes are the caller-reserved slots
 *           for the three register arguments - confirm against the ABI).
 * Output:   R0 = 0; block updated in place; *pCount written (32-bit).
 * Preserved (saved/restored here): R4-R7, P3-P5, I0, I1, I3, L0, L1, L3,
 *           RETS.
 * Modified and not restored: R1-R3, P0, P1, M0, B0, LC0/LT0/LB0,
 *           LC1/LT1/LB1, CC.
 *
 * NOTE(review): no range check is made on QP (table has 32 entries) and
 * no check is made for a zero matrix byte before the division.
 * NOTE(review): both loops prefetch as their last instruction, so one
 * element past the end of the block, the matrix and the scan table is
 * read (and discarded) on the final iteration.
 *
 * Performance reported by the original author:
 *                        ASM     C
 *        cycle count:    3231    10359
 ****************************************************************/
_xhQuantIntra_MPEG4_16s_C1I:
    // Prologue: save every register this routine restores on exit
    // (7 + 7 registers = 56 bytes).
    [--sp]=(r7:4,p5:3);
    [--sp]=i0;
    [--sp]=l0;
    [--sp]=i1;
    [--sp]=l1;
    [--sp]=i3;
    [--sp]=l3;
    [--sp]=rets;

    i0=r0;
    m0=2;                   // size of one 16-bit element in bytes
    r2+=1;                  // skip matrix byte 0
    l0=0;                   // linear addressing for I0

    i0+=m0;                 // start from the AC coefficients: I0 -> pSrcDst[1]
    b0=r0;                  // keep the block base for the pCount pass
    l1=0;                   // linear addressing for I1
    i1=i0;                  // I1 = store pointer -> pSrcDst[1]
    p3=r2;                  // P3 -> pQPMatrix[1] (bytes)
    l3=0;
    i3.l=_mZigZagScan;      // I3 = address of the scan table
    i3.h=_mZigZagScan;

    r7.l=_multipliers_MPEG;
    r7.h=_multipliers_MPEG;
    r2=r1<<2;               // QP * 4 = byte offset of a 32-bit table entry
    r7=r2+r7;
    p4=r7;
    r2=[p4];                // mult = _multipliers_MPEG[QP]
    r5=3;
    p5=[sp+12+56];          // P5 = pCount (fourth argument, taken from the stack)
    r1*=r5;                 // 3 * QP
    r1+=2;
    r1>>=2;                 // quantd = (3*QP + 2) >> 2
    p1=16;                  // DIVQ repeat count

    // Prefetch the first coefficient (R0.L) and matrix byte (R5).
    // The matrix is read as bytes, so P3 advances by 1 per element.
    r0.l=w[i0++]||r5=b[p3++](z);
    p0=63;                  // 63 coefficients: indices 1..63

    lsetup(ac_loop_mpeg4_intra_start,ac_loop_mpeg4_intra_end)lc0=p0;
ac_loop_mpeg4_intra_start:
    r0=r0.l(x);             // sign-extend the coefficient to 32 bits
    cc=r0;
    if !cc jump  ac_zero;   // zero stays zero
    r4=abs r0;
    r4<<=4;                 // |c| << 4

    r6=r5>>1;
    r4=r4+r6;               // + (w >> 1)

    // Divide R4 by R5 with the divide primitives: DIVS, then DIVQ
    // repeated P1 = 16 times in a one-instruction hardware loop.
    divs(r4,r5);
    lsetup(div_start,div_end)lc1=p1;
div_start:
div_end:
    divq(r4,r5);
    r4=r4.l(x);             // keep the low 16 bits as the quotient

    r4=r4+r1;               // + quantd

    // Disabled by the original author:
    //   r4*=r5;   (level << 4) * pQPMatrix[i]
    //   original note, translated: "missing 0.5"
    r4*=r2;                 // * mult
    r4>>=17;                // SCALEBITS_MPEG
    cc=bittst(r0,31);       // CC = 1 when the original coefficient is negative
    r0=r4;
    r4=-r4;
    if cc r0=r4;            // restore the sign
ac_zero:
    w[i1++]=r0.l;           // write the result back in place
ac_loop_mpeg4_intra_end:
    // Last loop instruction: prefetch the next coefficient and matrix byte.
    r0.l=w[i0++]||r5=b[p3++](z);

    // pCount computation (an original note here, translated, read
    // "pcount computation missing"; the computation is what follows).
    i1=p5;                  // I1 = pCount
    // Disabled by the original author: w[i1]=-1;
    p3=i3;                  // P3 -> _mZigZagScan[0]
    p3+=1;                  // skip scan entry 0
    p0=63;
    r7=b[p3++](z);          // prefetch the first scan-table byte
    r3=-1;                  // value stored when no element is non-zero
                            // (original note: "in order to conform with the ippi")
    r6=b0;                  // block base address
    lsetup(pcount_start,pcount_end)lc0=p0;
pcount_start:
    r4=r7<<1;               // table byte * 2 = byte offset of a 16-bit element
    r4=r6+r4;
    i0=r4;
    r5.l=w[i0];
    r5=r5.l(x);
    cc=r5;
    if cc r3=r7;            // remember the table value of this non-zero element
pcount_end:
    r7=b[p3++](z);          // last loop instruction: prefetch the next table byte

    [i1]=r3;                // *pCount = result (32-bit store)

    r0=0;                   // return value
    // Epilogue: restore in reverse order of the prologue.
    rets=[sp++];
    l3=[sp++];
    i3=[sp++];
    l1=[sp++];
    i1=[sp++];
    l0=[sp++];
    i0=[sp++];
    (r7:4,p5:3)=[sp++];

    rts;
    // End-of-function marker, placed after the return so that the RTS is
    // inside the function's extent.
_xhQuantIntra_MPEG4_16s_C1I.end:
	         
/************************************************************************
 * _xhQuant_MPEG4_16s_C1I
 *
 * C-level prototype (as given by the original author):
 *   Quant_MPEG4_16s_C1I(Ipp16s*  pSrcDst,
 *                       Ipp32s   QP,
 *                 const Ipp32f*  pQPMatrix,
 *                       Ipp32s*  pCount)
 *
 * Original author's note: the Ipp8u mDefaultIntraQuantMatrix is used
 * instead of an Ipp32f pQPMatrix, because multiplying an integer by a
 * fraction is difficult here. The code accordingly reads pQPMatrix one
 * unsigned byte per coefficient.
 *
 * Purpose:
 *   In-place forward quantisation of all 64 coefficients of a 16-bit
 *   block. With w = pQPMatrix[i] (byte) and mult = _multipliers_MPEG[QP],
 *   every non-zero coefficient c becomes:
 *       level = ((|c| << 4) + (w >> 1)) / w    (DIVS/DIVQ sequence,
 *                                               low 16 bits kept)
 *       q     = (level * mult) >> 17
 *       result = q for c > 0, -q for c < 0
 *   Zero coefficients are written back as zero. The unsigned q values
 *   are summed and the sum is returned.
 *
 *   Afterwards bytes 0..63 of _mZigZagScan are walked in order; each
 *   byte value t is used as an element index into the block, and
 *   *pCount receives the t of the last visited non-zero element, or -1
 *   when none is non-zero.
 *
 * Inputs:   R0 = pSrcDst, R1 = QP, R2 = pQPMatrix,
 *           pCount is loaded from the stack at [SP + 12 + 56] after the
 *           prologue (56 bytes = the 14 registers pushed below;
 *           presumably the extra 12 bytes are the caller-reserved slots
 *           for the three register arguments - confirm against the ABI).
 * Output:   R0 = sum of the quantised magnitudes; block updated in
 *           place; *pCount written (32-bit).
 * Preserved (saved/restored here): R4-R7, P3-P5, I0, I1, I3, L0, L1, L3,
 *           RETS.
 * Modified and not restored: R1-R3, P0, P1, B0, LC0/LT0/LB0,
 *           LC1/LT1/LB1, CC.
 *
 * NOTE(review): no range check is made on QP (table has 32 entries) and
 * no check is made for a zero matrix byte before the division.
 * NOTE(review): both loops prefetch as their last instruction, so one
 * element past the end of the block, the matrix and the scan table is
 * read (and discarded) on the final iteration.
 *
 * Performance reported by the original author:
 *                       ASM          C
 *       cycle count:    2193       7760
 ************************************************************************/
_xhQuant_MPEG4_16s_C1I:
    // Prologue: save every register this routine restores on exit
    // (7 + 7 registers = 56 bytes).
    [--sp]=(r7:4,p5:3);
    [--sp]=i0;
    [--sp]=l0;
    [--sp]=i1;
    [--sp]=l1;
    [--sp]=i3;
    [--sp]=l3;
    [--sp]=rets;

    i0=r0;                  // I0 = load pointer -> pSrcDst[0]
    l0=0;                   // linear addressing for I0

    b0=r0;                  // keep the block base for the pCount pass
    l1=0;                   // linear addressing for I1
    i1=i0;                  // I1 = store pointer -> pSrcDst[0]
    p3=r2;                  // P3 -> pQPMatrix[0] (bytes)
    l3=0;
    i3.l=_mZigZagScan;      // I3 = address of the scan table
    i3.h=_mZigZagScan;

    r7.l=_multipliers_MPEG;
    r7.h=_multipliers_MPEG;
    r2=r1<<2;               // QP * 4 = byte offset of a 32-bit table entry
    r7=r2+r7;
    p4=r7;

    p5=[sp+12+56];          // P5 = pCount (fourth argument, taken from the stack)

    r1=0;                   // sum of quantised magnitudes
    r2=[p4];                // mult = _multipliers_MPEG[QP]
    // Prefetch the first coefficient (R0.L) and matrix byte (R5).
    r0.l=w[i0++]||r5=b[p3++](z);
    p0=64;                  // all 64 coefficients
    p1=16;                  // DIVQ repeat count
    lsetup(acdc_loop_mpeg4_inter_start,acdc_loop_mpeg4_inter_end)lc0=p0;
acdc_loop_mpeg4_inter_start:
    r0=r0.l(x);             // sign-extend the coefficient to 32 bits
    cc=r0;
    if !cc jump  acdc_inter_zero;   // zero stays zero
    r4=abs r0;
    r4<<=4;                 // |c| << 4

    r6=r5>>1;
    r4=r4+r6;               // + (w >> 1)

    // Divide R4 by R5 with the divide primitives: DIVS, then DIVQ
    // repeated P1 = 16 times in a one-instruction hardware loop.
    divs(r4,r5);
    lsetup(div_inter_start,div_inter_end)lc1=p1;
div_inter_start:
div_inter_end:
    divq(r4,r5);
    r4=r4.l(x);             // keep the low 16 bits as the quotient

    // Disabled by the original author:
    //   r4*=r5;   (level << 4) * pQPMatrix[i]
    //   original note, translated: "missing 0.5"
    r4*=r2;                 // * mult
    r4>>=17;                // SCALEBITS_MPEG
    r1=r1+r4;               // sum += magnitude (before the sign is applied)
    cc=bittst(r0,31);       // CC = 1 when the original coefficient is negative
    r0=r4;
    r4=-r4;
    if cc r0=r4;            // restore the sign
    // Earlier variant disabled by the original author:
    //   if !cc jump acdc_inter_zero;
    //   r0=-r4;
acdc_inter_zero:
    w[i1++]=r0.l;           // write the result back in place
acdc_loop_mpeg4_inter_end:
    // Last loop instruction: prefetch the next coefficient and matrix byte.
    r0.l=w[i0++]||r5=b[p3++](z);

    // pCount computation (an original note here, translated, read
    // "pcount computation missing"; the computation is what follows).
    i1=p5;                  // I1 = pCount
    // Disabled by the original author: w[i1]=-1;
    p3=i3;                  // P3 -> _mZigZagScan[0]

    p0=64;
    r7=b[p3++](z);          // prefetch the first scan-table byte
    r3=-1;                  // value stored when no element is non-zero
                            // (original note: "in order to conform with the ippi")
    r6=b0;                  // block base address
    lsetup(pcount_inter_start,pcount_inter_end)lc0=p0;
pcount_inter_start:
    r4=r7<<1;               // table byte * 2 = byte offset of a 16-bit element
    r4=r6+r4;
    i0=r4;
    r5.l=w[i0];
    r5=r5.l(x);
    cc=r5;
    if cc r3=r7;            // remember the table value of this non-zero element
pcount_inter_end:
    r7=b[p3++](z);          // last loop instruction: prefetch the next table byte

    [i1]=r3;                // *pCount = result (32-bit store)

    r0=r1;                  // return the accumulated sum
    // Epilogue: restore in reverse order of the prologue.
    rets=[sp++];
    l3=[sp++];
    i3=[sp++];
    l1=[sp++];
    i1=[sp++];
    l0=[sp++];
    i0=[sp++];
    (r7:4,p5:3)=[sp++];

    rts;
    // End-of-function marker, placed after the return so that the RTS is
    // inside the function's extent.
_xhQuant_MPEG4_16s_C1I.end:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -