📄 quant_h263_ia64.s

📁 wince下的xvidcore开发库,可用于MP4等视频播放开发
💻 S
📖 第 1 页 / 共 2 页
字号:
上一页 12
         mov ar.lc = r2
         br.ret.sptk.many b0
         .endp dequant_h263_intra_ia64#
 
 
 
 // uint32_t quant_h263_inter_ia64(int16_t *coeff, const int16_t *data, const uint32_t quant)
 //
 // H.263 inter-block quantiser: processes 64 16-bit coefficients from data[],
 // writes the quantised values to coeff[] and accumulates the sum of the
 // quantised magnitudes (see the C reference in the boxed comments below).
 //
 // Arguments as read by the code:
 //     r32 = coeff  (output pointer, copied to r18)
 //     r33 = data   (input pointer,  copied to r15)
 //     r34 = quant
 // Result:
 //     r8  = sum
 //
 // Register roles inside the loop:
 //     r15 = read pointer into data[]   (post-incremented by 2)
 //     r18 = write pointer into coeff[] (post-incremented by 2)
 //     r20 = quant_m_2 (quant << 1)
 //     r21 = quant_d_2 (quant >> 1)
 //     r29 = mult = multipliers[quant]
 //     r2 / r9 / r10 / r17 = saved ar.lc / ar.pfs / pr / ar.ec
 //
 // The loop is software-pipelined with rotating registers and br.ctop;
 // r32-r55 are declared rotating by the alloc below, which is why the
 // incoming arguments are copied to static registers before the loop starts.
 
 
 
         .common quant_h263_inter#,8,8
         .align 16
         .global quant_h263_inter_ia64#
         .proc quant_h263_inter_ia64#
 quant_h263_inter_ia64:
 
 
 //*******************************************************
 //*                                                     *
 //*     const uint32_t mult = multipliers[quant];       *
 //*     const uint16_t quant_m_2 = quant << 1;          *
 //*     const uint16_t quant_d_2 = quant >> 1;          *
 //*     int sum = 0;                                    *
 //*     uint32_t i;                                     *
 //*     int16_t acLevel,acL;                            *
 //*                                                     *
 //*******************************************************/
 
 
 
         LL=3            // LL = load latency
                         //if LL is changed, you'll also have to change the .pred.rel... parts below!    
         .prologue
         // r14 = address of the linkage-table entry for multipliers[]
         addl r14 = @ltoff(multipliers#), gp
         // r15 = quant << 2 : byte offset of multipliers[quant] (4-byte entries, see ld4 below)
         dep.z r15 = r34, 2, 32
         .save ar.lc, r2
         mov r2 = ar.lc
         ;;
         .body
         // frame: 0 inputs, 24 locals, 0 outputs, 24 rotating (r32-r55)
         alloc r9=ar.pfs,0,24,0,24
         mov r17 = ar.ec                 //r17 = saved epilogue count
         mov r10 = pr                    //r10 = saved predicates
         ld8 r14 = [r14]                 //r14 = &multipliers[0]
         extr.u r16 = r34, 1, 16         //r16 = quant_d_2
         dep.z r20 = r34, 1, 15          //r20 = quant_m_2
         ;;
         add r15 = r15, r14              //r15 = &multipliers[quant]
         mov r21 = r16                   //r21 = quant_d_2
         mov r8 = r0                     //r8  = sum = 0
         mov pr.rot    = 0               //p16-p63 = 0
         ;;
         ld4 r15 = [r15]                 //r15 = mult
         addl r14 = 63, r0               //loop count - 1 (64 iterations)
         mov pr.rot = 1 << 16            //p16=1 
         ;;
         mov ar.lc = r14
         mov ar.ec = LL+9                //number of pipeline stages (0 .. LL+8)
         mov r29 = r15                   //r29 = mult
         ;;
         mov r15 = r33                   //r15 = data
         mov r18 = r32                   //r18 = coeff
         ;;
         
         
         // Rotating register layout (assigned upwards from r32 / p16 in declaration order):
         //   ac1[]     : loaded / sign-extended input value (acL)
         //   ac2[]     : magnitude being quantised (acLevel)
         //   ac3[]     : signed result to be stored
         //   p[]       : stage predicates             (p16-p27 with LL=3)
         //   cmp1[]    : 0 <= acL                     (p28-p35)
         //   cmp1neg[] : !(0 <= acL)                  (p36-p43)
         //   cmp2[]    : quant_m_2 <= acLevel         (p44-p48)
         //   cmp2neg[] : !(quant_m_2 <= acLevel)      (p49-p50)
         .rotr ac1[LL+3], ac2[8], ac3[2]
         .rotp p[LL+9], cmp1[8], cmp1neg[8],cmp2[5], cmp2neg[2]
 
 
 
 //*******************************************************************************
 //*                                                                             *
 //*     for (i = 0; i < 64; i++) {                                              *
 //*             acL=acLevel = data[i];                                          *
 //*             acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2;         *
 //*             if (acLevel < quant_m_2){                                       *
 //*                     acLevel = 0;                                            *
 //*             }                                                               *
 //*             acLevel = (acLevel * mult) >> SCALEBITS;                        *
 //*             sum += acLevel;                                                 *
 //*             coeff[i] = ((acL < 0)?-acLevel:acLevel);                        *
 //*     }                                                                       *               
 //*                                                                             *       
 //*******************************************************************************/ 
 
 
 
 .explicit
 .L58:
         // p29/p37 are cmp1[1]/cmp1neg[1] (with LL=3): both guard a write to ac2[1]
         // in the same instruction group, so they are declared mutually exclusive
         // for the assembler's dependency checking.
         .pred.rel "clear", p29, p37
         .pred.rel "mutex", p29, p37
 
                                                                         //pipeline stage
 {.mmi
         (p[0])          ld2 ac1[0]   = [r15],2                          //   0          acL=acLevel = data[i];
         (p[LL+1])       sub ac2[0]   = r0, ac1[LL+1]                    //   LL+1       ac2=-acLevel
         (p[LL])         sxt2 ac1[LL] = ac1[LL]                          //   LL         sign-extend the 16-bit load
 }
 {.mmi
         (p[LL+1])       cmp4.le cmp1[0], cmp1neg[0] = r0, ac1[LL+1]     //   LL+1       cmp1 = (0<=acLevel)  ;   cmp1neg = !(0<=acLevel)
         (p[LL+4])       cmp4.le cmp2[0], cmp2neg[0] = r20, ac2[3]       //   LL+4       cmp2 = (quant_m_2 <= acLevel)  ; cmp2neg = (acLevel < quant_m_2)
         (cmp1[1])       sub ac2[1]   = ac1[LL+2], r21                   //   LL+2       acLevel = acLevel - quant_d_2;
 }
 {.mmi
         (cmp2neg[1])    mov ac2[4] = r0                                 //   LL+5       if (acLevel < quant_m_2) acLevel=0;
         (cmp1neg[1])    sub ac2[1]   = ac2[1], r21                      //   LL+2       acLevel = ac2 - quant_d_2;   (ac2 holds -acL here)
         (p[LL+3])       sxt2 ac2[2]   = ac2[2]                          //   LL+3       sign-extend the 16-bit difference
 }
 {.mmi
         // p34/p42 are cmp1[6]/cmp1neg[6] (with LL=3): both guard a write to ac3[0]
         .pred.rel "mutex", p34, p42
         (cmp1[6])       mov ac3[0] = ac2[6]                             //   LL+7       ac3 = acLevel;
         (cmp1neg[6])    sub ac3[0] = r0, ac2[6]                         //   LL+7       ac3 = -acLevel;
         (p[LL+6])       pmpyshr2.u ac2[5] = r29, ac2[5], 16             //   LL+6       acLevel = (acLevel * mult) >> SCALEBITS;   (shift count is 16 here)
 }
 {.mib
         (p[LL+8])       st2 [r18] = ac3[1] , 2                          //   LL+8       coeff[i] = ac3;
         (cmp2[4])       add r8 = r8, ac2[7]                             //   LL+8       sum += acLevel;   (skipped when acLevel was forced to 0)
         br.ctop.sptk.few .L58
         ;;
 }
 
         .pred.rel "clear", p29, p37
 .default
         // epilogue: restore the application registers and predicates saved in the prologue
         mov ar.ec = r17
         ;;
         mov ar.lc = r2
         mov pr = r10, -1
         mov ar.pfs = r9
         br.ret.sptk.many b0
         .endp quant_h263_inter_ia64#
 
 
 
 
 
 
 
 // void dequant_h263_inter_ia64(int16_t *data, const int16_t *coeff, const uint32_t quant)
 //
 // H.263 inter-block dequantiser: processes 64 16-bit levels from coeff[] and
 // writes the reconstructed values, saturated to [-2048, 2047], to data[]
 // (see the C reference in the boxed comments below).
 //
 // Arguments as read by the code:
 //     r32 = data   (output pointer, copied to r18)
 //     r33 = coeff  (input pointer,  copied to r14)
 //     r34 = quant
 //
 // Register roles inside the loop:
 //     r14 = read pointer into coeff[]  (post-incremented by 2)
 //     r18 = write pointer into data[]  (post-incremented by 2)
 //     r15 = quant_add
 //     r29 = quant_m_2 (quant << 1)
 //     r20 = 2047  (upper saturation limit)
 //     r21 = -2048 (lower saturation limit)
 //     r2 / r9 / r16 / r17 = saved ar.lc / ar.pfs / ar.ec / pr
 //
 // The loop is software-pipelined with rotating registers and br.ctop;
 // r32-r63 are declared rotating by the alloc below, which is why the
 // incoming pointers are copied to static registers before the loop starts.
 
         .common dequant_h263_inter#,8,8
         .align 16
         .global dequant_h263_inter_ia64#
         .proc dequant_h263_inter_ia64#
 dequant_h263_inter_ia64:
         
 //***********************************************************************
 //*                                                                     *
 //*     const uint16_t quant_m_2 = quant << 1;                          *
 //*     const uint16_t quant_add = (quant & 1 ? quant : quant - 1);     *
 //*     uint32_t i;                                                     *
 //*                                                                     *               
 //***********************************************************************
         
         
         
         
         .prologue
         andcm r14 = 1, r34              // r14 = 1 & ~quant : 1 if quant is even, 0 if odd
         dep.z r29 = r34, 1, 15          // r29 = quant_m_2
         // frame: 0 inputs, 32 locals, 0 outputs, 32 rotating (r32-r63)
         alloc r9=ar.pfs,0,32,0,32
         .save ar.lc, r2
         mov r2 = ar.lc
         ;;
         .body
         sub r15 = r34, r14              // r15 = quant_add (quant if odd, quant - 1 if even)
         addl r14 = 63, r0               // loop count - 1 (64 iterations)
         addl r21 = -2048, r0            // r21 = lower saturation limit
         addl r20 = 2047, r0             // r20 = upper saturation limit
         mov r16 = ar.ec                 // r16 = saved epilogue count
         mov r17 = pr                    // r17 = saved predicates
         ;;
         zxt2 r15 = r15                  // keep quant_add as an unsigned 16-bit value
         mov ar.lc = r14
         mov pr.rot = 0                  // p16-p63 = 0
         ;;
         adds r14 = 0, r33               // r14 = coeff
         mov r18 = r32                   // r18 = data
         // NOTE(review): LL is still 3 (set in quant_h263_inter_ia64) at this point in
         // the file and is only reassigned to 2 further down; the loop below has
         // stages 0 .. LL+9 with LL=2 (12 stages). Confirm which value the assembler
         // substitutes here and whether the resulting epilogue count is intended.
         mov ar.ec = LL+10
         mov pr.rot = 1 << 16            // p16 = 1
         ;;
 
 //*******************************************************************************
 //*                                                                             *
 //*for (i = 0; i < 64; i++) {                                                   *
 //*             int16_t acLevel = coeff[i];                                     *
 //*                                                                             *               
 //*             if (acLevel == 0)                                               *
 //*             {                                                               *
 //*                     data[i] = 0;                                            *
 //*             }                                                               *
 //*             else if (acLevel < 0)                                           *
 //*             {                                                               *
 //*                     acLevel = acLevel * quant_m_2 - quant_add;              *
 //*                     data[i] = (acLevel >= -2048 ? acLevel : -2048);         *
 //*             }                                                               *
 //*             else // if (acLevel > 0)                                        *
 //*             {                                                               *
 //*                     acLevel = acLevel * quant_m_2 + quant_add;              *
 //*                     data[i] = (acLevel <= 2047 ? acLevel : 2047);           *
 //*             }                                                               *               
 //*     }                                                                       *
 //*                                                                             *       
 //*******************************************************************************/
 
 
         
         LL=2    // LL := load latency
                 //if LL is changed, you'll also have to change the .pred.rel... parts below!
         
         
         // Rotating register layout (assigned upwards from r32 / p16 in declaration order):
         //   ac1[]     : the value travelling through the pipeline (acLevel)
         //   x[]       : saturation limit chosen by sign (2047 or -2048)
         //   y1[]/y2[] : the two candidates selected between by cmp3 / cmp3neg
         //   p[]       : stage predicates              (p16-p27 with LL=2)
         //   cmp1neg[] : acLevel == 0                  (p28-p35)
         //   cmp2[]    : 0 <= acLevel                  (p36-p40)
         //   cmp2neg[] : !(0 <= acLevel)               (p41-p45)
         //   cmp3[]    : limit <= acLevel              (p46-p47)
         //   cmp3neg[] : !(limit <= acLevel)           (p48-p49)
         .rotr ac1[LL+10], x[5], y1[3], y2[3]
         .rotp p[LL+10] , cmp1neg[8], cmp2[5], cmp2neg[5],cmp3[2], cmp3neg[2]
         
 .explicit       
                                                                 //pipeline stage
         
 .L60:
         // Predicate relations for the assembler's dependency checking: each pair
         // below is a cmpN[k] / cmpNneg[k] pair produced by one compare, so the
         // two predicates of a pair are mutually exclusive.
         .pred.rel "clear", p36
         .pred.rel "mutex", p47, p49
         .pred.rel "mutex", p46, p48
         .pred.rel "mutex", p40, p45
         .pred.rel "mutex", p39, p44
         .pred.rel "mutex", p38, p43
         .pred.rel "mutex", p37, p42
         .pred.rel "mutex", p36, p41
 {.mmi   
         (p[0])ld2 ac1[0] = [r14] ,2                             //      0       acLevel = coeff[i];
         (p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1]          //      LL+1    cmp1neg = (acLevel == 0); p6 is not read in this loop
         (p[LL])sxt2 ac1[LL] = ac1[LL]                           //      LL      sign-extend the 16-bit load
 
 }
 {.mmi
         (p[LL+1])cmp4.le cmp2[0], cmp2neg[0] = r0, ac1[LL+1]    //      LL+1    cmp2 = (0 <= acLevel)
         (cmp2[1]) mov x[0] = r20                                //      LL+2    limit = 2047 for non-negative input
         (p[LL+2])pmpyshr2.u ac1[LL+2] = r29, ac1[LL+2], 0       //      LL+2    acLevel = acLevel * quant_m_2 (16-bit product, shift 0)
 }
 {.mmi
         (cmp2neg[1]) mov x[0] = r21                             //      LL+2    limit = -2048 for negative input
         (cmp2[2]) add ac1[LL+3] = ac1[LL+3], r15                //      LL+3    acLevel += quant_add
         (cmp2neg[2]) sub ac1[LL+3] = ac1[LL+3], r15             //      LL+3    acLevel -= quant_add
 
 }
 {.mmi
         (cmp2neg[4]) mov y1[0] = ac1[LL+5]                      //      LL+5    negative: y1 = acLevel
         (cmp2neg[4]) mov y2[0] = x[3]                           //      LL+5    negative: y2 = -2048
         (p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4]                     //      LL+4    sign-extend the 16-bit result
 }
 {.mmi
         (cmp2[4]) mov y1[0] = x[3]                              //      LL+5    non-negative: y1 = 2047
         (cmp2[4]) mov y2[0] = ac1[LL+5]                         //      LL+5    non-negative: y2 = acLevel
         (p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6]  //      LL+6    cmp3 = (limit <= acLevel)
 }
 {.mmi
         (cmp3[1]) mov ac1[LL+7] = y1[2]                         //      LL+7    limit <= acLevel : take y1
         (cmp3neg[1]) mov ac1[LL+7] = y2[2]                      //      LL+7    otherwise        : take y2
         (cmp1neg[7])  mov ac1[LL+8] = r0                        //      LL+8    if (acLevel == 0) result = 0
 }
 {.mbb
         (p[LL+9])st2 [r18] = ac1[LL+9] ,2                       //      LL+9    data[i] = result
         nop.b 0x0
         br.ctop.sptk.few .L60
         ;;
 }
         .pred.rel "clear", p36
 .default
         // epilogue: restore the application registers and predicates saved in the prologue
         mov ar.lc = r2
         mov ar.pfs = r9
         mov ar.ec  = r16
         mov pr = r17, -1
         ;;
         // NOTE(review): ar.lc was already restored from r2 four instructions above;
         // this second restore looks redundant.
         mov ar.lc = r2
         br.ret.sptk.many b0
         .endp dequant_h263_inter_ia64#
         .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -