⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 halfpel8_refine_ia64.s

📁 wince下的xvidcore开发库,可用于MP4等视频播放开发
💻 S
📖 第 1 页 / 共 2 页
字号:
         ld8 ref2a[6] = [refaa[2]], iEdgedWidth
         ld8 ref2b[6] = [refab[2]], iEdgedWidth
         ;; 
         ld8 cur[7] = [cura]
         ld8 ref0a[7] = [refaa[0]], iEdgedWidth
         ld8 ref0b[7] = [refab[0]], iEdgedWidth
         ld8 ref1a[7] = [refaa[1]], iEdgedWidth
         ld8 mpr[7] = [refab[1]], iEdgedWidth
         ld8 ref2a[7] = [refaa[2]]
         ld8 ref2b[7] = [refab[2]]
         ;; 
         ld8 ref0a[8] = [refaa[0]]
         ld8 ref0b[8] = [refab[0]]
         ld8 ref1a[8] = [refaa[1]]
         ld8 mpr[8] = [refab[1]]
         ;;
 
 
         // Align ref1
         
         shr.u ref1a[0] = ref1a[0], nob1
         shr.u ref1a[1] = ref1a[1], nob1
         shr.u ref1a[2] = ref1a[2], nob1
         shr.u ref1a[3] = ref1a[3], nob1
         shr.u ref1a[4] = ref1a[4], nob1
         shr.u ref1a[5] = ref1a[5], nob1
         shr.u ref1a[6] = ref1a[6], nob1
         shr.u ref1a[7] = ref1a[7], nob1
         shr.u ref1a[8] = ref1a[8], nob1
 
         shl mpr[0] = mpr[0], nob64m1
         shl mpr[1] = mpr[1], nob64m1
         shl mpr[2] = mpr[2], nob64m1
         shl mpr[3] = mpr[3], nob64m1
         shl mpr[4] = mpr[4], nob64m1
         shl mpr[5] = mpr[5], nob64m1
         shl mpr[6] = mpr[6], nob64m1
         shl mpr[7] = mpr[7], nob64m1
         shl mpr[8] = mpr[8], nob64m1
         ;; 
 .explicit
 {.mii
         or ref1a[0] = ref1a[0], mpr[0]
         shr.u ref0a[0] = ref0a[0], nob02
         shr.u ref0a[1] = ref0a[1], nob02
 }
 {.mmi
         or ref1a[1] = ref1a[1], mpr[1]
         or ref1a[2] = ref1a[2], mpr[2]
         shr.u ref0a[2] = ref0a[2], nob02
 }
 {.mii
         or ref1a[3] = ref1a[3], mpr[3]
         shr.u ref0a[3] = ref0a[3], nob02
         shr.u ref0a[4] = ref0a[4], nob02
 }
 {.mmi
         or ref1a[4] = ref1a[4], mpr[4]
         or ref1a[5] = ref1a[5], mpr[5]
         shr.u ref0a[5] = ref0a[5], nob02
 }
 {.mii
         or ref1a[6] = ref1a[6], mpr[6]
         shr.u ref0a[6] = ref0a[6], nob02
         shr.u ref0a[7] = ref0a[7], nob02
 }
 {.mii
         or ref1a[7] = ref1a[7], mpr[7]
         or ref1a[8] = ref1a[8], mpr[8]
         shr.u ref0a[8] = ref0a[8], nob02
 }
 .default
         // ref1a[] now contains center position values
         // mpr[] not used any more
         
         // Align ref0 left
         
         ;; 
         shl mpr[0] = ref0b[0], nob56m02
         shl mpr[1] = ref0b[1], nob56m02
         shl mpr[2] = ref0b[2], nob56m02
         shl mpr[3] = ref0b[3], nob56m02
         shl mpr[4] = ref0b[4], nob56m02
         shl mpr[5] = ref0b[5], nob56m02
         shl mpr[6] = ref0b[6], nob56m02
         shl mpr[7] = ref0b[7], nob56m02
         shl mpr[8] = ref0b[8], nob56m02
 
         shl ref0b[0] = ref0b[0], nob64m02
         shl ref0b[1] = ref0b[1], nob64m02
         shl ref0b[2] = ref0b[2], nob64m02
         shl ref0b[3] = ref0b[3], nob64m02
         shl ref0b[4] = ref0b[4], nob64m02
         shl ref0b[5] = ref0b[5], nob64m02
         shl ref0b[6] = ref0b[6], nob64m02
         shl ref0b[7] = ref0b[7], nob64m02
         shl ref0b[8] = ref0b[8], nob64m02
         ;; 
         or ref0a[0] = ref0a[0], ref0b[0]
         or ref0a[1] = ref0a[1], ref0b[1]
         or ref0a[2] = ref0a[2], ref0b[2]
         or ref0a[3] = ref0a[3], ref0b[3]
         or ref0a[4] = ref0a[4], ref0b[4]
         or ref0a[5] = ref0a[5], ref0b[5]
         or ref0a[6] = ref0a[6], ref0b[6]
         or ref0a[7] = ref0a[7], ref0b[7]
         or ref0a[8] = ref0a[8], ref0b[8]
         ;;
 
         // ref0a[] now contains left position values
         // mpr[] contains intermediate result for right position values (original ref0b << nob56m02)
         
         // Align ref0 right
 
         // Shift one byte more to the right (seen as big-endian)
         shr.u ref0b[0] = ref0a[0], 8
         shr.u ref0b[1] = ref0a[1], 8
         shr.u ref0b[2] = ref0a[2], 8
         shr.u ref0b[3] = ref0a[3], 8
         shr.u ref0b[4] = ref0a[4], 8
         shr.u ref0b[5] = ref0a[5], 8
         shr.u ref0b[6] = ref0a[6], 8
         shr.u ref0b[7] = ref0a[7], 8
         shr.u ref0b[8] = ref0a[8], 8
         ;;
 .explicit
 {.mii
         or  ref0b[0] = ref0b[0], mpr[0]
         shr.u ref2a[0] = ref2a[0], nob02
         shr.u ref2a[1] = ref2a[1], nob02
 }
 {.mmi
         or  ref0b[1] = ref0b[1], mpr[1]
         or  ref0b[2] = ref0b[2], mpr[2]
         shr.u ref2a[2] = ref2a[2], nob02
 }
 {.mii
         or  ref0b[3] = ref0b[3], mpr[3]
         shr.u ref2a[3] = ref2a[3], nob02
         shr.u ref2a[4] = ref2a[4], nob02
 }
 {.mmi
         or  ref0b[4] = ref0b[4], mpr[4]
         or  ref0b[5] = ref0b[5], mpr[5]
         shr.u ref2a[5] = ref2a[5], nob02
 }
 {.mii
         or  ref0b[6] = ref0b[6], mpr[6]
         shr.u ref2a[6] = ref2a[6], nob02
         shr.u ref2a[7] = ref2a[7], nob02
 }
 .default
         or  ref0b[7] = ref0b[7], mpr[7]
         or  ref0b[8] = ref0b[8], mpr[8]
         
         // ref0b[] now contains right position values
         // mpr[] not needed any more
 
         
         // Align ref2 left
         
         ;; 
         shl mpr[0] = ref2b[0], nob56m02
         shl mpr[1] = ref2b[1], nob56m02
         shl mpr[2] = ref2b[2], nob56m02
         shl mpr[3] = ref2b[3], nob56m02
         shl mpr[4] = ref2b[4], nob56m02
         shl mpr[5] = ref2b[5], nob56m02
         shl mpr[6] = ref2b[6], nob56m02
         shl mpr[7] = ref2b[7], nob56m02
 
         shl ref2b[0] = ref2b[0], nob64m02
         shl ref2b[1] = ref2b[1], nob64m02
         shl ref2b[2] = ref2b[2], nob64m02
         shl ref2b[3] = ref2b[3], nob64m02
         shl ref2b[4] = ref2b[4], nob64m02
         shl ref2b[5] = ref2b[5], nob64m02
         shl ref2b[6] = ref2b[6], nob64m02
         shl ref2b[7] = ref2b[7], nob64m02
         ;; 
         or ref2a[0] = ref2a[0], ref2b[0]
         or ref2a[1] = ref2a[1], ref2b[1]
         or ref2a[2] = ref2a[2], ref2b[2]
         or ref2a[3] = ref2a[3], ref2b[3]
         or ref2a[4] = ref2a[4], ref2b[4]
         or ref2a[5] = ref2a[5], ref2b[5]
         or ref2a[6] = ref2a[6], ref2b[6]
         or ref2a[7] = ref2a[7], ref2b[7]
         ;;
 
         // ref2a[] now contains left position values
         // mpr[] contains intermediate result for right position values (original ref2b << nob56m02)
         
         // Align ref2 right
 
         // Shift one byte more to the right (seen as big-endian)
         shr.u ref2b[0] = ref2a[0], 8
         shr.u ref2b[1] = ref2a[1], 8
         shr.u ref2b[2] = ref2a[2], 8
         shr.u ref2b[3] = ref2a[3], 8
         shr.u ref2b[4] = ref2a[4], 8
         shr.u ref2b[5] = ref2a[5], 8
         shr.u ref2b[6] = ref2a[6], 8
         shr.u ref2b[7] = ref2a[7], 8
         ;; 
         or  ref2b[0] = ref2b[0], mpr[0]
         or  ref2b[1] = ref2b[1], mpr[1]
         or  ref2b[2] = ref2b[2], mpr[2]
         or  ref2b[3] = ref2b[3], mpr[3]
         or  ref2b[4] = ref2b[4], mpr[4]
         or  ref2b[5] = ref2b[5], mpr[5]
         or  ref2b[6] = ref2b[6], mpr[6]
         or  ref2b[7] = ref2b[7], mpr[7]
         
 
         // ref2b[] now contains right position values
         // mpr[] not needed any more
 
 
                 
         // Let's SAD
 
         // Left top corner
         
 
         sub dx = backupX, dx
         psad1 mpr[0] = cur[0], ref0a[0]
         psad1 mpr[1] = cur[1], ref0a[1]
 
         sub dy = backupY, dy
         psad1 mpr[2] = cur[2], ref0a[2]
         psad1 mpr[3] = cur[3], ref0a[3]
         psad1 mpr[4] = cur[4], ref0a[4]
         psad1 mpr[5] = cur[5], ref0a[5]
         psad1 mpr[6] = cur[6], ref0a[6]
         psad1 mpr[7] = cur[7], ref0a[7]
         ;; 
 .include "../../src/motion/ia64_asm/calc_delta_1.s"
         
         // Top edge
 
         psad1 mpr[0] = cur[0], ref1a[0]
         psad1 mpr[1] = cur[1], ref1a[1]
         psad1 mpr[2] = cur[2], ref1a[2]
         psad1 mpr[3] = cur[3], ref1a[3]
         psad1 mpr[4] = cur[4], ref1a[4]
 
         add dx = 1, dx
         psad1 mpr[5] = cur[5], ref1a[5]
         psad1 mpr[6] = cur[6], ref1a[6]
 
         psad1 mpr[7] = cur[7], ref1a[7]
         ;;
 
 .include "../../src/motion/ia64_asm/calc_delta_2.s"
 (lt)    cmp.lt.unc fb, p0 = mpr[8], iMinSAD
 .include "../../src/motion/ia64_asm/calc_delta_3.s"
         
         // Right top corner
 
 
         psad1 mpr[0] = cur[0], ref0b[0]
         psad1 mpr[1] = cur[1], ref0b[1]
         psad1 mpr[2] = cur[2], ref0b[2]
         psad1 mpr[3] = cur[3], ref0b[3]
         psad1 mpr[4] = cur[4], ref0b[4]
         
         add backupX = 1, backupX
         psad1 mpr[5] = cur[5], ref0b[5]
         psad1 mpr[6] = cur[6], ref0b[6]
 
         add dx = 1, dx
         psad1 mpr[7] = cur[7], ref0b[7]
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_1.s"
 (t)     cmp.lt.unc fb, p0 = iSAD, iMinSAD
         ;; 
         
         // Left edge
 
 (fb)    mov iMinSAD = iSAD
         psad1 mpr[0] = cur[0], ref2a[0]
 
 (fb)    mov currX = backupX
         psad1 mpr[1] = cur[1], ref2a[1]
         psad1 mpr[2] = cur[2], ref2a[2]
 
 (fb)    mov currY = backupY
         psad1 mpr[3] = cur[3], ref2a[3]
         psad1 mpr[4] = cur[4], ref2a[4]
 
         add backupX = 1, backupX
         psad1 mpr[5] = cur[5], ref2a[5]
         psad1 mpr[6] = cur[6], ref2a[6]
 
         psad1 mpr[7] = cur[7], ref2a[7]
 
         add dx = -2, dx
         add dy = 1, dy
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_2.s"
 (rt)    cmp.lt.unc fb, p0 = mpr[8], iMinSAD
 .include "../../src/motion/ia64_asm/calc_delta_3.s"
         
         // Right edge
 
         
         psad1 mpr[0] = cur[0], ref2b[0]
         psad1 mpr[1] = cur[1], ref2b[1]
         psad1 mpr[2] = cur[2], ref2b[2]
         psad1 mpr[3] = cur[3], ref2b[3]
         psad1 mpr[4] = cur[4], ref2b[4]
 
         add backupX = -2, backupX
         psad1 mpr[5] = cur[5], ref2b[5]
         psad1 mpr[6] = cur[6], ref2b[6]
 
         add backupY = 1, backupY
         add dx = 2, dx
         psad1 mpr[7] = cur[7], ref2b[7]
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_1.s"
 (l)     cmp.lt.unc fb, p0 = iSAD, iMinSAD
         ;;
         
         // Left bottom corner
 
 (fb)    mov iMinSAD = iSAD
         psad1 mpr[0] = cur[0], ref0a[1]
 
 (fb)    mov currX = backupX
         psad1 mpr[1] = cur[1], ref0a[2]
         psad1 mpr[2] = cur[2], ref0a[3]
 
 (fb)    mov currY = backupY
         psad1 mpr[3] = cur[3], ref0a[4]
         psad1 mpr[4] = cur[4], ref0a[5]
 
         add backupX = 2, backupX
         psad1 mpr[5] = cur[5], ref0a[6]
         psad1 mpr[6] = cur[6], ref0a[7]
 
         psad1 mpr[7] = cur[7], ref0a[8]
 
         add dx = -2, dx
         add dy = 1, dy
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_2.s"
 (r)     cmp.lt.unc fb, p0 = mpr[8], iMinSAD
 .include "../../src/motion/ia64_asm/calc_delta_3.s"
         
         // Bottom edge
         
         psad1 mpr[0] = cur[0], ref1a[1]
         psad1 mpr[1] = cur[1], ref1a[2]
         psad1 mpr[2] = cur[2], ref1a[3]
         psad1 mpr[3] = cur[3], ref1a[4]
         psad1 mpr[4] = cur[4], ref1a[5]
 
         add backupX = -2, backupX
         psad1 mpr[5] = cur[5], ref1a[6]
         psad1 mpr[6] = cur[6], ref1a[7]
 
         add backupY = 1, backupY
         add dx = 1, dx
         psad1 mpr[7] = cur[7], ref1a[8]
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_1.s"
 (lb)    cmp.lt.unc fb, p0 = iSAD, iMinSAD
         ;; 
         // Right bottom corner
 
 
 (fb)    mov iMinSAD = iSAD
         psad1 mpr[0] = cur[0], ref0b[1]
 
 (fb)    mov currX = backupX
         psad1 mpr[1] = cur[1], ref0b[2]
         psad1 mpr[2] = cur[2], ref0b[3]
 
 (fb)    mov currY = backupY
         psad1 mpr[3] = cur[3], ref0b[4]
         psad1 mpr[4] = cur[4], ref0b[5]
         
         add backupX = 1, backupX
         psad1 mpr[5] = cur[5], ref0b[6]
         psad1 mpr[6] = cur[6], ref0b[7]
 
         add dx = 1, dx
         psad1 mpr[7] = cur[7], ref0b[8]
         ;;
         
 .include "../../src/motion/ia64_asm/calc_delta_2.s"
 (b)     cmp.lt.unc fb, p0 = mpr[8], iMinSAD
 .include "../../src/motion/ia64_asm/calc_delta_3.s"
 
 (rb)    getf.sig ret0 = fmv
         add backupX = 1, backupX
         ;; 
 (rb)    add iSAD = iSAD, ret0
         ;; 
 (rb)    cmp.lt.unc fb, p0 = iSAD, iMinSAD
         ;; 
 (fb)    mov iMinSAD = iSAD
 (fb)    mov currX = backupX
 (fb)    mov currY = backupY
         ;; 
         
         // Write back result
  
         st4 [currMV] = currX
         st4 [currYAddress] = currY
         mov ret0 = iMinSAD
 
         // Restore important registers
 
         ;;
         mov pr = prsave, -1
         mov ar.pfs = pfs        
         br.ret.sptk.many b0
         
         .endp Halfpel8_Refine_ia64#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -