📄 halfpel8_refine_ia64.s
字号:
// NOTE(review): this view starts partway through the routine that is closed by
// ".endp Halfpel8_Refine_ia64#" below. The register aliases used here (cur[], ref0a[]/ref0b[],
// ref1a[], ref2a[]/ref2b[], mpr[], nob1, nob02, nob64m1, nob56m02, nob64m02, dx, dy, backupX,
// backupY, iSAD, iMinSAD, currX, currY, ...) and the earlier loads are defined above this point
// and are not visible from here.
//
// Visible flow:
//   1. Finish the ld8 loads for indices 6..8 of the cur/ref0/ref1/ref2 register sets.
//   2. Combine each pair of 8-byte loads into one value with shr.u / shl / or
//      (ref1a[] <- center, ref0a[]/ref2a[] <- left, ref0b[]/ref2b[] <- right, per the comments).
//   3. For each of eight candidate positions, run psad1 over eight rows into mpr[0..7], pull in
//      the calc_delta_*.s include files, and compare against iMinSAD; when the predicate fb is
//      set, iMinSAD/currX/currY are updated from iSAD/backupX/backupY.
//   4. Store currX and currY with st4, place iMinSAD in ret0, restore pr and ar.pfs, and return.
//
// NOTE(review): what calc_delta_1.s / calc_delta_2.s / calc_delta_3.s compute is not visible
// here; presumably they reduce mpr[0..7] into iSAD / mpr[8] and add a motion-vector cost —
// confirm against those files.
// NOTE(review): the original line breaks appear to have been lost (instructions, "(fb)"-style
// predicates and ".include" directives share lines with "//" comments); restore the upstream
// layout before assembling.
ld8 mpr[6] = [refab[1]], iEdgedWidth ld8 ref2a[6] = [refaa[2]], iEdgedWidth ld8 ref2b[6] = [refab[2]], iEdgedWidth ;; ld8 cur[7] = [cura] ld8 ref0a[7] = [refaa[0]], iEdgedWidth ld8 ref0b[7] = [refab[0]], iEdgedWidth ld8 ref1a[7] = [refaa[1]], iEdgedWidth ld8 mpr[7] = [refab[1]], iEdgedWidth ld8 ref2a[7] = [refaa[2]] ld8 ref2b[7] = [refab[2]] ;; ld8 ref0a[8] = [refaa[0]] ld8 ref0b[8] = [refab[0]] ld8 ref1a[8] = [refaa[1]] ld8 mpr[8] = [refab[1]] ;; // Align ref1 shr.u ref1a[0] = ref1a[0], nob1 shr.u ref1a[1] = ref1a[1], nob1 shr.u ref1a[2] = ref1a[2], nob1 shr.u ref1a[3] = ref1a[3], nob1 shr.u ref1a[4] = ref1a[4], nob1 shr.u ref1a[5] = ref1a[5], nob1 shr.u ref1a[6] = ref1a[6], nob1 shr.u ref1a[7] = ref1a[7], nob1 shr.u ref1a[8] = ref1a[8], nob1 shl mpr[0] = mpr[0], nob64m1 shl mpr[1] = mpr[1], nob64m1 shl mpr[2] = mpr[2], nob64m1 shl mpr[3] = mpr[3], nob64m1 shl mpr[4] = mpr[4], nob64m1 shl mpr[5] = mpr[5], nob64m1 shl mpr[6] = mpr[6], nob64m1 shl mpr[7] = mpr[7], nob64m1 shl mpr[8] = mpr[8], nob64m1 ;; .explicit{.mii or ref1a[0] = ref1a[0], mpr[0] shr.u ref0a[0] = ref0a[0], nob02 shr.u ref0a[1] = ref0a[1], nob02}{.mmi or ref1a[1] = ref1a[1], mpr[1] or ref1a[2] = ref1a[2], mpr[2] shr.u ref0a[2] = ref0a[2], nob02}{.mii or ref1a[3] = ref1a[3], mpr[3] shr.u ref0a[3] = ref0a[3], nob02 shr.u ref0a[4] = ref0a[4], nob02}{.mmi or ref1a[4] = ref1a[4], mpr[4] or ref1a[5] = ref1a[5], mpr[5] shr.u ref0a[5] = ref0a[5], nob02}{.mii or ref1a[6] = ref1a[6], mpr[6] shr.u ref0a[6] = ref0a[6], nob02 shr.u ref0a[7] = ref0a[7], nob02}{.mii or ref1a[7] = ref1a[7], mpr[7] or ref1a[8] = ref1a[8], mpr[8] shr.u ref0a[8] = ref0a[8], nob02}.default // ref1a[] now contains center position values // mpr[] not used any more // Align ref0 left ;; shl mpr[0] = ref0b[0], nob56m02 shl mpr[1] = ref0b[1], nob56m02 shl mpr[2] = ref0b[2], nob56m02 shl mpr[3] = ref0b[3], nob56m02 shl mpr[4] = ref0b[4], nob56m02 shl mpr[5] = ref0b[5], nob56m02 shl mpr[6] = ref0b[6], nob56m02 shl mpr[7] = ref0b[7], nob56m02 
shl mpr[8] = ref0b[8], nob56m02 shl ref0b[0] = ref0b[0], nob64m02 shl ref0b[1] = ref0b[1], nob64m02 shl ref0b[2] = ref0b[2], nob64m02 shl ref0b[3] = ref0b[3], nob64m02 shl ref0b[4] = ref0b[4], nob64m02 shl ref0b[5] = ref0b[5], nob64m02 shl ref0b[6] = ref0b[6], nob64m02 shl ref0b[7] = ref0b[7], nob64m02 shl ref0b[8] = ref0b[8], nob64m02 ;; or ref0a[0] = ref0a[0], ref0b[0] or ref0a[1] = ref0a[1], ref0b[1] or ref0a[2] = ref0a[2], ref0b[2] or ref0a[3] = ref0a[3], ref0b[3] or ref0a[4] = ref0a[4], ref0b[4] or ref0a[5] = ref0a[5], ref0b[5] or ref0a[6] = ref0a[6], ref0b[6] or ref0a[7] = ref0a[7], ref0b[7] or ref0a[8] = ref0a[8], ref0b[8] ;; // ref0a[] now contains left position values // mpr[] contains intermediate result for right position values (former ref0b << nob56m02) // Align ref0 right // Shift one byte more to the right (seen as big-endian) shr.u ref0b[0] = ref0a[0], 8 shr.u ref0b[1] = ref0a[1], 8 shr.u ref0b[2] = ref0a[2], 8 shr.u ref0b[3] = ref0a[3], 8 shr.u ref0b[4] = ref0a[4], 8 shr.u ref0b[5] = ref0a[5], 8 shr.u ref0b[6] = ref0a[6], 8 shr.u ref0b[7] = ref0a[7], 8 shr.u ref0b[8] = ref0a[8], 8 ;;.explicit{.mii or ref0b[0] = ref0b[0], mpr[0] shr.u ref2a[0] = ref2a[0], nob02 shr.u ref2a[1] = ref2a[1], nob02}{.mmi or ref0b[1] = ref0b[1], mpr[1] or ref0b[2] = ref0b[2], mpr[2] shr.u ref2a[2] = ref2a[2], nob02}{.mii or ref0b[3] = ref0b[3], mpr[3] shr.u ref2a[3] = ref2a[3], nob02 shr.u ref2a[4] = ref2a[4], nob02}{.mmi or ref0b[4] = ref0b[4], mpr[4] or ref0b[5] = ref0b[5], mpr[5] shr.u ref2a[5] = ref2a[5], nob02}{.mii or ref0b[6] = ref0b[6], mpr[6] shr.u ref2a[6] = ref2a[6], nob02 shr.u ref2a[7] = ref2a[7], nob02}.default or ref0b[7] = ref0b[7], mpr[7] or ref0b[8] = ref0b[8], mpr[8] // ref0b[] now contains right position values // mpr[] not needed any more // Align ref2 left ;; shl mpr[0] = ref2b[0], nob56m02 shl mpr[1] = ref2b[1], nob56m02 shl mpr[2] = ref2b[2], nob56m02 shl mpr[3] = ref2b[3], nob56m02 shl mpr[4] = ref2b[4], nob56m02 shl mpr[5] = ref2b[5], nob56m02 
shl mpr[6] = ref2b[6], nob56m02 shl mpr[7] = ref2b[7], nob56m02 shl ref2b[0] = ref2b[0], nob64m02 shl ref2b[1] = ref2b[1], nob64m02 shl ref2b[2] = ref2b[2], nob64m02 shl ref2b[3] = ref2b[3], nob64m02 shl ref2b[4] = ref2b[4], nob64m02 shl ref2b[5] = ref2b[5], nob64m02 shl ref2b[6] = ref2b[6], nob64m02 shl ref2b[7] = ref2b[7], nob64m02 ;; or ref2a[0] = ref2a[0], ref2b[0] or ref2a[1] = ref2a[1], ref2b[1] or ref2a[2] = ref2a[2], ref2b[2] or ref2a[3] = ref2a[3], ref2b[3] or ref2a[4] = ref2a[4], ref2b[4] or ref2a[5] = ref2a[5], ref2b[5] or ref2a[6] = ref2a[6], ref2b[6] or ref2a[7] = ref2a[7], ref2b[7] ;; // ref2a[] now contains left position values // mpr[] contains intermediate result for right position values (former ref2b << nob56m02) // Align ref2 right // Shift one byte more to the right (seen as big-endian) shr.u ref2b[0] = ref2a[0], 8 shr.u ref2b[1] = ref2a[1], 8 shr.u ref2b[2] = ref2a[2], 8 shr.u ref2b[3] = ref2a[3], 8 shr.u ref2b[4] = ref2a[4], 8 shr.u ref2b[5] = ref2a[5], 8 shr.u ref2b[6] = ref2a[6], 8 shr.u ref2b[7] = ref2a[7], 8 ;; or ref2b[0] = ref2b[0], mpr[0] or ref2b[1] = ref2b[1], mpr[1] or ref2b[2] = ref2b[2], mpr[2] or ref2b[3] = ref2b[3], mpr[3] or ref2b[4] = ref2b[4], mpr[4] or ref2b[5] = ref2b[5], mpr[5] or ref2b[6] = ref2b[6], mpr[6] or ref2b[7] = ref2b[7], mpr[7] // ref2b[] now contains right position values // mpr[] not needed any more // Let's SAD // Left top corner sub dx = backupX, dx psad1 mpr[0] = cur[0], ref0a[0] psad1 mpr[1] = cur[1], ref0a[1] sub dy = backupY, dy psad1 mpr[2] = cur[2], ref0a[2] psad1 mpr[3] = cur[3], ref0a[3] psad1 mpr[4] = cur[4], ref0a[4] psad1 mpr[5] = cur[5], ref0a[5] psad1 mpr[6] = cur[6], ref0a[6] psad1 mpr[7] = cur[7], ref0a[7] ;; .include "../../src/motion/ia64_asm/calc_delta_1.s" // Top edge psad1 mpr[0] = cur[0], ref1a[0] psad1 mpr[1] = cur[1], ref1a[1] psad1 mpr[2] = cur[2], ref1a[2] psad1 mpr[3] = cur[3], ref1a[3] psad1 mpr[4] = cur[4], ref1a[4] add dx = 1, dx psad1 mpr[5] = cur[5], ref1a[5] psad1 mpr[6] = 
cur[6], ref1a[6] psad1 mpr[7] = cur[7], ref1a[7] ;;.include "../../src/motion/ia64_asm/calc_delta_2.s"(lt) cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s" // Right top corner psad1 mpr[0] = cur[0], ref0b[0] psad1 mpr[1] = cur[1], ref0b[1] psad1 mpr[2] = cur[2], ref0b[2] psad1 mpr[3] = cur[3], ref0b[3] psad1 mpr[4] = cur[4], ref0b[4] add backupX = 1, backupX psad1 mpr[5] = cur[5], ref0b[5] psad1 mpr[6] = cur[6], ref0b[6] add dx = 1, dx psad1 mpr[7] = cur[7], ref0b[7] ;; .include "../../src/motion/ia64_asm/calc_delta_1.s"(t) cmp.lt.unc fb, p0 = iSAD, iMinSAD ;; // Left edge(fb) mov iMinSAD = iSAD psad1 mpr[0] = cur[0], ref2a[0](fb) mov currX = backupX psad1 mpr[1] = cur[1], ref2a[1] psad1 mpr[2] = cur[2], ref2a[2](fb) mov currY = backupY psad1 mpr[3] = cur[3], ref2a[3] psad1 mpr[4] = cur[4], ref2a[4] add backupX = 1, backupX psad1 mpr[5] = cur[5], ref2a[5] psad1 mpr[6] = cur[6], ref2a[6] psad1 mpr[7] = cur[7], ref2a[7] add dx = -2, dx add dy = 1, dy ;; .include "../../src/motion/ia64_asm/calc_delta_2.s"(rt) cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s" // Right edge psad1 mpr[0] = cur[0], ref2b[0] psad1 mpr[1] = cur[1], ref2b[1] psad1 mpr[2] = cur[2], ref2b[2] psad1 mpr[3] = cur[3], ref2b[3] psad1 mpr[4] = cur[4], ref2b[4] add backupX = -2, backupX psad1 mpr[5] = cur[5], ref2b[5] psad1 mpr[6] = cur[6], ref2b[6] add backupY = 1, backupY add dx = 2, dx psad1 mpr[7] = cur[7], ref2b[7] ;; .include "../../src/motion/ia64_asm/calc_delta_1.s"(l) cmp.lt.unc fb, p0 = iSAD, iMinSAD ;; // Left bottom corner(fb) mov iMinSAD = iSAD psad1 mpr[0] = cur[0], ref0a[1](fb) mov currX = backupX psad1 mpr[1] = cur[1], ref0a[2] psad1 mpr[2] = cur[2], ref0a[3](fb) mov currY = backupY psad1 mpr[3] = cur[3], ref0a[4] psad1 mpr[4] = cur[4], ref0a[5] add backupX = 2, backupX psad1 mpr[5] = cur[5], ref0a[6] psad1 mpr[6] = cur[6], ref0a[7] psad1 mpr[7] = cur[7], ref0a[8] add dx = -2, dx add dy = 1, dy ;; .include 
"../../src/motion/ia64_asm/calc_delta_2.s"(r) cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s" // Bottom edge psad1 mpr[0] = cur[0], ref1a[1] psad1 mpr[1] = cur[1], ref1a[2] psad1 mpr[2] = cur[2], ref1a[3] psad1 mpr[3] = cur[3], ref1a[4] psad1 mpr[4] = cur[4], ref1a[5] add backupX = -2, backupX psad1 mpr[5] = cur[5], ref1a[6] psad1 mpr[6] = cur[6], ref1a[7] add backupY = 1, backupY add dx = 1, dx psad1 mpr[7] = cur[7], ref1a[8] ;; .include "../../src/motion/ia64_asm/calc_delta_1.s"(lb) cmp.lt.unc fb, p0 = iSAD, iMinSAD ;; // Right bottom corner(fb) mov iMinSAD = iSAD psad1 mpr[0] = cur[0], ref0b[1](fb) mov currX = backupX psad1 mpr[1] = cur[1], ref0b[2] psad1 mpr[2] = cur[2], ref0b[3](fb) mov currY = backupY psad1 mpr[3] = cur[3], ref0b[4] psad1 mpr[4] = cur[4], ref0b[5] add backupX = 1, backupX psad1 mpr[5] = cur[5], ref0b[6] psad1 mpr[6] = cur[6], ref0b[7] add dx = 1, dx psad1 mpr[7] = cur[7], ref0b[8] ;; .include "../../src/motion/ia64_asm/calc_delta_2.s"(b) cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s"(rb) getf.sig ret0 = fmv add backupX = 1, backupX ;; (rb) add iSAD = iSAD, ret0 ;; (rb) cmp.lt.unc fb, p0 = iSAD, iMinSAD ;; (fb) mov iMinSAD = iSAD(fb) mov currX = backupX(fb) mov currY = backupY ;; // Write back result st4 [currMV] = currX st4 [currYAddress] = currY mov ret0 = iMinSAD // Restore important registers ;; mov pr = prsave, -1 mov ar.pfs = pfs br.ret.sptk.many b0 .endp Halfpel8_Refine_ia64#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -