📄 halfpel8_refine_ia64.s
字号:
ld8 ref2a[6] = [refaa[2]], iEdgedWidth
ld8 ref2b[6] = [refab[2]], iEdgedWidth
;;
ld8 cur[7] = [cura]
ld8 ref0a[7] = [refaa[0]], iEdgedWidth
ld8 ref0b[7] = [refab[0]], iEdgedWidth
ld8 ref1a[7] = [refaa[1]], iEdgedWidth
ld8 mpr[7] = [refab[1]], iEdgedWidth
ld8 ref2a[7] = [refaa[2]]
ld8 ref2b[7] = [refab[2]]
;;
ld8 ref0a[8] = [refaa[0]]
ld8 ref0b[8] = [refab[0]]
ld8 ref1a[8] = [refaa[1]]
ld8 mpr[8] = [refab[1]]
;;
// Align ref1
shr.u ref1a[0] = ref1a[0], nob1
shr.u ref1a[1] = ref1a[1], nob1
shr.u ref1a[2] = ref1a[2], nob1
shr.u ref1a[3] = ref1a[3], nob1
shr.u ref1a[4] = ref1a[4], nob1
shr.u ref1a[5] = ref1a[5], nob1
shr.u ref1a[6] = ref1a[6], nob1
shr.u ref1a[7] = ref1a[7], nob1
shr.u ref1a[8] = ref1a[8], nob1
shl mpr[0] = mpr[0], nob64m1
shl mpr[1] = mpr[1], nob64m1
shl mpr[2] = mpr[2], nob64m1
shl mpr[3] = mpr[3], nob64m1
shl mpr[4] = mpr[4], nob64m1
shl mpr[5] = mpr[5], nob64m1
shl mpr[6] = mpr[6], nob64m1
shl mpr[7] = mpr[7], nob64m1
shl mpr[8] = mpr[8], nob64m1
;;
.explicit
{.mii
or ref1a[0] = ref1a[0], mpr[0]
shr.u ref0a[0] = ref0a[0], nob02
shr.u ref0a[1] = ref0a[1], nob02
}
{.mmi
or ref1a[1] = ref1a[1], mpr[1]
or ref1a[2] = ref1a[2], mpr[2]
shr.u ref0a[2] = ref0a[2], nob02
}
{.mii
or ref1a[3] = ref1a[3], mpr[3]
shr.u ref0a[3] = ref0a[3], nob02
shr.u ref0a[4] = ref0a[4], nob02
}
{.mmi
or ref1a[4] = ref1a[4], mpr[4]
or ref1a[5] = ref1a[5], mpr[5]
shr.u ref0a[5] = ref0a[5], nob02
}
{.mii
or ref1a[6] = ref1a[6], mpr[6]
shr.u ref0a[6] = ref0a[6], nob02
shr.u ref0a[7] = ref0a[7], nob02
}
{.mii
or ref1a[7] = ref1a[7], mpr[7]
or ref1a[8] = ref1a[8], mpr[8]
shr.u ref0a[8] = ref0a[8], nob02
}
.default
// ref1a[] now contains center position values
// mpr[] not used any more
// Align ref0 left
;;
shl mpr[0] = ref0b[0], nob56m02
shl mpr[1] = ref0b[1], nob56m02
shl mpr[2] = ref0b[2], nob56m02
shl mpr[3] = ref0b[3], nob56m02
shl mpr[4] = ref0b[4], nob56m02
shl mpr[5] = ref0b[5], nob56m02
shl mpr[6] = ref0b[6], nob56m02
shl mpr[7] = ref0b[7], nob56m02
shl mpr[8] = ref0b[8], nob56m02
shl ref0b[0] = ref0b[0], nob64m02
shl ref0b[1] = ref0b[1], nob64m02
shl ref0b[2] = ref0b[2], nob64m02
shl ref0b[3] = ref0b[3], nob64m02
shl ref0b[4] = ref0b[4], nob64m02
shl ref0b[5] = ref0b[5], nob64m02
shl ref0b[6] = ref0b[6], nob64m02
shl ref0b[7] = ref0b[7], nob64m02
shl ref0b[8] = ref0b[8], nob64m02
;;
or ref0a[0] = ref0a[0], ref0b[0]
or ref0a[1] = ref0a[1], ref0b[1]
or ref0a[2] = ref0a[2], ref0b[2]
or ref0a[3] = ref0a[3], ref0b[3]
or ref0a[4] = ref0a[4], ref0b[4]
or ref0a[5] = ref0a[5], ref0b[5]
or ref0a[6] = ref0a[6], ref0b[6]
or ref0a[7] = ref0a[7], ref0b[7]
or ref0a[8] = ref0a[8], ref0b[8]
;;
// ref0a[] now contains left position values
// mpr[] contains the intermediate result for the right position values (original ref0b << nob56m02)
// Align ref0 right
// Shift one byte more to the right (seen as big-endian)
shr.u ref0b[0] = ref0a[0], 8
shr.u ref0b[1] = ref0a[1], 8
shr.u ref0b[2] = ref0a[2], 8
shr.u ref0b[3] = ref0a[3], 8
shr.u ref0b[4] = ref0a[4], 8
shr.u ref0b[5] = ref0a[5], 8
shr.u ref0b[6] = ref0a[6], 8
shr.u ref0b[7] = ref0a[7], 8
shr.u ref0b[8] = ref0a[8], 8
;;
.explicit
{.mii
or ref0b[0] = ref0b[0], mpr[0]
shr.u ref2a[0] = ref2a[0], nob02
shr.u ref2a[1] = ref2a[1], nob02
}
{.mmi
or ref0b[1] = ref0b[1], mpr[1]
or ref0b[2] = ref0b[2], mpr[2]
shr.u ref2a[2] = ref2a[2], nob02
}
{.mii
or ref0b[3] = ref0b[3], mpr[3]
shr.u ref2a[3] = ref2a[3], nob02
shr.u ref2a[4] = ref2a[4], nob02
}
{.mmi
or ref0b[4] = ref0b[4], mpr[4]
or ref0b[5] = ref0b[5], mpr[5]
shr.u ref2a[5] = ref2a[5], nob02
}
{.mii
or ref0b[6] = ref0b[6], mpr[6]
shr.u ref2a[6] = ref2a[6], nob02
shr.u ref2a[7] = ref2a[7], nob02
}
.default
or ref0b[7] = ref0b[7], mpr[7]
or ref0b[8] = ref0b[8], mpr[8]
// ref0b[] now contains right position values
// mpr[] not needed any more
// Align ref2 left
;;
shl mpr[0] = ref2b[0], nob56m02
shl mpr[1] = ref2b[1], nob56m02
shl mpr[2] = ref2b[2], nob56m02
shl mpr[3] = ref2b[3], nob56m02
shl mpr[4] = ref2b[4], nob56m02
shl mpr[5] = ref2b[5], nob56m02
shl mpr[6] = ref2b[6], nob56m02
shl mpr[7] = ref2b[7], nob56m02
shl ref2b[0] = ref2b[0], nob64m02
shl ref2b[1] = ref2b[1], nob64m02
shl ref2b[2] = ref2b[2], nob64m02
shl ref2b[3] = ref2b[3], nob64m02
shl ref2b[4] = ref2b[4], nob64m02
shl ref2b[5] = ref2b[5], nob64m02
shl ref2b[6] = ref2b[6], nob64m02
shl ref2b[7] = ref2b[7], nob64m02
;;
or ref2a[0] = ref2a[0], ref2b[0]
or ref2a[1] = ref2a[1], ref2b[1]
or ref2a[2] = ref2a[2], ref2b[2]
or ref2a[3] = ref2a[3], ref2b[3]
or ref2a[4] = ref2a[4], ref2b[4]
or ref2a[5] = ref2a[5], ref2b[5]
or ref2a[6] = ref2a[6], ref2b[6]
or ref2a[7] = ref2a[7], ref2b[7]
;;
// ref2a[] now contains left position values
// mpr[] contains the intermediate result for the right position values (original ref2b << nob56m02)
// Align ref2 right
// Shift one byte more to the right (seen as big-endian)
shr.u ref2b[0] = ref2a[0], 8
shr.u ref2b[1] = ref2a[1], 8
shr.u ref2b[2] = ref2a[2], 8
shr.u ref2b[3] = ref2a[3], 8
shr.u ref2b[4] = ref2a[4], 8
shr.u ref2b[5] = ref2a[5], 8
shr.u ref2b[6] = ref2a[6], 8
shr.u ref2b[7] = ref2a[7], 8
;;
or ref2b[0] = ref2b[0], mpr[0]
or ref2b[1] = ref2b[1], mpr[1]
or ref2b[2] = ref2b[2], mpr[2]
or ref2b[3] = ref2b[3], mpr[3]
or ref2b[4] = ref2b[4], mpr[4]
or ref2b[5] = ref2b[5], mpr[5]
or ref2b[6] = ref2b[6], mpr[6]
or ref2b[7] = ref2b[7], mpr[7]
// ref2b[] now contains right position values
// mpr[] not needed any more
// Let's SAD
// Left top corner
sub dx = backupX, dx
psad1 mpr[0] = cur[0], ref0a[0]
psad1 mpr[1] = cur[1], ref0a[1]
sub dy = backupY, dy
psad1 mpr[2] = cur[2], ref0a[2]
psad1 mpr[3] = cur[3], ref0a[3]
psad1 mpr[4] = cur[4], ref0a[4]
psad1 mpr[5] = cur[5], ref0a[5]
psad1 mpr[6] = cur[6], ref0a[6]
psad1 mpr[7] = cur[7], ref0a[7]
;;
.include "../../src/motion/ia64_asm/calc_delta_1.s"
// Top edge
psad1 mpr[0] = cur[0], ref1a[0]
psad1 mpr[1] = cur[1], ref1a[1]
psad1 mpr[2] = cur[2], ref1a[2]
psad1 mpr[3] = cur[3], ref1a[3]
psad1 mpr[4] = cur[4], ref1a[4]
add dx = 1, dx
psad1 mpr[5] = cur[5], ref1a[5]
psad1 mpr[6] = cur[6], ref1a[6]
psad1 mpr[7] = cur[7], ref1a[7]
;;
.include "../../src/motion/ia64_asm/calc_delta_2.s"
(lt) cmp.lt.unc fb, p0 = mpr[8], iMinSAD
.include "../../src/motion/ia64_asm/calc_delta_3.s"
// Right top corner
psad1 mpr[0] = cur[0], ref0b[0]
psad1 mpr[1] = cur[1], ref0b[1]
psad1 mpr[2] = cur[2], ref0b[2]
psad1 mpr[3] = cur[3], ref0b[3]
psad1 mpr[4] = cur[4], ref0b[4]
add backupX = 1, backupX
psad1 mpr[5] = cur[5], ref0b[5]
psad1 mpr[6] = cur[6], ref0b[6]
add dx = 1, dx
psad1 mpr[7] = cur[7], ref0b[7]
;;
.include "../../src/motion/ia64_asm/calc_delta_1.s"
(t) cmp.lt.unc fb, p0 = iSAD, iMinSAD
;;
// Left edge
(fb) mov iMinSAD = iSAD
psad1 mpr[0] = cur[0], ref2a[0]
(fb) mov currX = backupX
psad1 mpr[1] = cur[1], ref2a[1]
psad1 mpr[2] = cur[2], ref2a[2]
(fb) mov currY = backupY
psad1 mpr[3] = cur[3], ref2a[3]
psad1 mpr[4] = cur[4], ref2a[4]
add backupX = 1, backupX
psad1 mpr[5] = cur[5], ref2a[5]
psad1 mpr[6] = cur[6], ref2a[6]
psad1 mpr[7] = cur[7], ref2a[7]
add dx = -2, dx
add dy = 1, dy
;;
.include "../../src/motion/ia64_asm/calc_delta_2.s"
(rt) cmp.lt.unc fb, p0 = mpr[8], iMinSAD
.include "../../src/motion/ia64_asm/calc_delta_3.s"
// Right edge
psad1 mpr[0] = cur[0], ref2b[0]
psad1 mpr[1] = cur[1], ref2b[1]
psad1 mpr[2] = cur[2], ref2b[2]
psad1 mpr[3] = cur[3], ref2b[3]
psad1 mpr[4] = cur[4], ref2b[4]
add backupX = -2, backupX
psad1 mpr[5] = cur[5], ref2b[5]
psad1 mpr[6] = cur[6], ref2b[6]
add backupY = 1, backupY
add dx = 2, dx
psad1 mpr[7] = cur[7], ref2b[7]
;;
.include "../../src/motion/ia64_asm/calc_delta_1.s"
(l) cmp.lt.unc fb, p0 = iSAD, iMinSAD
;;
// Left bottom corner
(fb) mov iMinSAD = iSAD
psad1 mpr[0] = cur[0], ref0a[1]
(fb) mov currX = backupX
psad1 mpr[1] = cur[1], ref0a[2]
psad1 mpr[2] = cur[2], ref0a[3]
(fb) mov currY = backupY
psad1 mpr[3] = cur[3], ref0a[4]
psad1 mpr[4] = cur[4], ref0a[5]
add backupX = 2, backupX
psad1 mpr[5] = cur[5], ref0a[6]
psad1 mpr[6] = cur[6], ref0a[7]
psad1 mpr[7] = cur[7], ref0a[8]
add dx = -2, dx
add dy = 1, dy
;;
.include "../../src/motion/ia64_asm/calc_delta_2.s"
(r) cmp.lt.unc fb, p0 = mpr[8], iMinSAD
.include "../../src/motion/ia64_asm/calc_delta_3.s"
// Bottom edge
psad1 mpr[0] = cur[0], ref1a[1]
psad1 mpr[1] = cur[1], ref1a[2]
psad1 mpr[2] = cur[2], ref1a[3]
psad1 mpr[3] = cur[3], ref1a[4]
psad1 mpr[4] = cur[4], ref1a[5]
add backupX = -2, backupX
psad1 mpr[5] = cur[5], ref1a[6]
psad1 mpr[6] = cur[6], ref1a[7]
add backupY = 1, backupY
add dx = 1, dx
psad1 mpr[7] = cur[7], ref1a[8]
;;
.include "../../src/motion/ia64_asm/calc_delta_1.s"
(lb) cmp.lt.unc fb, p0 = iSAD, iMinSAD
;;
// Right bottom corner
(fb) mov iMinSAD = iSAD
psad1 mpr[0] = cur[0], ref0b[1]
(fb) mov currX = backupX
psad1 mpr[1] = cur[1], ref0b[2]
psad1 mpr[2] = cur[2], ref0b[3]
(fb) mov currY = backupY
psad1 mpr[3] = cur[3], ref0b[4]
psad1 mpr[4] = cur[4], ref0b[5]
add backupX = 1, backupX
psad1 mpr[5] = cur[5], ref0b[6]
psad1 mpr[6] = cur[6], ref0b[7]
add dx = 1, dx
psad1 mpr[7] = cur[7], ref0b[8]
;;
.include "../../src/motion/ia64_asm/calc_delta_2.s"
(b) cmp.lt.unc fb, p0 = mpr[8], iMinSAD
.include "../../src/motion/ia64_asm/calc_delta_3.s"
(rb) getf.sig ret0 = fmv
add backupX = 1, backupX
;;
(rb) add iSAD = iSAD, ret0
;;
(rb) cmp.lt.unc fb, p0 = iSAD, iMinSAD
;;
(fb) mov iMinSAD = iSAD
(fb) mov currX = backupX
(fb) mov currY = backupY
;;
// Write back result
st4 [currMV] = currX
st4 [currYAddress] = currY
mov ret0 = iMinSAD
// Restore important registers
;;
mov pr = prsave, -1
mov ar.pfs = pfs
br.ret.sptk.many b0
.endp Halfpel8_Refine_ia64#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -