⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 halfpel8_refine_ia64.s

📁 这是一个压缩解压包，用 C 语言编写，里面有详细的源代码。
💻 S
📖 第 1 页 / 共 2 页
字号:
	ld8 mpr[6] = [refab[1]], iEdgedWidth	ld8 ref2a[6] = [refaa[2]], iEdgedWidth	ld8 ref2b[6] = [refab[2]], iEdgedWidth	;; 	ld8 cur[7] = [cura]	ld8 ref0a[7] = [refaa[0]], iEdgedWidth	ld8 ref0b[7] = [refab[0]], iEdgedWidth	ld8 ref1a[7] = [refaa[1]], iEdgedWidth	ld8 mpr[7] = [refab[1]], iEdgedWidth	ld8 ref2a[7] = [refaa[2]]	ld8 ref2b[7] = [refab[2]]	;; 	ld8 ref0a[8] = [refaa[0]]	ld8 ref0b[8] = [refab[0]]	ld8 ref1a[8] = [refaa[1]]	ld8 mpr[8] = [refab[1]]	;;	// Align ref1	     	shr.u ref1a[0] = ref1a[0], nob1     	shr.u ref1a[1] = ref1a[1], nob1     	shr.u ref1a[2] = ref1a[2], nob1     	shr.u ref1a[3] = ref1a[3], nob1     	shr.u ref1a[4] = ref1a[4], nob1     	shr.u ref1a[5] = ref1a[5], nob1     	shr.u ref1a[6] = ref1a[6], nob1     	shr.u ref1a[7] = ref1a[7], nob1     	shr.u ref1a[8] = ref1a[8], nob1	shl mpr[0] = mpr[0], nob64m1	shl mpr[1] = mpr[1], nob64m1	shl mpr[2] = mpr[2], nob64m1	shl mpr[3] = mpr[3], nob64m1	shl mpr[4] = mpr[4], nob64m1	shl mpr[5] = mpr[5], nob64m1	shl mpr[6] = mpr[6], nob64m1	shl mpr[7] = mpr[7], nob64m1	shl mpr[8] = mpr[8], nob64m1	;; .explicit{.mii	or ref1a[0] = ref1a[0], mpr[0]     	shr.u ref0a[0] = ref0a[0], nob02     	shr.u ref0a[1] = ref0a[1], nob02}{.mmi	or ref1a[1] = ref1a[1], mpr[1]	or ref1a[2] = ref1a[2], mpr[2]     	shr.u ref0a[2] = ref0a[2], nob02}{.mii	or ref1a[3] = ref1a[3], mpr[3]     	shr.u ref0a[3] = ref0a[3], nob02     	shr.u ref0a[4] = ref0a[4], nob02}{.mmi	or ref1a[4] = ref1a[4], mpr[4]	or ref1a[5] = ref1a[5], mpr[5]	shr.u ref0a[5] = ref0a[5], nob02}{.mii	or ref1a[6] = ref1a[6], mpr[6]     	shr.u ref0a[6] = ref0a[6], nob02     	shr.u ref0a[7] = ref0a[7], nob02}{.mii	or ref1a[7] = ref1a[7], mpr[7]	or ref1a[8] = ref1a[8], mpr[8]     	shr.u ref0a[8] = ref0a[8], nob02}.default	// ref1a[] now contains center position values	// mpr[] not used any more		// Align ref0 left		;; 	shl mpr[0] = ref0b[0], nob56m02	shl mpr[1] = ref0b[1], nob56m02	shl mpr[2] = ref0b[2], nob56m02	shl mpr[3] = ref0b[3], nob56m02	shl mpr[4] = ref0b[4], nob56m02	shl 
mpr[5] = ref0b[5], nob56m02	shl mpr[6] = ref0b[6], nob56m02	shl mpr[7] = ref0b[7], nob56m02	shl mpr[8] = ref0b[8], nob56m02	shl ref0b[0] = ref0b[0], nob64m02	shl ref0b[1] = ref0b[1], nob64m02	shl ref0b[2] = ref0b[2], nob64m02	shl ref0b[3] = ref0b[3], nob64m02	shl ref0b[4] = ref0b[4], nob64m02	shl ref0b[5] = ref0b[5], nob64m02	shl ref0b[6] = ref0b[6], nob64m02	shl ref0b[7] = ref0b[7], nob64m02	shl ref0b[8] = ref0b[8], nob64m02	;; 	or ref0a[0] = ref0a[0], ref0b[0]	or ref0a[1] = ref0a[1], ref0b[1]	or ref0a[2] = ref0a[2], ref0b[2]	or ref0a[3] = ref0a[3], ref0b[3]	or ref0a[4] = ref0a[4], ref0b[4]	or ref0a[5] = ref0a[5], ref0b[5]	or ref0a[6] = ref0a[6], ref0b[6]	or ref0a[7] = ref0a[7], ref0b[7]	or ref0a[8] = ref0a[8], ref0b[8]	;;	// ref0a[] now contains left position values	// mpr[] contains intermediate result for right position values (former ref0a << 56 - nob02)		// Align ref0 right	// Shift one byte more to the right (seen als big-endian)	shr.u ref0b[0] = ref0a[0], 8	shr.u ref0b[1] = ref0a[1], 8	shr.u ref0b[2] = ref0a[2], 8	shr.u ref0b[3] = ref0a[3], 8	shr.u ref0b[4] = ref0a[4], 8	shr.u ref0b[5] = ref0a[5], 8	shr.u ref0b[6] = ref0a[6], 8	shr.u ref0b[7] = ref0a[7], 8	shr.u ref0b[8] = ref0a[8], 8	;;.explicit{.mii	or  ref0b[0] = ref0b[0], mpr[0]     	shr.u ref2a[0] = ref2a[0], nob02     	shr.u ref2a[1] = ref2a[1], nob02}{.mmi	or  ref0b[1] = ref0b[1], mpr[1]	or  ref0b[2] = ref0b[2], mpr[2]     	shr.u ref2a[2] = ref2a[2], nob02}{.mii	or  ref0b[3] = ref0b[3], mpr[3]     	shr.u ref2a[3] = ref2a[3], nob02     	shr.u ref2a[4] = ref2a[4], nob02}{.mmi	or  ref0b[4] = ref0b[4], mpr[4]	or  ref0b[5] = ref0b[5], mpr[5]     	shr.u ref2a[5] = ref2a[5], nob02}{.mii	or  ref0b[6] = ref0b[6], mpr[6]     	shr.u ref2a[6] = ref2a[6], nob02     	shr.u ref2a[7] = ref2a[7], nob02}.default	or  ref0b[7] = ref0b[7], mpr[7]	or  ref0b[8] = ref0b[8], mpr[8]		// ref0b[] now contains right position values	// mpr[] not needed any more		// Align ref2 left		;; 	shl mpr[0] = ref2b[0], nob56m02	shl mpr[1] = 
ref2b[1], nob56m02	shl mpr[2] = ref2b[2], nob56m02	shl mpr[3] = ref2b[3], nob56m02	shl mpr[4] = ref2b[4], nob56m02	shl mpr[5] = ref2b[5], nob56m02	shl mpr[6] = ref2b[6], nob56m02	shl mpr[7] = ref2b[7], nob56m02	shl ref2b[0] = ref2b[0], nob64m02	shl ref2b[1] = ref2b[1], nob64m02	shl ref2b[2] = ref2b[2], nob64m02	shl ref2b[3] = ref2b[3], nob64m02	shl ref2b[4] = ref2b[4], nob64m02	shl ref2b[5] = ref2b[5], nob64m02	shl ref2b[6] = ref2b[6], nob64m02	shl ref2b[7] = ref2b[7], nob64m02	;; 	or ref2a[0] = ref2a[0], ref2b[0]	or ref2a[1] = ref2a[1], ref2b[1]	or ref2a[2] = ref2a[2], ref2b[2]	or ref2a[3] = ref2a[3], ref2b[3]	or ref2a[4] = ref2a[4], ref2b[4]	or ref2a[5] = ref2a[5], ref2b[5]	or ref2a[6] = ref2a[6], ref2b[6]	or ref2a[7] = ref2a[7], ref2b[7]	;;	// ref2a[] now contains left position values	// mpr[] contains intermediate result for right position values (former ref2a << 56 - nob02)		// Align ref2 right	// Shift one byte more to the right (seen als big-endian)	shr.u ref2b[0] = ref2a[0], 8	shr.u ref2b[1] = ref2a[1], 8	shr.u ref2b[2] = ref2a[2], 8	shr.u ref2b[3] = ref2a[3], 8	shr.u ref2b[4] = ref2a[4], 8	shr.u ref2b[5] = ref2a[5], 8	shr.u ref2b[6] = ref2a[6], 8	shr.u ref2b[7] = ref2a[7], 8	;; 	or  ref2b[0] = ref2b[0], mpr[0]	or  ref2b[1] = ref2b[1], mpr[1]	or  ref2b[2] = ref2b[2], mpr[2]	or  ref2b[3] = ref2b[3], mpr[3]	or  ref2b[4] = ref2b[4], mpr[4]	or  ref2b[5] = ref2b[5], mpr[5]	or  ref2b[6] = ref2b[6], mpr[6]	or  ref2b[7] = ref2b[7], mpr[7]		// ref2b[] now contains right position values	// mpr[] not needed any more			// Let's SAD	// Left top corner		sub dx = backupX, dx	psad1 mpr[0] = cur[0], ref0a[0]	psad1 mpr[1] = cur[1], ref0a[1]	sub dy = backupY, dy	psad1 mpr[2] = cur[2], ref0a[2]	psad1 mpr[3] = cur[3], ref0a[3]	psad1 mpr[4] = cur[4], ref0a[4]	psad1 mpr[5] = cur[5], ref0a[5]	psad1 mpr[6] = cur[6], ref0a[6]	psad1 mpr[7] = cur[7], ref0a[7]	;; .include "../../src/motion/ia64_asm/calc_delta_1.s"		// Top edge	psad1 mpr[0] = cur[0], ref1a[0]	psad1 mpr[1] = cur[1], 
ref1a[1]	psad1 mpr[2] = cur[2], ref1a[2]	psad1 mpr[3] = cur[3], ref1a[3]	psad1 mpr[4] = cur[4], ref1a[4]	add dx = 1, dx	psad1 mpr[5] = cur[5], ref1a[5]	psad1 mpr[6] = cur[6], ref1a[6]	psad1 mpr[7] = cur[7], ref1a[7]	;;.include "../../src/motion/ia64_asm/calc_delta_2.s"(lt)	cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s"		// Right top corner	psad1 mpr[0] = cur[0], ref0b[0]	psad1 mpr[1] = cur[1], ref0b[1]	psad1 mpr[2] = cur[2], ref0b[2]	psad1 mpr[3] = cur[3], ref0b[3]	psad1 mpr[4] = cur[4], ref0b[4]		add backupX = 1, backupX	psad1 mpr[5] = cur[5], ref0b[5]	psad1 mpr[6] = cur[6], ref0b[6]	add dx = 1, dx	psad1 mpr[7] = cur[7], ref0b[7]	;;	.include "../../src/motion/ia64_asm/calc_delta_1.s"(t)	cmp.lt.unc fb, p0 = iSAD, iMinSAD	;; 		// Left edge(fb)	mov iMinSAD = iSAD	psad1 mpr[0] = cur[0], ref2a[0](fb)	mov currX = backupX	psad1 mpr[1] = cur[1], ref2a[1]	psad1 mpr[2] = cur[2], ref2a[2](fb)	mov currY = backupY	psad1 mpr[3] = cur[3], ref2a[3]	psad1 mpr[4] = cur[4], ref2a[4]	add backupX = 1, backupX	psad1 mpr[5] = cur[5], ref2a[5]	psad1 mpr[6] = cur[6], ref2a[6]	psad1 mpr[7] = cur[7], ref2a[7]	add dx = -2, dx	add dy = 1, dy	;;	.include "../../src/motion/ia64_asm/calc_delta_2.s"(rt)	cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s"		// Right edge		psad1 mpr[0] = cur[0], ref2b[0]	psad1 mpr[1] = cur[1], ref2b[1]	psad1 mpr[2] = cur[2], ref2b[2]	psad1 mpr[3] = cur[3], ref2b[3]	psad1 mpr[4] = cur[4], ref2b[4]	add backupX = -2, backupX	psad1 mpr[5] = cur[5], ref2b[5]	psad1 mpr[6] = cur[6], ref2b[6]	add backupY = 1, backupY	add dx = 2, dx	psad1 mpr[7] = cur[7], ref2b[7]	;;	.include "../../src/motion/ia64_asm/calc_delta_1.s"(l)	cmp.lt.unc fb, p0 = iSAD, iMinSAD	;;		// Left bottom corner(fb)	mov iMinSAD = iSAD	psad1 mpr[0] = cur[0], ref0a[1](fb)	mov currX = backupX	psad1 mpr[1] = cur[1], ref0a[2]	psad1 mpr[2] = cur[2], ref0a[3](fb)	mov currY = backupY	psad1 mpr[3] = cur[3], ref0a[4]	psad1 mpr[4] = cur[4], 
ref0a[5]	add backupX = 2, backupX	psad1 mpr[5] = cur[5], ref0a[6]	psad1 mpr[6] = cur[6], ref0a[7]	psad1 mpr[7] = cur[7], ref0a[8]	add dx = -2, dx	add dy = 1, dy	;;	.include "../../src/motion/ia64_asm/calc_delta_2.s"(r)	cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s"		// Bottom edge		psad1 mpr[0] = cur[0], ref1a[1]	psad1 mpr[1] = cur[1], ref1a[2]	psad1 mpr[2] = cur[2], ref1a[3]	psad1 mpr[3] = cur[3], ref1a[4]	psad1 mpr[4] = cur[4], ref1a[5]	add backupX = -2, backupX	psad1 mpr[5] = cur[5], ref1a[6]	psad1 mpr[6] = cur[6], ref1a[7]	add backupY = 1, backupY	add dx = 1, dx	psad1 mpr[7] = cur[7], ref1a[8]	;;	.include "../../src/motion/ia64_asm/calc_delta_1.s"(lb)	cmp.lt.unc fb, p0 = iSAD, iMinSAD	;; 	// Right bottom corner(fb)	mov iMinSAD = iSAD	psad1 mpr[0] = cur[0], ref0b[1](fb)	mov currX = backupX	psad1 mpr[1] = cur[1], ref0b[2]	psad1 mpr[2] = cur[2], ref0b[3](fb)	mov currY = backupY	psad1 mpr[3] = cur[3], ref0b[4]	psad1 mpr[4] = cur[4], ref0b[5]		add backupX = 1, backupX	psad1 mpr[5] = cur[5], ref0b[6]	psad1 mpr[6] = cur[6], ref0b[7]	add dx = 1, dx	psad1 mpr[7] = cur[7], ref0b[8]	;;	.include "../../src/motion/ia64_asm/calc_delta_2.s"(b)	cmp.lt.unc fb, p0 = mpr[8], iMinSAD.include "../../src/motion/ia64_asm/calc_delta_3.s"(rb)	getf.sig ret0 = fmv	add backupX = 1, backupX	;; (rb)	add iSAD = iSAD, ret0	;; (rb)	cmp.lt.unc fb, p0 = iSAD, iMinSAD	;; (fb)	mov iMinSAD = iSAD(fb)	mov currX = backupX(fb)	mov currY = backupY	;; 		// Write back result 	st4 [currMV] = currX	st4 [currYAddress] = currY	mov ret0 = iMinSAD	// Restore important registers	;;	mov pr = prsave, -1	mov ar.pfs = pfs		br.ret.sptk.many b0		.endp Halfpel8_Refine_ia64#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -