⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sad_mmx.asm

📁 网络MPEG4IP流媒体开发源代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;===========================================================================
;
; uint32_t sad8_xmm(const uint8_t * const cur,
;                   const uint8_t * const ref,
;                   const uint32_t stride);
;
; experimental!
;
; Sum of absolute differences over an 8x8 block of bytes.
;
; ABI:    32-bit, arguments on the stack, result in eax.
; In:     [esp+4] = cur, [esp+8] = ref, [esp+12] = stride (bytes per row)
; Out:    eax = SAD
; Clobb:  ecx, edx, mm0, mm2, mm6, flags (esi/edi are saved and restored)
; Notes:  - Uses psadbw on MMX registers, so plain MMX is not sufficient.
;         - No emms is executed here; presumably the caller issues it
;           before any x87 code runs -- TODO confirm against callers.
;         - The absolute difference is symmetric, so it does not matter
;           which of the two block pointers ends up in esi and which in edi.
;
;===========================================================================

align 16
cglobal sad8_xmm
sad8_xmm:
        push    esi
        push    edi

        mov     esi, [esp + 8 + 4]      ; 1st argument (cur in the prototype)
        mov     edi, [esp + 8 + 8]      ; 2nd argument (ref in the prototype)
        mov     ecx, [esp + 8 + 12]     ; stride

        mov     edx, ecx
        shl     edx, 1                  ; edx = 2 * stride (two rows per step)

        pxor    mm6, mm6                ; mm6 = running sum = 0

        ; The row loop is fully unrolled: four steps of two rows each.
        ; The first three steps advance both pointers, the last does not.
%rep 3
        movq    mm0, [esi]              ; row n   of the first block
        movq    mm2, [esi + ecx]        ; row n+1 of the first block
        psadbw  mm0, [edi]              ; mm0 = SAD of row n
        psadbw  mm2, [edi + ecx]        ; mm2 = SAD of row n+1

        paddusw mm6, mm0                ; sum += mm0
        paddusw mm6, mm2                ; sum += mm2

        add     esi, edx
        add     edi, edx
%endrep

        movq    mm0, [esi]
        movq    mm2, [esi + ecx]
        psadbw  mm0, [edi]
        psadbw  mm2, [edi + ecx]

        paddusw mm6, mm0
        paddusw mm6, mm2

        movd    eax, mm6                ; return the accumulated sum

        pop     edi
        pop     esi

        ret


;===========================================================================
;
; uint32_t dev16_mmx(const uint8_t * const cur,
;                    const uint32_t stride);
;
; Deviation of a 16x16 block of bytes from its own mean:
; pass 1 sums all 256 bytes and derives mean = sum >> 8,
; pass 2 sums |pixel - mean| over the same 256 bytes.
;
; ABI:    32-bit, arguments on the stack, result in eax.
; In:     [esp+4] = cur, [esp+8] = stride (bytes per row)
; Out:    eax = sum of |pixel - mean|
; Clobb:  ecx, mm0-mm7, flags (esi/edi are saved and restored)
; Notes:  - mmx_one is defined outside this excerpt; presumably four
;           16-bit words of 1, so that pmaddwd adds adjacent words into
;           dwords -- TODO confirm.
;         - No emms is executed here; presumably left to the caller
;           -- TODO confirm.
;
;===========================================================================

align 16
cglobal dev16_mmx
dev16_mmx:
        push    esi
        push    edi

        pxor    mm4, mm4                ; mm4/mm5 = word-wise partial sums = 0
        pxor    mm5, mm5

        mov     esi, [esp + 8 + 4]      ; cur
        mov     ecx, [esp + 8 + 8]      ; stride
        mov     edi, esi                ; keep the block start for pass 2

        mov     eax, 16                 ; row counter
        pxor    mm7, mm7                ; mm7 = 0 (zero-extension source)

        ; ---- pass 1: sum of all pixels -----------------------------------
.loop1:
        movq    mm0, [esi]              ; pixels 0..7 of the row
        movq    mm2, [esi + 8]          ; pixels 8..15 of the row
        movq    mm1, mm0
        movq    mm3, mm2

        punpcklbw mm0, mm7              ; widen bytes to words
        punpcklbw mm2, mm7
        punpckhbw mm1, mm7
        punpckhbw mm3, mm7

        paddw   mm0, mm1
        paddw   mm2, mm3

        paddw   mm4, mm0                ; accumulate left half
        paddw   mm5, mm2                ; accumulate right half

        add     esi, ecx
        dec     eax
        jnz     .loop1

        paddusw mm4, mm5
        pmaddwd mm4, [mmx_one]          ; merge sum: words -> two dwords

        movq    mm5, mm4
        psrlq   mm5, 32
        paddd   mm4, mm5                ; low dword = total sum

        psllq   mm4, 32                 ; blank upper dword
        psrlq   mm4, 32 + 8             ; mm4 /= (16*16)

        punpckldq mm4, mm4
        packssdw  mm4, mm4              ; mm4 = mean in all four words

        pxor    mm6, mm6                ; mm6 = dev = 0

        mov     eax, 16                 ; row counter

        ; ---- pass 2: sum of |pixel - mean| -------------------------------
.loop2:
        movq    mm0, [edi]
        movq    mm2, [edi + 8]
        movq    mm1, mm0
        movq    mm3, mm2

        punpcklbw mm0, mm7
        punpcklbw mm2, mm7
        punpckhbw mm1, mm7              ; mm0/mm1 = pixels 0..7 as words
        punpckhbw mm3, mm7              ; mm2/mm3 = pixels 8..15 as words

        ; |x - mean| = (mean -sat x) | (x -sat mean): with unsigned
        ; saturating subtraction one of the two differences is always zero.
        movq    mm5, mm4
        psubusw mm5, mm0
        psubusw mm0, mm4
        por     mm0, mm5

        movq    mm5, mm4
        psubusw mm5, mm1
        psubusw mm1, mm4
        por     mm1, mm5                ; mm0/mm1 = |pixels 0..7 - mean|

        movq    mm5, mm4
        psubusw mm5, mm2
        psubusw mm2, mm4
        por     mm2, mm5

        movq    mm5, mm4
        psubusw mm5, mm3
        psubusw mm3, mm4
        por     mm3, mm5                ; mm2/mm3 = |pixels 8..15 - mean|

        paddw   mm0, mm1
        paddw   mm2, mm3

        paddw   mm6, mm0
        paddw   mm6, mm2                ; dev += all 16 differences of the row

        add     edi, ecx
        dec     eax
        jnz     .loop2

        pmaddwd mm6, [mmx_one]          ; merge dev: words -> two dwords
        movq    mm7, mm6
        psrlq   mm7, 32
        paddd   mm6, mm7                ; low dword = total deviation

        movd    eax, mm6

        pop     edi
        pop     esi

        ret


;===========================================================================
;
; uint32_t dev16_xmm(const uint8_t * const cur,
;                    const uint32_t stride);
;
; experimental!
;
; Same result contract as dev16_mmx (sum of |pixel - mean| over a 16x16
; block), but built on psadbw: psadbw against zero sums eight bytes, and
; psadbw against a register filled with the mean sums eight |pixel - mean|.
;
; ABI:    32-bit, arguments on the stack, result in eax.
; In:     [esp+4] = cur, [esp+8] = stride (bytes per row)
; Out:    eax = sum of |pixel - mean|
; Clobb:  ecx, mm0, mm2, mm4-mm7 (esi/edi are saved and restored)
; Notes:  - Uses psadbw on MMX registers, so plain MMX is not sufficient.
;         - Both passes are fully unrolled over the 16 rows; eax is not
;           used as a counter here.
;         - No emms is executed here; presumably left to the caller
;           -- TODO confirm.
;
;===========================================================================

align 16
cglobal dev16_xmm
dev16_xmm:
        push    esi
        push    edi

        pxor    mm4, mm4                ; mm4 = pixel sum = 0

        mov     esi, [esp + 8 + 4]      ; cur
        mov     ecx, [esp + 8 + 8]      ; stride
        mov     edi, esi                ; keep the block start for pass 2

        pxor    mm7, mm7                ; mm7 = 0

        ; ---- pass 1: sum of all pixels (rows 0..14, then row 15) ---------
%rep 15
        movq    mm0, [esi]
        movq    mm2, [esi + 8]
        psadbw  mm0, mm7                ; mm0 = pixel 0 + ... + pixel 7
        psadbw  mm2, mm7                ; mm2 = pixel 8 + ... + pixel 15
        paddw   mm4, mm0                ; sum += mm0
        paddw   mm4, mm2                ; sum += mm2
        add     esi, ecx
%endrep

        movq    mm0, [esi]
        movq    mm2, [esi + 8]
        psadbw  mm0, mm7
        psadbw  mm2, mm7
        paddw   mm4, mm0
        paddw   mm4, mm2

        movq    mm5, mm4
        psllq   mm5, 32
        paddd   mm4, mm5                ; both dwords = sum

        psrld   mm4, 8                  ; both dwords = sum / (16*16) = mean
        packssdw mm4, mm4               ; mean in all four words
        packuswb mm4, mm4               ; mean in all eight bytes

        pxor    mm6, mm6                ; mm6 = dev = 0

        ; ---- pass 2: sum of |pixel - mean| (rows 0..14, then row 15) -----
%rep 15
        movq    mm0, [edi]
        movq    mm2, [edi + 8]
        psadbw  mm0, mm4                ; mm0 = sum |pixels 0..7  - mean|
        psadbw  mm2, mm4                ; mm2 = sum |pixels 8..15 - mean|
        paddw   mm6, mm0                ; dev += mm0
        paddw   mm6, mm2                ; dev += mm2
        add     edi, ecx
%endrep

        movq    mm0, [edi]
        movq    mm2, [edi + 8]
        psadbw  mm0, mm4
        psadbw  mm2, mm4
        paddw   mm6, mm0
        paddw   mm6, mm2

        ; NOTE(review): the next three instructions only change the upper
        ; dword of mm6; the low dword returned by movd is unaffected.
        ; Kept as in the original instruction stream.
        movq    mm7, mm6
        psllq   mm7, 32
        paddd   mm6, mm7

        movd    eax, mm6

        pop     edi
        pop     esi

        ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -