⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 field_dct.asm

📁 经典的MP4编解码核心库
💻 ASM
字号:
;/**************************************************************************
; *
; *	SIGMA DESIGNS MPEG-4 CODEC
; *	mmx FIELD DCT / FRAME DCT Estimation
; *
; *
; *************************************************************************/

;/**************************************************************************
; *
; *	History:
; *
; * 05.13.2002  initial version. 
; * David Zheng, Sigma Designs Inc.
; *
; *************************************************************************/


bits 32

section .data
align 16

; Lookup tables for the field pass of _FieldDCT_test_mmx.  Every entry is a
; BYTE offset into the int16_t coefficient array (word index * 2).  The loop
; pairs entry i with entry i+1 for i = 0..6, so all eight entries are read.
;   blocks[k] : offset of the 8x8 block to read from
;               (block 0 for k = 0..3, block 2 for k = 4..7)
;   lines[k]  : offset of the row start inside that block (word 0/16/32/48)
blocks:		dd	0*64*2, 0*64*2, 0*64*2, 0*64*2
		dd	2*64*2, 2*64*2, 2*64*2, 2*64*2
lines:		dd	0*2, 16*2, 32*2, 48*2
		dd	0*2, 16*2, 32*2, 48*2

; Packed-word constants (4 x 16 bit each).
;   mmx_one : multiplier for pmaddwd, used to fold the word sums into dwords
;   mmx_256 : bias added to every coefficient before the unsigned saturating
;             subtract that produces the absolute difference
mmx_one:	times 4 dw 1
mmx_256:	times 4 dw 256

section .text

;===========================================================================
;
; int FieldDCT_test_mmx(int16_t *data)
;
; Estimates whether a macroblock is better coded with frame DCT or field DCT
; by comparing two sums of absolute differences between coefficient rows.
;
; ABI:    32-bit, argument on the stack ([esp+4] at entry), result in eax.
; In:     data -> int16_t coefficients; the first four 8x8 blocks
;         (4 * 64 words) are read.  Presumably these are the four luminance
;         blocks of one macroblock - TODO confirm against the caller.
; Out:    eax = 0  frame coding  (sad_frame <  sad_field)
;         eax = 1  field coding  (sad_frame >= sad_field)
;         NOTE: earlier revisions of this header declared the routine as
;         "void", but the decision has always been returned in eax.
; Saves:  esi, edi, ebx, edx
; Clobb:  ecx, mm0-mm7, flags
;
; Frame sum: for i = 0..6 and each of the four blocks, |row[i+1] - row[i]|.
; Field sum: for i = 0..6, |X[i+1] - X[i]| where X[k] is the 16-word span at
;            data + blocks[k] + lines[k], taken once in that block and once
;            in the block 64 words further on.
;
; NOTE(review): no emms is executed here, so the MMX state is still live on
;         return; the caller has to issue emms before any x87 code.
; NOTE(review): the +256 bias and the 16-bit lane accumulators (56 additions
;         per lane, later read as signed words by pmaddwd) look sized for
;         coefficients in about [-256, 255] - confirm the input range with
;         the caller.
;
;===========================================================================

%define FFD_ROW_BYTES	(8*2)		; one row of an 8x8 int16_t block
%define FFD_BLOCK_BYTES	(64*2)		; one whole 8x8 int16_t block
%define FFD_ROW_PAIRS	7		; rows 0..7 give 7 adjacent pairs

;---------------------------------------------------------------------------
; FFD_ABSDIFF_ROW rowA, rowB, acc
;   acc += |rowA[j] - rowB[j]| for the 8 words at each address, kept as four
;   16-bit lane sums.  rowA/rowB are address expressions without brackets.
;   Needs mm5 = mmx_256.  Clobbers mm0-mm4.
;   |a - b| is built as (a -sat b) | (b -sat a) on the biased values: one of
;   the two unsigned saturating differences is always zero.
;---------------------------------------------------------------------------
%macro FFD_ABSDIFF_ROW 3
		movq		mm0, [%1]		; rowA, words 0..3
		movq		mm1, [%1+8]		; rowA, words 4..7

		paddw		mm0, mm5
		paddw		mm1, mm5

		movq		mm2, [%2]		; rowB, words 0..3
		movq		mm3, [%2+8]		; rowB, words 4..7

		paddw		mm2, mm5
		movq		mm4, mm0

		paddw		mm3, mm5
		psubusw		mm4, mm2		; max(A - B, 0), low half

		psubusw		mm2, mm0		; max(B - A, 0), low half
		por		mm2, mm4		; |A - B|, low half

		movq		mm4, mm1
		psubusw		mm4, mm3		; max(A - B, 0), high half

		paddw		%3, mm2
		psubusw		mm3, mm1		; max(B - A, 0), high half

		por		mm3, mm4		; |A - B|, high half
		paddw		%3, mm3
%endmacro

align 64
global _FieldDCT_test_mmx
_FieldDCT_test_mmx:
		push		esi
		push		edi
		push		ebx
		push		edx			; not used as scratch any more; still
							; saved so callers see it unchanged

		mov		esi, [esp+16+4]		; data (4 pushes + return address)

		pxor		mm6, mm6		; frame lane sums
		pxor		mm7, mm7		; field lane sums

		xor		ecx, ecx		; i = 0
		movq		mm5, [mmx_256]		; bias, constant for the whole loop

.next_row:
		;------- frame: row i against row i+1 in blocks 0..3 -------
		mov		edi, ecx
		shl		edi, 4			; i * FFD_ROW_BYTES
		add		edi, esi		; edi = &data[0*64 + i*8]

		FFD_ABSDIFF_ROW	edi,                   edi+FFD_ROW_BYTES,                   mm6
		FFD_ABSDIFF_ROW	edi+1*FFD_BLOCK_BYTES, edi+1*FFD_BLOCK_BYTES+FFD_ROW_BYTES, mm6
		FFD_ABSDIFF_ROW	edi+2*FFD_BLOCK_BYTES, edi+2*FFD_BLOCK_BYTES+FFD_ROW_BYTES, mm6
		FFD_ABSDIFF_ROW	edi+3*FFD_BLOCK_BYTES, edi+3*FFD_BLOCK_BYTES+FFD_ROW_BYTES, mm6

		;------- field: table entry i against table entry i+1 -------
		mov		edi, [blocks+ecx*4]	; blocks[i]
		mov		ebx, [blocks+ecx*4+4]	; blocks[i+1]
		add		edi, [lines+ecx*4]	; + lines[i]
		add		ebx, [lines+ecx*4+4]	; + lines[i+1]
		add		edi, esi		; edi = &data[blocks[i]   + lines[i]]
		add		ebx, esi		; ebx = &data[blocks[i+1] + lines[i+1]]

		FFD_ABSDIFF_ROW	edi,                               ebx,                               mm7
		FFD_ABSDIFF_ROW	edi+FFD_ROW_BYTES,                 ebx+FFD_ROW_BYTES,                 mm7
		FFD_ABSDIFF_ROW	edi+FFD_BLOCK_BYTES,               ebx+FFD_BLOCK_BYTES,               mm7
		FFD_ABSDIFF_ROW	edi+FFD_BLOCK_BYTES+FFD_ROW_BYTES, ebx+FFD_BLOCK_BYTES+FFD_ROW_BYTES, mm7

		inc		ecx
		cmp		ecx, FFD_ROW_PAIRS
		jb		near .next_row		; body is far beyond short-jump range

		;------- fold the four word lanes of each sum into one dword -------
		movq		mm5, [mmx_one]
		pmaddwd		mm6, mm5		; frame: two dword partial sums
		pmaddwd		mm7, mm5		; field: two dword partial sums

		movq		mm4, mm6
		movq		mm5, mm7

		psrlq		mm4, 32
		psrlq		mm5, 32

		paddd		mm4, mm6		; low dword of mm4 = sad_frame
		paddd		mm5, mm7		; low dword of mm5 = sad_field

		movd		ecx, mm4
		movd		edx, mm5

		;------- decision: field coding unless sad_frame < sad_field -------
		xor		eax, eax		; must precede cmp: xor rewrites the flags
		cmp		ecx, edx
		setae		al			; eax = (sad_frame >= sad_field)

		pop		edx
		pop		ebx
		pop		edi
		pop		esi

		ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -