; File: field_dct.asm
;/**************************************************************************
; *
; * SIGMA DESIGNS MPEG-4 CODEC
; * mmx FIELD DCT / FRAME DCT Estimation
; *
; *
; *************************************************************************/
;/**************************************************************************
; *
; * History:
; *
; * 05.13.2002 initial version.
; * David Zheng, Sigma Designs Inc.
; *
; *************************************************************************/
bits 32

section .data
align 16

; blocks[i]: byte offset of the 8x8 int16 block (64 words * 2 bytes) used as
; the base for field line i.  Entries 0-3 select block 0, entries 4-7 select
; block 2.  The routine reads entries i and i+1 on every loop iteration.
blocks:
        times 4 dd 0*64*2
        times 4 dd 2*64*2

; lines[i]: byte offset added to blocks[i] to reach field line i
; (word offsets 0, 16, 32, 48, each scaled by 2 bytes), repeated for the
; second group of four entries.
lines:
%rep 2
        dd 0*2, 16*2, 32*2, 48*2
%endrep

; Four words of 1: pmaddwd multiplier used to fold the four 16-bit
; accumulator lanes into two 32-bit partial sums.
mmx_one:
        times 4 dw 1

; Four words of 256: bias added to every input word before the unsigned
; saturating subtractions.
mmx_256:
        times 4 dw 256
section .text

;===========================================================================
;
; void FieldDCT_test_mmx(int16_t *data)
;
; Estimates whether a macroblock is better coded with frame DCT or field DCT
; by comparing two sums of absolute differences (SAD) between rows of data.
;
; Convention: 32-bit, single stack argument (found at [esp+4] on entry).
; In:     data -> int16 words; the routine reads four consecutive blocks of
;         64 words each (offsets 0 .. 4*64-1).
; Out:    eax = 0  frame coding  (frame SAD <  field SAD)
;         eax = 1  field coding  (frame SAD >= field SAD)
;         NOTE: the prototype above is kept from the original header, which
;         says "void"; a C caller that wants the decision must declare an
;         integer return type.
; Saves:  esi, edi, ebx, edx
; Clobb:  eax, ecx, flags, mm0-mm7 (MMX state is released with emms)
; Needs:  blocks, lines, mmx_one, mmx_256 from the data section.
;
; Frame SAD: for i = 0..6 and each of the four blocks,
;            sum |data[b*64 + (i+1)*8 + j] - data[b*64 + i*8 + j]|, j = 0..7
; Field SAD: for i = 0..6, the same sum between the rows addressed by
;            blocks[i]+lines[i] and blocks[i+1]+lines[i+1], taken at
;            +0 words, +8 words, +64 words and +64+8 words.
;
; Range assumption (TODO confirm against the caller): every input word plus
; 256 must be non-negative for the unsigned-saturating absolute difference to
; be exact, and each 16-bit lane accumulates 56 differences that must stay
; below 32768 because pmaddwd treats the lanes as signed.
;
;===========================================================================

%define FDCT_ROW_BYTES      (8*2)       ; one row of 8 int16 words
%define FDCT_BLOCK_BYTES    (64*2)      ; one 8x8 block of int16 words
%define FDCT_ROW_PAIRS      7           ; rows i and i+1 for i = 0..6

;---------------------------------------------------------------------------
; ACC_ABSDIFF acc, rowA, rowB
;   acc  : MMX accumulator register (four 16-bit lanes)
;   rowA : address expression of the first  row (8 int16 words)
;   rowB : address expression of the second row (8 int16 words)
; Adds |rowA[j] - rowB[j]| into lane (j mod 4) of acc for j = 0..7.
; Requires mm5 = bias words (mmx_256).  Clobbers mm0-mm4.
;---------------------------------------------------------------------------
%macro ACC_ABSDIFF 3
        movq    mm0, [%2]               ; rowA words 0..3
        movq    mm1, [%2 + 8]           ; rowA words 4..7
        movq    mm2, [%3]               ; rowB words 0..3
        movq    mm3, [%3 + 8]           ; rowB words 4..7
        paddw   mm0, mm5                ; bias so the unsigned subtract works
        paddw   mm1, mm5
        paddw   mm2, mm5
        paddw   mm3, mm5

        movq    mm4, mm0
        psubusw mm4, mm2                ; max(a - b, 0)
        psubusw mm2, mm0                ; max(b - a, 0)
        por     mm2, mm4                ; one side is zero -> |a - b|

        movq    mm4, mm1
        psubusw mm4, mm3
        psubusw mm3, mm1
        por     mm3, mm4

        paddw   %1, mm2
        paddw   %1, mm3
%endmacro

align 64
global _FieldDCT_test_mmx
_FieldDCT_test_mmx:
        push    esi
        push    edi
        push    ebx
        push    edx

        mov     esi, [esp + 16 + 4]     ; data (4 saved registers + return address)

        pxor    mm6, mm6                ; frame SAD lanes
        pxor    mm7, mm7                ; field SAD lanes
        movq    mm5, [mmx_256]          ; bias, live for the whole loop
        xor     ecx, ecx                ; i = 0

.row_loop:
        ;------- frame: row i against row i+1 inside each of the 4 blocks ---
        mov     edi, ecx
        shl     edi, 4                  ; i * 8 words * 2 bytes
        add     edi, esi                ; edi -> data[0*64 + i*8]

        ACC_ABSDIFF mm6, edi + 0*FDCT_BLOCK_BYTES, edi + 0*FDCT_BLOCK_BYTES + FDCT_ROW_BYTES
        ACC_ABSDIFF mm6, edi + 1*FDCT_BLOCK_BYTES, edi + 1*FDCT_BLOCK_BYTES + FDCT_ROW_BYTES
        ACC_ABSDIFF mm6, edi + 2*FDCT_BLOCK_BYTES, edi + 2*FDCT_BLOCK_BYTES + FDCT_ROW_BYTES
        ACC_ABSDIFF mm6, edi + 3*FDCT_BLOCK_BYTES, edi + 3*FDCT_BLOCK_BYTES + FDCT_ROW_BYTES

        ;------- field: line i against line i+1 via the lookup tables -------
        mov     edi, [blocks + ecx*4]       ; blocks[i]
        add     edi, [lines + ecx*4]        ; + lines[i]
        add     edi, esi                    ; edi -> data[blocks[i] + lines[i]]

        mov     ebx, [blocks + ecx*4 + 4]   ; blocks[i+1]
        add     ebx, [lines + ecx*4 + 4]    ; + lines[i+1]
        add     ebx, esi                    ; ebx -> data[blocks[i+1] + lines[i+1]]

        ACC_ABSDIFF mm7, edi, ebx
        ACC_ABSDIFF mm7, edi + FDCT_ROW_BYTES, ebx + FDCT_ROW_BYTES
        ACC_ABSDIFF mm7, edi + FDCT_BLOCK_BYTES, ebx + FDCT_BLOCK_BYTES
        ACC_ABSDIFF mm7, edi + FDCT_BLOCK_BYTES + FDCT_ROW_BYTES, ebx + FDCT_BLOCK_BYTES + FDCT_ROW_BYTES

        add     ecx, 1
        cmp     ecx, FDCT_ROW_PAIRS
        jb      near .row_loop          ; body is far larger than a short jump

        ;------- horizontal sums of the four 16-bit lanes -------------------
        movq    mm5, [mmx_one]
        pmaddwd mm6, mm5                ; lanes 0+1 and 2+3 -> two dwords
        pmaddwd mm7, mm5
        movq    mm4, mm6
        movq    mm5, mm7
        psrlq   mm4, 32
        psrlq   mm5, 32
        paddd   mm4, mm6                ; low dword of mm4 = frame SAD
        paddd   mm5, mm7                ; low dword of mm5 = field SAD
        movd    ecx, mm4
        movd    edx, mm5

        emms                            ; release MMX state so later x87 code is safe

        xor     eax, eax                ; default: frame coding
        cmp     ecx, edx
        setae   al                      ; 1 (field coding) when frame SAD >= field SAD

        pop     edx
        pop     ebx
        pop     edi
        pop     esi
        ret
; NOTE: the lines that followed here were the web code viewer's keyboard-shortcut
; help (copy, search, full screen, theme, font size); they were not part of this
; source file and have been removed.