; b_motion.asm
;/**************************************************************************
; *
; * SIGMA DESIGNS MPEG-4 CODEC
; * mmx motion compensation for B_VOP encoding
; *
; *
; *************************************************************************/
;/**************************************************************************
; *
; * History:
; *
; * 02.20.2002 Finished coding. David Zheng, Sigma Designs Inc.
; *
; *************************************************************************/
bits 32
section .text
;===========================================================================
;
; void copy_image2block_mmx(uint8_t * const block,
;                           const uint8_t * ref,
;                           const uint32_t stride)
;
; Copies eight 8-byte rows out of ref, whose rows are `stride` bytes apart,
; into block, where the rows are stored back to back (8 bytes per row,
; 64 bytes in total).
;
; Arguments are taken from the stack (block, ref, stride in that order).
; esi and edi are saved and restored; ecx, mm0 and mm1 are overwritten.
; eax is not written. No emms is executed in this routine.
;
;===========================================================================
align 64
global _copy_image2block_mmx
_copy_image2block_mmx:
        push    esi
        push    edi

        mov     edi, [esp + 8 + 4]      ; block  [out]
        mov     esi, [esp + 8 + 8]      ; ref    [in]
        mov     ecx, [esp + 8 + 12]     ; stride [in]

        ; Rows are moved two at a time: both loads first, then both stores.
        movq    mm0, [esi]              ; row 0
        movq    mm1, [esi + ecx]        ; row 1
        movq    [edi], mm0
        movq    [edi + 8], mm1
        lea     esi, [esi + ecx*2]      ; ref += 2 * stride

        movq    mm0, [esi]              ; row 2
        movq    mm1, [esi + ecx]        ; row 3
        movq    [edi + 16], mm0
        movq    [edi + 24], mm1
        lea     esi, [esi + ecx*2]

        movq    mm0, [esi]              ; row 4
        movq    mm1, [esi + ecx]        ; row 5
        movq    [edi + 32], mm0
        movq    [edi + 40], mm1
        lea     esi, [esi + ecx*2]

        movq    mm0, [esi]              ; row 6
        movq    mm1, [esi + ecx]        ; row 7
        movq    [edi + 48], mm0
        movq    [edi + 56], mm1

        pop     edi
        pop     esi
        ret
;===========================================================================
;
; void copy_block2image_mmx(const uint8_t * block,
;                           uint8_t * const comp,
;                           const uint32_t stride)
;
; Copies eight 8-byte rows out of block, where the rows are stored back to
; back (8 bytes per row, 64 bytes in total), into comp, whose rows are
; `stride` bytes apart.
;
; Arguments are taken from the stack (block, comp, stride in that order).
; esi and edi are saved and restored; ecx, mm0 and mm1 are overwritten.
; eax is not written. No emms is executed in this routine.
;
;===========================================================================
align 64
global _copy_block2image_mmx
_copy_block2image_mmx:
        push    esi
        push    edi

        mov     esi, [esp + 8 + 4]      ; block  [in]
        mov     edi, [esp + 8 + 8]      ; comp   [out]
        mov     ecx, [esp + 8 + 12]     ; stride [in]

        ; Rows are moved two at a time: both loads first, then both stores.
        movq    mm0, [esi]              ; row 0
        movq    mm1, [esi + 8]          ; row 1
        movq    [edi], mm0
        movq    [edi + ecx], mm1
        lea     edi, [edi + ecx*2]      ; comp += 2 * stride

        movq    mm0, [esi + 16]         ; row 2
        movq    mm1, [esi + 24]         ; row 3
        movq    [edi], mm0
        movq    [edi + ecx], mm1
        lea     edi, [edi + ecx*2]

        movq    mm0, [esi + 32]         ; row 4
        movq    mm1, [esi + 40]         ; row 5
        movq    [edi], mm0
        movq    [edi + ecx], mm1
        lea     edi, [edi + ecx*2]

        movq    mm0, [esi + 48]         ; row 6
        movq    mm1, [esi + 56]         ; row 7
        movq    [edi], mm0
        movq    [edi + ecx], mm1

        pop     edi
        pop     esi
        ret
;===========================================================================
;
; int average_sad_mmx(
;       const uint8_t * curr_blk,
;       uint8_t * const result,
;       const uint8_t * source1,
;       const uint8_t * source2)
;
; Processes 8 rows of 8 bytes; all four buffers advance by 8 bytes per row
; (64 contiguous bytes each). For every byte i:
;       result[i] = (source1[i] + source2[i] + 1) >> 1
; The return value (eax) is the sum over all 64 bytes of
;       |result[i] - curr_blk[i]|.
;
; Arguments are taken from the stack in prototype order.
; esi, edi and ebx are saved and restored; ecx, mm0-mm2 and mm5-mm7 are
; overwritten. No emms is executed in this routine.
;
;===========================================================================
align 64
global _average_sad_mmx
_average_sad_mmx:
        push    esi
        push    edi
        push    ebx

        mov     esi, [esp + 12 + 4]     ; curr_blk [in]
        mov     edi, [esp + 12 + 8]     ; result   [out]
        mov     ebx, [esp + 12 + 12]    ; source1  [in]
        mov     eax, [esp + 12 + 16]    ; source2  [in]

        pxor    mm5, mm5                ; mm5 = 0, used to widen bytes to words
        pxor    mm7, mm7                ; mm7 = per-word SAD accumulators
        pxor    mm6, mm6
        pcmpeqw mm1, mm1                ; mm1 = w[-1|-1|-1|-1]
        mov     ecx, 8                  ; rows remaining
        psubw   mm6, mm1                ; mm6 = 0 - (-1) = w[1|1|1|1]

.row:
        ; ---- rounded average of one row, computed in 16-bit lanes --------
        movq    mm0, [ebx]              ; source1 row
        movq    mm2, [eax]              ; source2 row
        movq    mm1, mm0
        movq    mm3, mm2
        punpcklbw mm0, mm5              ; low 4 bytes  -> words
        punpckhbw mm1, mm5              ; high 4 bytes -> words
        punpcklbw mm2, mm5
        punpckhbw mm3, mm5
        paddw   mm0, mm2
        paddw   mm1, mm3
        paddw   mm0, mm6                ; +1 so the shift rounds up
        paddw   mm1, mm6
        psrlw   mm0, 1
        psrlw   mm1, 1
        packuswb mm0, mm1               ; mm0 = 8 averaged bytes
        movq    [edi], mm0              ; result is stored before curr_blk is read

        ; ---- |average - curr| per byte via two saturating subtractions ---
        movq    mm1, [esi]              ; curr_blk row
        movq    mm2, mm0                ; keep a copy of the average
        psubusb mm0, mm1                ; max(avg - cur, 0)
        psubusb mm1, mm2                ; max(cur - avg, 0)
        por     mm0, mm1                ; one of the two is zero -> |avg - cur|

        ; ---- widen to words and accumulate ------------------------------
        movq    mm1, mm0
        punpcklbw mm0, mm5
        punpckhbw mm1, mm5
        paddusw mm7, mm0
        paddusw mm7, mm1

        add     ebx, 8
        add     eax, 8
        add     esi, 8
        add     edi, 8
        dec     ecx                     ; dec/jnz instead of the legacy `loop`
        jnz     .row

        ; ---- horizontal sum of the four word accumulators ---------------
        pmaddwd mm7, mm6                ; multiply by 1 and add word pairs -> 2 dwords
        movq    mm1, mm7
        psrlq   mm7, 32
        paddd   mm1, mm7                ; low dword = total
        movd    eax, mm1                ; return value

        pop     ebx
        pop     edi
        pop     esi
        ret
;===========================================================================
;
; int average_sad_xmm(
;       const uint8_t * curr_blk,
;       uint8_t * const result,
;       const uint8_t * source1,
;       const uint8_t * source2)
;
; Processes 8 rows of 8 bytes; all four buffers advance by 8 bytes per row
; (64 contiguous bytes each). For every row:
;       result row = pavgb(source1 row, source2 row)   (rounded byte average)
; The return value (eax) is the accumulated psadbw of each averaged row
; against the matching curr_blk row.
;
; Uses pavgb / psadbw on mm registers, which are not part of the base MMX
; instruction set.
;
; Each source1 row is loaded at the top of the iteration that consumes it,
; so exactly 64 bytes of source1 are read (no load past the last row).
;
; Arguments are taken from the stack in prototype order.
; esi, edi and ebx are saved and restored; ecx, mm0, mm4 and mm7 are
; overwritten. No emms is executed in this routine.
;
;===========================================================================
align 64
global _average_sad_xmm
_average_sad_xmm:
        push    esi
        push    edi
        push    ebx

        mov     esi, [esp + 12 + 4]     ; curr_blk [in]
        mov     edi, [esp + 12 + 8]     ; result   [out]
        mov     ebx, [esp + 12 + 12]    ; source1  [in]
        mov     eax, [esp + 12 + 16]    ; source2  [in]

        pxor    mm7, mm7                ; mm7 = SAD accumulator
        mov     ecx, 8                  ; rows remaining

.row:
        movq    mm0, [ebx]              ; source1 row
        pavgb   mm0, [eax]              ; mm0 = rounded average with source2 row
        movq    mm4, mm0                ; psadbw destroys mm0, keep the average
        psadbw  mm0, [esi]              ; low word = sum |avg - curr| for this row
        movq    [edi], mm4              ; store the average result
        paddusw mm7, mm0

        add     ebx, 8
        add     eax, 8
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     .row

        movd    eax, mm7                ; return value

        pop     ebx
        pop     edi
        pop     esi
        ret
;===========================================================================
;
; int average_block_mmx(
;       uint8_t * const result,
;       const uint8_t * source1,
;       const uint8_t * source2)
;
; Processes 8 rows of 8 bytes; all three buffers are walked contiguously
; (64 bytes each). For every byte i:
;       result[i] = (source1[i] + source2[i] + 1) >> 1
;
; Arguments are taken from the stack in prototype order.
; edi, ebx and edx are saved and restored; ecx, mm0-mm3, mm5 and mm6 are
; overwritten. No emms is executed in this routine.
; NOTE(review): the prototype says `int`, but eax is never written here,
; so the value seen by the caller is whatever eax held on entry.
;
;===========================================================================
align 64
global _average_block_mmx
_average_block_mmx:
        push    edi
        push    ebx
        push    edx

        mov     edi, [esp + 12 + 4]     ; result  [out]
        mov     ebx, [esp + 12 + 8]     ; source1 [in]
        mov     edx, [esp + 12 + 12]    ; source2 [in]

        ; Point all three registers just past their 64-byte blocks and walk
        ; a shared negative offset up to zero.
        add     edi, 64
        add     ebx, 64
        add     edx, 64
        mov     ecx, -64                ; byte offset of the current row

        pxor    mm5, mm5                ; mm5 = 0, used to widen bytes to words
        pcmpeqw mm6, mm6                ; mm6 = all ones
        psrlw   mm6, 15                 ; mm6 = w[1|1|1|1] (rounding term)

.row:
        movq    mm0, [ebx + ecx]        ; source1 row
        movq    mm2, [edx + ecx]        ; source2 row
        movq    mm1, mm0
        movq    mm3, mm2
        punpcklbw mm0, mm5              ; low 4 bytes  -> words
        punpcklbw mm2, mm5
        punpckhbw mm1, mm5              ; high 4 bytes -> words
        punpckhbw mm3, mm5
        paddw   mm0, mm2
        paddw   mm1, mm3
        paddw   mm0, mm6                ; +1 so the shift rounds up
        paddw   mm1, mm6
        psrlw   mm0, 1
        psrlw   mm1, 1
        packuswb mm0, mm1               ; back to 8 bytes
        movq    [edi + ecx], mm0
        add     ecx, 8                  ; reaches 0 after the 8th row
        jnz     .row

        pop     edx
        pop     ebx
        pop     edi
        ret
;===========================================================================
;
; int average_block2image_mmx(
;       const uint8_t * const block,
;       uint8_t * const image,
;       const int stride)
;
; Processes 8 rows of 8 bytes. block rows are stored back to back (8 bytes
; per row); image rows are `stride` bytes apart. Each image row is replaced
; in place by
;       image[i] = (block[i] + image[i] + 1) >> 1
;
; Arguments are taken from the stack in prototype order.
; esi and edi are saved and restored; ecx, mm0-mm3, mm5 and mm6 are
; overwritten. No emms is executed in this routine.
; NOTE(review): the prototype says `int`; eax holds `stride` on return
; because it is used as the row-step register and nothing else is loaded
; into it.
;
;===========================================================================
align 64
global _average_block2image_mmx
_average_block2image_mmx:
        push    esi
        push    edi

        mov     esi, [esp + 8 + 4]      ; block  [in]
        mov     edi, [esp + 8 + 8]      ; image  [in/out]
        mov     eax, [esp + 8 + 12]     ; stride [in]

        pxor    mm5, mm5                ; mm5 = 0, used to widen bytes to words
        pxor    mm6, mm6
        pcmpeqw mm1, mm1                ; mm1 = w[-1|-1|-1|-1]
        mov     ecx, 8                  ; rows remaining
        psubw   mm6, mm1                ; mm6 = 0 - (-1) = w[1|1|1|1]

.row:
        movq    mm0, [esi]              ; block row
        movq    mm2, [edi]              ; image row
        movq    mm1, mm0
        movq    mm3, mm2
        punpcklbw mm0, mm5              ; low 4 bytes  -> words
        punpckhbw mm1, mm5              ; high 4 bytes -> words
        punpcklbw mm2, mm5
        punpckhbw mm3, mm5
        paddw   mm0, mm2
        paddw   mm1, mm3
        paddw   mm0, mm6                ; +1 so the shift rounds up
        paddw   mm1, mm6
        psrlw   mm0, 1
        psrlw   mm1, 1
        packuswb mm0, mm1               ; back to 8 bytes
        movq    [edi], mm0              ; overwrite the image row

        add     esi, 8                  ; next packed block row
        add     edi, eax                ; next image row
        dec     ecx                     ; dec/jnz instead of the legacy `loop`
        jnz     .row

        pop     edi
        pop     esi
        ret
; end of b_motion.asm