📄 interpolate8x8_mmx.asm
字号:
lea ecx, [ecx+2*edx]
AVG2_MMX_RND0
lea ecx, [ecx+2*edx]
AVG2_MMX_RND0
lea ecx, [ecx+2*edx]
AVG2_MMX_RND0
pop ebx
ret
.rounding1
mov eax, [esp + 4 + 24] ; height -> eax
sub eax, 8
test eax, eax
mov ecx, [esp + 4 + 4] ; dst -> edi
mov eax, [esp + 4 + 8] ; src1 -> esi
mov ebx, [esp + 4 + 12] ; src2 -> eax
mov edx, [esp + 4 + 16] ; stride -> edx
movq mm7, [mmx_one]
jz near .start1
AVG2_MMX_RND1
lea ecx, [ecx+2*edx]
.start1
AVG2_MMX_RND1
lea ecx, [ecx+2*edx]
AVG2_MMX_RND1
lea ecx, [ecx+2*edx]
AVG2_MMX_RND1
lea ecx, [ecx+2*edx]
AVG2_MMX_RND1
pop ebx
ret
;-----------------------------------------------------------------------------
;
; void interpolate8x8_avg4_mmx(uint8_t * const dst,
; const uint8_t * const src1,
; const uint8_t * const src2,
; const uint8_t * const src3,
; const uint8_t * const src4,
; const uint32_t stride,
; const uint32_t rounding);
;
;-----------------------------------------------------------------------------
; AVG4_MMX_RND0 - average one 8-byte row from four sources and store it.
;
;   In:       eax = src1, ebx = src2, esi = src3, edi = src4
;             ecx = dst, edx = stride
;   Out:      eax, ebx, esi, edi are each advanced by edx.
;             ecx is NOT advanced; the caller steps dst between invocations.
;   Clobbers: mm0-mm5
;
; Each source quadword is split into a "low" part (pand with mmx_three) and a
; "high" part (pand with mmx_mask2, then shifted right by 2). The four high
; parts are summed in mm0; the four low parts are summed in mm2, the rounding
; constant [mmx_two] is added, and the result is shifted right by 2 and added
; to mm0.
;
; NOTE(review): mmx_three / mmx_mask2 / mmx_two are defined outside this view.
; Presumably they are 0x03 / 0xFC / 0x02 replicated in every byte, which makes
; the result (src1 + src2 + src3 + src4 + 2) / 4 per byte and keeps the 64-bit
; psrlq from leaking bits across byte lanes - confirm against the data section.
%macro AVG4_MMX_RND0 0
movq mm0, [eax] ; src1 -> mm0
movq mm1, [ebx] ; src2 -> mm1
movq mm2, mm0
movq mm3, mm1
pand mm2, [mmx_three] ; mm2 = low part of src1
pand mm3, [mmx_three] ; mm3 = low part of src2
pand mm0, [mmx_mask2] ; mask before the quadword-wide shift below
pand mm1, [mmx_mask2]
psrlq mm0, 2 ; mm0 = high part of src1
psrlq mm1, 2 ; mm1 = high part of src2
lea eax, [eax+edx] ; src1 += stride
lea ebx, [ebx+edx] ; src2 += stride
paddb mm0, mm1 ; mm0 = high(src1) + high(src2)
paddb mm2, mm3 ; mm2 = low(src1) + low(src2)
movq mm4, [esi] ; src3 -> mm4
movq mm5, [edi] ; src4 -> mm5
movq mm1, mm4
movq mm3, mm5
pand mm1, [mmx_three] ; mm1 = low part of src3
pand mm3, [mmx_three] ; mm3 = low part of src4
pand mm4, [mmx_mask2]
pand mm5, [mmx_mask2]
psrlq mm4, 2 ; mm4 = high part of src3
psrlq mm5, 2 ; mm5 = high part of src4
paddb mm4, mm5
paddb mm0, mm4 ; mm0 = sum of all four high parts
paddb mm1, mm3
paddb mm2, mm1 ; mm2 = sum of all four low parts
paddb mm2, [mmx_two] ; add the rounding term
pand mm2, [mmx_mask2]
psrlq mm2, 2 ; carry of the low parts into the result
paddb mm0, mm2
lea esi, [esi+edx] ; src3 += stride
lea edi, [edi+edx] ; src4 += stride
movq [ecx], mm0 ; (src1 + src2 + src3 + src4 + 2) / 4 -> dst
%endmacro
; AVG4_MMX_RND1 - same as AVG4_MMX_RND0 except that the rounding term added to
; the summed low parts is [mmx_one] instead of [mmx_two].
;
;   In:       eax = src1, ebx = src2, esi = src3, edi = src4
;             ecx = dst, edx = stride
;   Out:      eax, ebx, esi, edi are each advanced by edx.
;             ecx is NOT advanced; the caller steps dst between invocations.
;   Clobbers: mm0-mm5
;
; NOTE(review): mmx_three / mmx_mask2 / mmx_one are defined outside this view.
; Presumably they are 0x03 / 0xFC / 0x01 replicated in every byte, which makes
; the result (src1 + src2 + src3 + src4 + 1) / 4 per byte - confirm against
; the data section.
%macro AVG4_MMX_RND1 0
movq mm0, [eax] ; src1 -> mm0
movq mm1, [ebx] ; src2 -> mm1
movq mm2, mm0
movq mm3, mm1
pand mm2, [mmx_three] ; mm2 = low part of src1
pand mm3, [mmx_three] ; mm3 = low part of src2
pand mm0, [mmx_mask2] ; mask before the quadword-wide shift below
pand mm1, [mmx_mask2]
psrlq mm0, 2 ; mm0 = high part of src1
psrlq mm1, 2 ; mm1 = high part of src2
lea eax,[eax+edx] ; src1 += stride
lea ebx,[ebx+edx] ; src2 += stride
paddb mm0, mm1 ; mm0 = high(src1) + high(src2)
paddb mm2, mm3 ; mm2 = low(src1) + low(src2)
movq mm4, [esi] ; src3 -> mm4
movq mm5, [edi] ; src4 -> mm5
movq mm1, mm4
movq mm3, mm5
pand mm1, [mmx_three] ; mm1 = low part of src3
pand mm3, [mmx_three] ; mm3 = low part of src4
pand mm4, [mmx_mask2]
pand mm5, [mmx_mask2]
psrlq mm4, 2 ; mm4 = high part of src3
psrlq mm5, 2 ; mm5 = high part of src4
paddb mm4, mm5
paddb mm0, mm4 ; mm0 = sum of all four high parts
paddb mm1, mm3
paddb mm2, mm1 ; mm2 = sum of all four low parts
paddb mm2, [mmx_one] ; add the rounding term (one, not two, in this variant)
pand mm2, [mmx_mask2]
psrlq mm2, 2 ; carry of the low parts into the result
paddb mm0, mm2
lea esi,[esi+edx] ; src3 += stride
lea edi,[edi+edx] ; src4 += stride
movq [ecx], mm0 ; (src1 + src2 + src3 + src4 + 1) / 4 -> dst
%endmacro
ALIGN 16
; interpolate8x8_avg4_mmx - write 8 rows of 8 bytes, each the 4-way average of
; the corresponding rows of src1..src4.
;
; Stack arguments (32-bit, read relative to esp after the three pushes below):
;   [esp+12+ 4] dst       [esp+12+ 8] src1      [esp+12+12] src2
;   [esp+12+16] src3      [esp+12+20] src4      [esp+12+24] stride
;   [esp+12+28] rounding  (zero selects AVG4_MMX_RND0, non-zero AVG4_MMX_RND1)
;
; Register roles while the rows are processed:
;   eax = src1, ebx = src2, esi = src3, edi = src4, ecx = dst, edx = stride
;
; ebx, esi and edi are saved on entry and restored before returning.
; mm0-mm5 are clobbered by the AVG4 macros; no emms is issued here.
interpolate8x8_avg4_mmx:

  push ebx
  push edi
  push esi

  mov eax, [esp + 12 + 28]      ; eax = rounding
  test eax, eax                 ; ZF is consumed by the jnz below; the
                                ; intervening mov instructions leave flags alone
  mov ecx, [esp + 12 +  4]      ; ecx = dst
  mov eax, [esp + 12 +  8]      ; eax = src1 (rounding value no longer needed)
  mov ebx, [esp + 12 + 12]      ; ebx = src2
  mov esi, [esp + 12 + 16]      ; esi = src3
  mov edi, [esp + 12 + 20]      ; edi = src4
  mov edx, [esp + 12 + 24]      ; edx = stride

  jnz near .rounding1

  ; rounding == 0: eight rows. The macro advances the four source pointers;
  ; dst is stepped here, between rows only.
%rep 7
  AVG4_MMX_RND0
  lea ecx, [ecx+edx]
%endrep
  AVG4_MMX_RND0

  pop esi
  pop edi
  pop ebx
  ret

.rounding1:
  ; rounding != 0: same row loop with the alternate rounding macro.
%rep 7
  AVG4_MMX_RND1
  lea ecx, [ecx+edx]
%endrep
  AVG4_MMX_RND1

  pop esi
  pop edi
  pop ebx
  ret
;-----------------------------------------------------------------------------
;
; void interpolate8x8_6tap_lowpass_h_mmx(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;-----------------------------------------------------------------------------
; LOWPASS_6TAP_H_MMX - horizontal 6-tap filter over one row of 8 pixels.
;
;   In:       eax = src (points at pixel 0 of the row), ecx = dst, edx = stride
;             mm6 = rounding term, one value per 16-bit word
;             mm7 = 0 (used to zero-extend bytes to words when unpacking)
;   Out:      eax advanced by edx. ecx is NOT advanced; the caller steps dst.
;   Clobbers: mm0-mm5
;
; For every output pixel x the macro computes, in 16-bit words:
;   t = ((s[x] + s[x+1]) * 4 - (s[x-1] + s[x+2])) * [mmx_five]
;       + (s[x-2] + s[x+3]) + mm6
;   dst[x] = unsigned-saturate-to-byte(t >> 5)        (arithmetic shift)
; The unaligned 8-byte loads at eax-2 .. eax+3 read source bytes eax-2
; through eax+10.
;
; NOTE(review): mmx_five is defined outside this view; presumably 5 in every
; word, giving the tap weights (1, -5, 20, 20, -5, 1) - confirm.
%macro LOWPASS_6TAP_H_MMX 0
movq mm0, [eax] ; s[x]
movq mm2, [eax+1] ; s[x+1]
movq mm1, mm0
movq mm3, mm2
punpcklbw mm0, mm7 ; low 4 pixels -> words (mm0, mm2)
punpcklbw mm2, mm7
punpckhbw mm1, mm7 ; high 4 pixels -> words (mm1, mm3)
punpckhbw mm3, mm7
paddw mm0, mm2
paddw mm1, mm3
psllw mm0, 2 ; (s[x] + s[x+1]) * 4
psllw mm1, 2
movq mm2, [eax-1] ; s[x-1]
movq mm4, [eax+2] ; s[x+2]
movq mm3, mm2
movq mm5, mm4
punpcklbw mm2, mm7
punpcklbw mm4, mm7
punpckhbw mm3, mm7
punpckhbw mm5, mm7
paddw mm2, mm4
paddw mm3, mm5
psubsw mm0, mm2 ; ... - (s[x-1] + s[x+2])
psubsw mm1, mm3
pmullw mm0, [mmx_five] ; scale the inner four taps
pmullw mm1, [mmx_five]
movq mm2, [eax-2] ; s[x-2]
movq mm4, [eax+3] ; s[x+3]
movq mm3, mm2
movq mm5, mm4
punpcklbw mm2, mm7
punpcklbw mm4, mm7
punpckhbw mm3, mm7
punpckhbw mm5, mm7
paddw mm2, mm4
paddw mm3, mm5
paddsw mm0, mm2 ; ... + (s[x-2] + s[x+3])
paddsw mm1, mm3
paddsw mm0, mm6 ; add the rounding term
paddsw mm1, mm6
psraw mm0, 5
psraw mm1, 5
lea eax, [eax+edx] ; src += stride
packuswb mm0, mm1 ; words -> bytes with unsigned saturation
movq [ecx], mm0
%endmacro
ALIGN 16
; interpolate8x8_6tap_lowpass_h_mmx - apply LOWPASS_6TAP_H_MMX to 8 rows.
;
; Stack arguments (32-bit, nothing is pushed so offsets are from the entry esp):
;   [esp+ 4] dst    [esp+ 8] src    [esp+12] stride    [esp+16] rounding
;
; Register roles while the rows are processed:
;   eax = src, ecx = dst, edx = stride
;   mm6 = 8-byte entry number `rounding` of the rounding_lowpass_mmx table
;   mm7 = 0
interpolate8x8_6tap_lowpass_h_mmx:

  mov edx, [esp + 16]                         ; edx = rounding (table index)
  pxor mm7, mm7                               ; mm7 = 0 for byte->word unpacking
  movq mm6, [rounding_lowpass_mmx + edx * 8]  ; mm6 = rounding term for the macro

  mov eax, [esp + 8]                          ; eax = src
  mov ecx, [esp + 4]                          ; ecx = dst
  mov edx, [esp + 12]                         ; edx = stride (index no longer needed)

  ; Eight rows: the macro advances src itself, dst is stepped between rows.
%rep 7
  LOWPASS_6TAP_H_MMX
  lea ecx, [ecx+edx]
%endrep
  LOWPASS_6TAP_H_MMX

  ret
;-----------------------------------------------------------------------------
;
; void interpolate8x8_6tap_lowpass_v_mmx(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;-----------------------------------------------------------------------------
; LOWPASS_6TAP_V_MMX - vertical 6-tap filter producing one row of 8 pixels.
;
;   In:       eax = src (points at row 0), ecx = dst, edx = stride
;             ebx = 3 * edx (the entry point below sets this up)
;             mm6 = rounding term, one value per 16-bit word
;             mm7 = 0 (used to zero-extend bytes to words when unpacking)
;   Out:      eax = entry eax + edx (one row down), provided ebx = 3 * edx.
;             ecx is NOT advanced; the caller steps dst.
;   Clobbers: mm0-mm5
;
; With r[k] denoting the 8 bytes at src + k*stride, each column computes:
;   t = ((r[0] + r[1]) * 4 - (r[-1] + r[2])) * [mmx_five]
;       + (r[-2] + r[3]) + mm6
;   dst = unsigned-saturate-to-byte(t >> 5)           (arithmetic shift)
; Rows -2 through +3 relative to src are read.
;
; NOTE(review): mmx_five is defined outside this view; presumably 5 in every
; word, giving the tap weights (1, -5, 20, 20, -5, 1) - confirm.
%macro LOWPASS_6TAP_V_MMX 0
movq mm0, [eax] ; r[0]
movq mm2, [eax+edx] ; r[1]
movq mm1, mm0
movq mm3, mm2
punpcklbw mm0, mm7 ; low 4 pixels -> words (mm0, mm2)
punpcklbw mm2, mm7
punpckhbw mm1, mm7 ; high 4 pixels -> words (mm1, mm3)
punpckhbw mm3, mm7
paddw mm0, mm2
paddw mm1, mm3
psllw mm0, 2 ; (r[0] + r[1]) * 4
psllw mm1, 2
movq mm4, [eax+2*edx] ; r[2]
sub eax, ebx ; eax = src - 3*stride, so the rows above src become reachable
movq mm2, [eax+2*edx] ; r[-1]
movq mm3, mm2
movq mm5, mm4
punpcklbw mm2, mm7
punpcklbw mm4, mm7
punpckhbw mm3, mm7
punpckhbw mm5, mm7
paddw mm2, mm4
paddw mm3, mm5
psubsw mm0, mm2 ; ... - (r[-1] + r[2])
psubsw mm1, mm3
pmullw mm0, [mmx_five] ; scale the inner four taps
pmullw mm1, [mmx_five]
movq mm2, [eax+edx] ; r[-2]
movq mm4, [eax+2*ebx] ; r[3]  (-3*stride + 6*stride)
movq mm3, mm2
movq mm5, mm4
punpcklbw mm2, mm7
punpcklbw mm4, mm7
punpckhbw mm3, mm7
punpckhbw mm5, mm7
paddw mm2, mm4
paddw mm3, mm5
paddsw mm0, mm2 ; ... + (r[-2] + r[3])
paddsw mm1, mm3
paddsw mm0, mm6 ; add the rounding term
paddsw mm1, mm6
psraw mm0, 5
psraw mm1, 5
lea eax, [eax+4*edx] ; undo the -3*stride and step one row: src += stride
packuswb mm0, mm1 ; words -> bytes with unsigned saturation
movq [ecx], mm0
%endmacro
ALIGN 16
; interpolate8x8_6tap_lowpass_v_mmx - apply LOWPASS_6TAP_V_MMX to 8 rows.
;
; Stack arguments (32-bit, read relative to esp after the single push below):
;   [esp+4+ 4] dst    [esp+4+ 8] src    [esp+4+12] stride    [esp+4+16] rounding
;
; Register roles while the rows are processed:
;   eax = src, ecx = dst, edx = stride, ebx = 3 * stride
;   mm6 = 8-byte entry number `rounding` of the rounding_lowpass_mmx table
;   mm7 = 0
;
; ebx is saved on entry and restored before returning.
interpolate8x8_6tap_lowpass_v_mmx:

  push ebx

  mov edx, [esp + 4 + 16]                     ; edx = rounding (table index)
  pxor mm7, mm7                               ; mm7 = 0 for byte->word unpacking
  movq mm6, [rounding_lowpass_mmx + edx * 8]  ; mm6 = rounding term for the macro

  mov edx, [esp + 4 + 12]                     ; edx = stride (index no longer needed)
  mov eax, [esp + 4 + 8]                      ; eax = src
  mov ecx, [esp + 4 + 4]                      ; ecx = dst
  lea ebx, [edx + edx*2]                      ; ebx = 3 * stride, required by the macro

  ; Eight rows: the macro leaves src one row further down, dst is stepped here.
%rep 7
  LOWPASS_6TAP_V_MMX
  lea ecx, [ecx+edx]
%endrep
  LOWPASS_6TAP_V_MMX

  pop ebx
  ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -