📄 interpolate8x8_mmx.asm
字号:
movq mm0, [esi]
pavgb mm0, [esi + edx]
movq [edi], mm0
; dec eax
; jnz .loop
pop edi
pop esi
ret
halfpel_v_xmm_rounding1
movq mm7, [mmx_one]
mov eax, 8
.loop
movq mm0, [esi]
movq mm1, [esi + edx]
movq mm2, mm0 ; backup mm0
pavgb mm0, mm1 ; mm0 = avg([src], [src+stride])
paddb mm1, mm2 ; isolate and subtract lsb's from original values
pand mm1, mm7 ; (compensate for pavgb rounding)
psubusb mm0, mm1
movq [edi], mm0
add edi, edx ; dst += stride
add esi, edx ; src += stride
dec eax
jnz .loop
pop edi
pop esi
ret
;===========================================================================
;
; void interpolate8x8_halfpel_v_3dn(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;===========================================================================
align 16
cglobal interpolate8x8_halfpel_v_3dn
interpolate8x8_halfpel_v_3dn
push esi
push edi
mov edi, [esp + 8 + 4] ; dst
mov esi, [esp + 8 + 8] ; src
mov edx, [esp + 8 + 12] ; stride
mov eax, [esp + 8 + 16] ; rounding
or al,al ; hack for pavgusb rounding
jnz halfpel_v_3dn_rounding1
halfpel_v_3dn_rounding0
; mov eax, 8
;.loop
movq mm0, [esi]
pavgusb mm0, [esi + edx] ; mm0 = avg([src], [src+stride])
movq [edi], mm0
add edi, edx ; dst += stride
add esi, edx ; src += stride
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
add edi, edx
add esi, edx
movq mm0, [esi]
pavgusb mm0, [esi + edx]
movq [edi], mm0
; dec eax
; jnz .loop
pop edi
pop esi
ret
halfpel_v_3dn_rounding1
movq mm7, [mmx_one]
mov eax, 8
.loop
movq mm0, [esi]
movq mm1, [esi + edx]
movq mm2, mm0 ; backup mm0
pavgusb mm0, mm1 ; mm0 = avg([src], [src+stride])
paddb mm1, mm2 ; isolate and subtract lsb's from original values
pand mm1, mm7 ; (compensate for pavgb rounding)
psubusb mm0, mm1
movq [edi], mm0
add edi, edx ; dst += stride
add esi, edx ; src += stride
dec eax
jnz .loop
pop edi
pop esi
ret
;===========================================================================
;
; void interpolate8x8_halfpel_v_mmx(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;===========================================================================
align 16
cglobal interpolate8x8_halfpel_v_mmx
interpolate8x8_halfpel_v_mmx
push esi
push edi
mov eax, [esp + 8 + 16] ; rounding
interpolate8x8_halfpel_v_mmx.start
movq mm7, [rounding1_mmx + eax * 8]
mov edi, [esp + 8 + 4] ; dst
mov esi, [esp + 8 + 8] ; src
mov edx, [esp + 8 + 12] ; stride
mov eax, 8
pxor mm6, mm6 ; zero
.loop
movq mm0, [esi]
movq mm2, [esi + edx]
movq mm1, mm0
movq mm3, mm2
punpcklbw mm0, mm6 ; mm01 = [src]
punpckhbw mm1, mm6 ; mm23 = [src + 1]
CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6
packuswb mm0, mm1
movq [edi], mm0 ; [dst] = mm01
add esi, edx ; src += stride
add edi, edx ; dst += stride
dec eax
jnz .loop
pop edi
pop esi
ret
;===========================================================================
;
; void interpolate8x8_halfpel_hv_xmm(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;
;===========================================================================
align 16
cglobal interpolate8x8_halfpel_hv_xmm
interpolate8x8_halfpel_hv_xmm
push esi
push edi
mov eax, [esp + 8 + 16] ; rounding
or al,al
jnz interpolate8x8_halfpel_hv_mmx.start
mov edi, [esp + 8 + 4] ; dst
mov esi, [esp + 8 + 8] ; src
mov edx, [esp + 8 + 12] ; stride
movq mm7, [mmx_one]
mov eax, 8
.loop
; current row
movq mm0, [esi]
movq mm1, [esi + 1]
movq mm2, mm0
pavgb mm0, mm1
pxor mm1, mm2
; next row
movq mm2, [esi + edx]
movq mm3, [esi + edx + 1]
movq mm4, mm2
pavgb mm2, mm3
pxor mm3, mm4
; add current + next row
por mm1, mm3
movq mm3, mm0
pxor mm3, mm2
pand mm1, mm3
pavgb mm0, mm2
pand mm1, mm7
psubusb mm0, mm1
movq [edi], mm0 ; [dst] = mm01
add esi, edx ; src += stride
add edi, edx ; dst += stride
dec eax
jnz .loop
pop edi
pop esi
ret
;===========================================================================
;
; void interpolate8x8_halfpel_hv_mmx(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;
;===========================================================================
align 16
cglobal interpolate8x8_halfpel_hv_mmx
interpolate8x8_halfpel_hv_mmx
push esi
push edi
mov eax, [esp + 8 + 16] ; rounding
interpolate8x8_halfpel_hv_mmx.start
movq mm7, [rounding2_mmx + eax * 8]
mov edi, [esp + 8 + 4] ; dst
mov esi, [esp + 8 + 8] ; src
mov eax, 8
pxor mm6, mm6 ; zero
mov edx, [esp + 8 + 12] ; stride
.loop
; current row
movq mm0, [esi]
movq mm2, [esi + 1]
movq mm1, mm0
movq mm3, mm2
punpcklbw mm0, mm6 ; mm01 = [src]
punpcklbw mm2, mm6 ; mm23 = [src + 1]
punpckhbw mm1, mm6
punpckhbw mm3, mm6
paddusw mm0, mm2 ; mm01 += mm23
paddusw mm1, mm3
; next row
movq mm4, [esi + edx]
movq mm2, [esi + edx + 1]
movq mm5, mm4
movq mm3, mm2
punpcklbw mm4, mm6 ; mm45 = [src + stride]
punpcklbw mm2, mm6 ; mm23 = [src + stride + 1]
punpckhbw mm5, mm6
punpckhbw mm3, mm6
paddusw mm4, mm2 ; mm45 += mm23
paddusw mm5, mm3
; add current + next row
paddusw mm0, mm4 ; mm01 += mm45
paddusw mm1, mm5
paddusw mm0, mm7 ; mm01 += rounding2
paddusw mm1, mm7
psrlw mm0, 2 ; mm01 >>= 2
psrlw mm1, 2
packuswb mm0, mm1
movq [edi], mm0 ; [dst] = mm01
add esi, edx ; src += stride
add edi, edx ; dst += stride
dec eax
jnz .loop
pop edi
pop esi
ret
;===========================================================================
;
; void interpolate8x8_halfpel_hv_3dn(uint8_t * const dst,
; const uint8_t * const src,
; const uint32_t stride,
; const uint32_t rounding);
;
;
;===========================================================================
align 16
cglobal interpolate8x8_halfpel_hv_3dn
interpolate8x8_halfpel_hv_3dn
push esi
push edi
mov eax, [esp + 8 + 16] ; rounding
or al,al
jnz near interpolate8x8_halfpel_hv_mmx.start
mov edi, [esp + 8 + 4] ; dst
mov esi, [esp + 8 + 8] ; src
mov edx, [esp + 8 + 12] ; stride
movq mm7, [mmx_one]
mov eax, 8
.loop
; current row
movq mm0, [esi]
movq mm1, [esi + 1]
movq mm2, mm0
pavgusb mm0, mm1
pxor mm1, mm2
; next row
movq mm2, [esi + edx]
movq mm3, [esi + edx + 1]
movq mm4, mm2
pavgusb mm2, mm3
pxor mm3, mm4
; add current + next row
por mm1, mm3
movq mm3, mm0
pxor mm3, mm2
pand mm1, mm3
pavgusb mm0, mm2
pand mm1, mm7
psubusb mm0, mm1
movq [edi], mm0 ; [dst] = mm01
add esi, edx ; src += stride
add edi, edx ; dst += stride
dec eax
jnz .loop
pop edi
pop esi
ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -