📄 interpolate8x8_mmx.asm
字号:
movq mm0, [esi] pavgb mm0, [esi + edx] movq [edi], mm0; dec eax; jnz .loop pop edi pop esi rethalfpel_v_xmm_rounding1 movq mm7, [mmx_one] mov eax, 8.loop movq mm0, [esi] movq mm1, [esi + edx] movq mm2, mm0 ; backup mm0 pavgb mm0, mm1 ; mm0 = avg([src], [src+stride]) paddb mm1, mm2 ; isolate and subtract lsb's from original values pand mm1, mm7 ; (compensate for pavgb rounding) psubusb mm0, mm1 movq [edi], mm0 add edi, edx ; dst += stride add esi, edx ; src += stride dec eax jnz .loop pop edi pop esi ret;===========================================================================;; void interpolate8x8_halfpel_v_3dn(uint8_t * const dst,; const uint8_t * const src,; const uint32_t stride,; const uint32_t rounding);;;===========================================================================align 16cglobal interpolate8x8_halfpel_v_3dninterpolate8x8_halfpel_v_3dn push esi push edi mov edi, [esp + 8 + 4] ; dst mov esi, [esp + 8 + 8] ; src mov edx, [esp + 8 + 12] ; stride mov eax, [esp + 8 + 16] ; rounding or al,al ; hack for pavgusb rounding jnz halfpel_v_3dn_rounding1halfpel_v_3dn_rounding0; mov eax, 8;.loop movq mm0, [esi] pavgusb mm0, [esi + edx] ; mm0 = avg([src], [src+stride]) movq [edi], mm0 add edi, edx ; dst += stride add esi, edx ; src += stride movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0 add edi, edx add esi, edx movq mm0, [esi] pavgusb mm0, [esi + edx] movq [edi], mm0; dec eax; jnz .loop pop edi pop esi rethalfpel_v_3dn_rounding1 movq mm7, [mmx_one] mov eax, 8.loop movq mm0, [esi] movq mm1, [esi + edx] movq mm2, mm0 ; backup mm0 pavgusb mm0, mm1 ; mm0 = avg([src], [src+stride]) paddb mm1, mm2 ; isolate and subtract lsb's from original values pand mm1, mm7 ; (compensate for pavgb rounding) psubusb mm0, mm1 movq [edi], mm0 add edi, edx ; dst += stride add esi, edx ; src += stride dec eax jnz .loop pop edi pop esi ret;===========================================================================;; void interpolate8x8_halfpel_v_mmx(uint8_t * const dst,; const uint8_t * const src,; const uint32_t stride,; const uint32_t rounding);;;===========================================================================align 16cglobal interpolate8x8_halfpel_v_mmxinterpolate8x8_halfpel_v_mmx push esi push edi mov eax, [esp + 8 + 16] ; roundinginterpolate8x8_halfpel_v_mmx.start movq mm7, [rounding1_mmx + eax * 8] mov edi, [esp + 8 + 4] ; dst mov esi, [esp + 8 + 8] ; src mov edx, [esp + 8 + 12] ; stride mov eax, 8 pxor mm6, mm6 ; zero .loop movq mm0, [esi] movq mm2, [esi + edx] movq mm1, mm0 movq mm3, mm2 punpcklbw mm0, mm6 ; mm01 = [src] punpckhbw mm1, mm6 ; mm23 = [src + 1] CALC_AVG mm0, mm1, mm2, mm3, mm7, mm6 packuswb mm0, mm1 movq [edi], mm0 ; [dst] = mm01 add esi, edx ; src += stride add edi, edx ; dst += stride dec eax jnz .loop pop edi pop esi ret;===========================================================================;; void interpolate8x8_halfpel_hv_xmm(uint8_t * const dst,; const uint8_t * const src,; const uint32_t stride, ; const uint32_t rounding);;;;===========================================================================align 16cglobal interpolate8x8_halfpel_hv_xmminterpolate8x8_halfpel_hv_xmm push esi push edi mov eax, [esp + 8 + 16] ; rounding or al,al jnz interpolate8x8_halfpel_hv_mmx.start mov edi, [esp + 8 + 4] ; dst mov esi, [esp + 8 + 8] ; src mov edx, [esp + 8 + 12] ; stride movq mm7, [mmx_one] mov eax, 8.loop ; current row movq mm0, [esi] movq mm1, [esi + 1] movq mm2, mm0 pavgb mm0, mm1 pxor mm1, mm2 ; next row movq mm2, [esi + edx] movq mm3, [esi + edx + 1] movq mm4, mm2 pavgb mm2, mm3 pxor mm3, mm4 ; add current + next row por mm1, mm3 movq mm3, mm0 pxor mm3, mm2 pand mm1, mm3 pavgb mm0, mm2 pand mm1, mm7 psubusb mm0, mm1 movq [edi], mm0 ; [dst] = mm01 add esi, edx ; src += stride add edi, edx ; dst += stride dec eax jnz .loop pop edi pop esi ret;===========================================================================;; void interpolate8x8_halfpel_hv_mmx(uint8_t * const dst,; const uint8_t * const src,; const uint32_t stride, ; const uint32_t rounding);;;;===========================================================================align 16cglobal interpolate8x8_halfpel_hv_mmxinterpolate8x8_halfpel_hv_mmx push esi push edi mov eax, [esp + 8 + 16] ; roundinginterpolate8x8_halfpel_hv_mmx.start movq mm7, [rounding2_mmx + eax * 8] mov edi, [esp + 8 + 4] ; dst mov esi, [esp + 8 + 8] ; src mov eax, 8 pxor mm6, mm6 ; zero mov edx, [esp + 8 + 12] ; stride .loop ; current row movq mm0, [esi] movq mm2, [esi + 1] movq mm1, mm0 movq mm3, mm2 punpcklbw mm0, mm6 ; mm01 = [src] punpcklbw mm2, mm6 ; mm23 = [src + 1] punpckhbw mm1, mm6 punpckhbw mm3, mm6 paddusw mm0, mm2 ; mm01 += mm23 paddusw mm1, mm3 ; next row movq mm4, [esi + edx] movq mm2, [esi + edx + 1] movq mm5, mm4 movq mm3, mm2 punpcklbw mm4, mm6 ; mm45 = [src + stride] punpcklbw mm2, mm6 ; mm23 = [src + stride + 1] punpckhbw mm5, mm6 punpckhbw mm3, mm6 paddusw mm4, mm2 ; mm45 += mm23 paddusw mm5, mm3 ; add current + next row paddusw mm0, mm4 ; mm01 += mm45 paddusw mm1, mm5 paddusw mm0, mm7 ; mm01 += rounding2 paddusw mm1, mm7 psrlw mm0, 2 ; mm01 >>= 2 psrlw mm1, 2 packuswb mm0, mm1 movq [edi], mm0 ; [dst] = mm01 add esi, edx ; src += stride add edi, edx ; dst += stride dec eax jnz .loop pop edi pop esi ret;===========================================================================;; void interpolate8x8_halfpel_hv_3dn(uint8_t * const dst,; const uint8_t * const src,; const uint32_t stride, ; const uint32_t rounding);;;;===========================================================================align 16cglobal interpolate8x8_halfpel_hv_3dninterpolate8x8_halfpel_hv_3dn push esi push edi mov eax, [esp + 8 + 16] ; rounding or al,al jnz near interpolate8x8_halfpel_hv_mmx.start mov edi, [esp + 8 + 4] ; dst mov esi, [esp + 8 + 8] ; src mov edx, [esp + 8 + 12] ; stride movq mm7, [mmx_one] mov eax, 8.loop ; current row movq mm0, [esi] movq mm1, [esi + 1] movq mm2, mm0 pavgusb mm0, mm1 pxor mm1, mm2 ; next row movq mm2, [esi + edx] movq mm3, [esi + edx + 1] movq mm4, mm2 pavgusb mm2, mm3 pxor mm3, mm4 ; add current + next row por mm1, mm3 movq mm3, mm0 pxor mm3, mm2 pand mm1, mm3 pavgusb mm0, mm2 pand mm1, mm7 psubusb mm0, mm1 movq [edi], mm0 ; [dst] = mm01 add esi, edx ; src += stride add edi, edx ; dst += stride dec eax jnz .loop pop edi pop esi ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -