📄 skl_img_x86.asm
字号:
; NOTE(review): the instructions down to the first `ret` are the tail of a
; routine whose label and setup lie above this chunk (presumably the 4x4 SSD
; routine, judging by the SSD_4x4_MMX macro it invokes -- confirm). They are
; reproduced unchanged. SSD_4x4_MMX itself is defined outside this view.
        SSD_4x4_MMX
        lea     eax, [eax+2*edx]
        lea     ecx, [ecx+2*edx]
        SSD_4x4_MMX
        movq    mm6, mm7
        psrlq   mm7, 32
        paddd   mm6, mm7                ; fold the two dword halves of mm7
        movd    eax, mm6
        ret

; General notes for the routines below
;   * Syntax: NASM, 32-bit. Arguments are read from the stack at [esp+4],
;     [esp+8], ... and the result is returned in eax.
;   * None of the routines in this section executes `emms`; the MMX state is
;     left as-is on return.
;   * COLLAPSE_MMX is a macro defined outside this view. It is used after the
;     word accumulators have been merged into mm6 and before eax is shifted,
;     so it presumably reduces mm6 into eax -- confirm against its definition.

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_16x16_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS (row stride in bytes)
; Out:  eax = (value produced by COLLAPSE_MMX) >> 8
; Uses: eax, ecx, mm0-mm3, mm5-mm7 (plus whatever COLLAPSE_MMX touches)
;//////////////////////////////////////////////////////////////////////

; One 16-byte row: widen bytes to words (mm7 must be zero) and add them into
; the word accumulators mm5 / mm6, then step eax to the next row.
%macro MEAN_16x16_MMX 0
        movq        mm0, [eax]
        movq        mm1, [eax+8]
        lea         eax, [eax+ecx]
        movq        mm2, mm0
        movq        mm3, mm1
        punpcklbw   mm0, mm7
        punpcklbw   mm1, mm7
        punpckhbw   mm2, mm7
        punpckhbw   mm3, mm7
        paddw       mm5, mm0
        paddw       mm6, mm1
        paddw       mm5, mm2
        paddw       mm6, mm3
%endmacro

align 16
Skl_Mean_16x16_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm5, mm5                ; accums
        pxor    mm6, mm6                ; accums
        pxor    mm7, mm7                ; zero

%rep 16                                 ; 16 rows
        MEAN_16x16_MMX
%endrep

        paddusw mm6, mm5                ; merge the two accumulators (saturating)
        COLLAPSE_MMX
        shr     eax, 8                  ; divide by 256 (= 16*16)
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_8x8_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = (value produced by COLLAPSE_MMX) >> 6
; Uses: eax, ecx, mm0-mm3, mm5-mm7 (plus whatever COLLAPSE_MMX touches)
;//////////////////////////////////////////////////////////////////////

; Two 8-byte rows per invocation; eax advances by 2*BpS.
%macro MEAN_8x8_MMX 0
        movq        mm0, [eax]
        movq        mm1, [eax+ecx]
        lea         eax, [eax+2*ecx]
        movq        mm2, mm0
        movq        mm3, mm1
        punpcklbw   mm0, mm7
        punpcklbw   mm1, mm7
        punpckhbw   mm2, mm7
        punpckhbw   mm3, mm7
        paddw       mm5, mm0
        paddw       mm6, mm1
        paddw       mm5, mm2
        paddw       mm6, mm3
%endmacro

align 16
Skl_Mean_8x8_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm5, mm5                ; accums
        pxor    mm6, mm6                ; accums
        pxor    mm7, mm7                ; zero

%rep 4                                  ; 4 x 2 rows
        MEAN_8x8_MMX
%endrep

        paddw   mm6, mm5                ; merge the two accumulators
        COLLAPSE_MMX
        shr     eax, 6                  ; divide by 64 (= 8*8)
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_4x4_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = (value produced by COLLAPSE_MMX) >> 4
; Uses: eax, ecx, mm0, mm1, mm5-mm7 (plus whatever COLLAPSE_MMX touches)
;//////////////////////////////////////////////////////////////////////

; Two 4-byte rows per invocation; eax advances by 2*BpS.
%macro MEAN_4x4_MMX 0
        movd        mm0, [eax]
        movd        mm1, [eax+ecx]
        lea         eax, [eax+2*ecx]
        punpcklbw   mm0, mm7
        punpcklbw   mm1, mm7
        paddw       mm5, mm0
        paddw       mm6, mm1
%endmacro

align 16
Skl_Mean_4x4_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm5, mm5                ; accums
        pxor    mm6, mm6                ; accums
        pxor    mm7, mm7                ; zero

%rep 2                                  ; 2 x 2 rows
        MEAN_4x4_MMX
%endrep

        paddw   mm6, mm5                ; merge the two accumulators
        COLLAPSE_MMX
        shr     eax, 4                  ; divide by 16 (= 4*4)
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_Sqr_16x16_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = (sum of squared byte values over 16 rows of 16 bytes) >> 8
; Uses: eax, ecx, mm0-mm3, mm6, mm7
;//////////////////////////////////////////////////////////////////////

; One 16-byte row: widen to words (mm6 must be zero), square-and-pair-add
; with pmaddwd, accumulate the dwords in mm7, step eax to the next row.
%macro SQR_16x16_MMX 0
        movq        mm0, [eax]
        movq        mm1, [eax+8]
        lea         eax, [eax+ecx]
        movq        mm2, mm0
        movq        mm3, mm1
        punpcklbw   mm0, mm6
        punpcklbw   mm1, mm6
        punpckhbw   mm2, mm6
        punpckhbw   mm3, mm6
        pmaddwd     mm0, mm0
        pmaddwd     mm1, mm1
        pmaddwd     mm2, mm2
        pmaddwd     mm3, mm3
        paddd       mm7, mm0
        paddd       mm7, mm1
        paddd       mm7, mm2
        paddd       mm7, mm3
%endmacro

align 16
Skl_Sqr_16x16_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero

%rep 16                                 ; 16 rows
        SQR_16x16_MMX
%endrep

        movq    mm6, mm7
        psrlq   mm7, 32
        paddd   mm6, mm7                ; low dword = sum of both dword lanes
        movd    eax, mm6
        shr     eax, 8                  ; divide by 256
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_Sqr_8x8_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = (sum of squared byte values over 8 rows of 8 bytes) >> 6
; Uses: eax, ecx, mm0-mm3, mm6, mm7
;//////////////////////////////////////////////////////////////////////

; Two 8-byte rows per invocation; eax advances by 2*BpS.
%macro SQR_8x8_MMX 0
        movq        mm0, [eax]
        movq        mm1, [eax+ecx]
        lea         eax, [eax+2*ecx]
        movq        mm2, mm0
        movq        mm3, mm1
        punpcklbw   mm0, mm6
        punpcklbw   mm1, mm6
        punpckhbw   mm2, mm6
        punpckhbw   mm3, mm6
        pmaddwd     mm0, mm0
        pmaddwd     mm1, mm1
        pmaddwd     mm2, mm2
        pmaddwd     mm3, mm3
        paddd       mm7, mm0
        paddd       mm7, mm1
        paddd       mm7, mm2
        paddd       mm7, mm3
%endmacro

align 16
Skl_Sqr_8x8_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero

%rep 4                                  ; 4 x 2 rows
        SQR_8x8_MMX
%endrep

        movq    mm6, mm7
        psrlq   mm7, 32
        paddd   mm6, mm7                ; low dword = sum of both dword lanes
        movd    eax, mm6
        shr     eax, 6                  ; divide by 64
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_Sqr_4x4_MMX
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = (sum of squared byte values over 4 rows of 4 bytes) >> 4
; Uses: eax, ecx, mm0, mm1, mm6, mm7
;//////////////////////////////////////////////////////////////////////

; Two 4-byte rows per invocation. Unlike the other row macros this one does
; NOT advance eax; the caller steps the pointer between invocations.
%macro SQR_4x4_MMX 0
        ; movd, not movq: punpcklbw below consumes only the low 4 bytes, so a
        ; 64-bit load would read 4 bytes beyond each row of the 4x4 block.
        movd        mm0, [eax]
        movd        mm1, [eax+ecx]
        punpcklbw   mm0, mm6
        punpcklbw   mm1, mm6
        pmaddwd     mm0, mm0
        pmaddwd     mm1, mm1
        paddd       mm7, mm0
        paddd       mm7, mm1
%endmacro

align 16
Skl_Sqr_4x4_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero

        SQR_4x4_MMX                     ; rows 0-1
        lea     eax, [eax+2*ecx]
        SQR_4x4_MMX                     ; rows 2-3

        movq    mm6, mm7
        psrlq   mm7, 32
        paddd   mm6, mm7                ; low dword = sum of both dword lanes
        movd    eax, mm6
        shr     eax, 4                  ; divide by 16
        ret

;//////////////////////////////////////////////////////////////////////
;//
;// SSE impl
;//
;//////////////////////////////////////////////////////////////////////

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x16_SSE / Skl_SAD_16x8_SSE / Skl_SAD_16x8_Field_SSE
;
; In:   [esp+4] = Src1, [esp+8] = Src2, [esp+12] = BpS
; Out:  eax = low dword of mm6, the accumulated psadbw results
; Uses: eax, ecx, edx, mm0, mm1, mm6, mm7
;//////////////////////////////////////////////////////////////////////

; One 16-byte row: SAD of [eax] against [edx], accumulated into mm6; both
; pointers advance by ecx.
%macro SAD_16x16_SSE 0
        movq    mm0, [eax]
        psadbw  mm0, [edx]
        movq    mm1, [eax+8]
        lea     eax, [eax+ecx]
        psadbw  mm1, [edx+8]
        add     edx, ecx
        paddusw mm6, mm0
        paddusw mm6, mm1
%endmacro

align 16
Skl_SAD_16x16_SSE:                      ; 104c (original note; presumably a cycle count)
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 16                                 ; 16 rows
        SAD_16x16_SSE
%endrep

        movd    eax, mm6
        ret

align 16
Skl_SAD_16x8_SSE:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 8                                  ; 8 rows
        SAD_16x16_SSE
%endrep

        movd    eax, mm6
        ret

align 16
Skl_SAD_16x8_Field_SSE:                 ; 104c (original note; presumably a cycle count)
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        lea     ecx, [ecx+ecx]          ; 2.BpS -> step over every other line
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 8                                  ; 8 rows, each 2*BpS apart
        SAD_16x16_SSE
%endrep

        movd    eax, mm6
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_Avrg_xxx_SSE
;
; SAD between Dst and the byte-wise pavgb average of Src1 and Src2.
;
; In:   [esp+4] = Dst, [esp+8] = Src1, [esp+12] = Src2, [esp+16] = BpS
; Out:  eax = low dword of mm6, the accumulated psadbw results
; Uses: eax, ecx, edx, mm0, mm1, mm6, mm7; ebx is saved and restored
;//////////////////////////////////////////////////////////////////////

; One 16-byte row; eax (Dst), edx (Src1) and ebx (Src2) advance by ecx.
%macro SAD_16x16_AVRG_SSE 0
        movq    mm0, [edx]
        movq    mm1, [edx+8]
        pavgb   mm0, [ebx]
        pavgb   mm1, [ebx+8]
        psadbw  mm0, [eax]
        lea     edx, [edx+ecx]
        psadbw  mm1, [eax+8]
        add     eax, ecx
        paddusw mm6, mm0
        lea     ebx, [ebx+ecx]
        paddusw mm6, mm1
%endmacro

; Two 8-byte rows; all three pointers advance by 2*ecx.
%macro SAD_8x8_AVRG_SSE 0
        movq    mm0, [edx]
        movq    mm1, [edx+ecx]
        pavgb   mm0, [ebx]
        pavgb   mm1, [ebx+ecx]
        psadbw  mm0, [eax]
        lea     edx, [edx+2*ecx]
        psadbw  mm1, [eax+ecx]
        lea     eax, [eax+2*ecx]
        paddusw mm6, mm0
        lea     ebx, [ebx+2*ecx]
        paddusw mm6, mm1
%endmacro

align 16
Skl_SAD_Avrg_16x16_SSE:
        mov     eax, [esp+ 4]           ; Dst
        mov     edx, [esp+ 8]           ; Src1
        mov     ecx, [esp+16]           ; BpS
        push    ebx
        mov     ebx, [esp+12+4]         ; Src2 (+4 compensates for the push)
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 16                                 ; 16 rows
        SAD_16x16_AVRG_SSE
%endrep

        pop     ebx
        movd    eax, mm6
        ret

align 16
Skl_SAD_Avrg_16x8_SSE:
        mov     eax, [esp+ 4]           ; Dst
        mov     edx, [esp+ 8]           ; Src1
        mov     ecx, [esp+16]           ; BpS
        push    ebx
        mov     ebx, [esp+12+4]         ; Src2 (+4 compensates for the push)
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 8                                  ; 8 rows
        SAD_16x16_AVRG_SSE
%endrep

        pop     ebx
        movd    eax, mm6
        ret

align 16
Skl_SAD_Avrg_8x16_SSE:
        mov     eax, [esp+ 4]           ; Dst
        mov     edx, [esp+ 8]           ; Src1
        mov     ecx, [esp+16]           ; BpS
        push    ebx
        mov     ebx, [esp+12+4]         ; Src2 (+4 compensates for the push)
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 8                                  ; 8 x 2 rows
        SAD_8x8_AVRG_SSE
%endrep

        pop     ebx
        movd    eax, mm6
        ret

align 16
Skl_SAD_Avrg_8x8_SSE:
        mov     eax, [esp+ 4]           ; Dst
        mov     edx, [esp+ 8]           ; Src1
        mov     ecx, [esp+16]           ; BpS
        push    ebx
        mov     ebx, [esp+12+4]         ; Src2 (+4 compensates for the push)
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 4                                  ; 4 x 2 rows
        SAD_8x8_AVRG_SSE
%endrep

        pop     ebx
        movd    eax, mm6
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x7_Self_SSE
;
; SAD of each 16-byte row against the row BpS bytes after it, over 7 row
; pairs of the same buffer.
;
; In:   [esp+4] = Src, [esp+8] = BpS
; Out:  eax = low dword of mm6, the accumulated psadbw results
; Uses: eax, ecx, edx, mm0, mm1, mm6, mm7
;//////////////////////////////////////////////////////////////////////

; One 16-byte row of [eax] against [edx]; both pointers advance by ecx.
%macro SAD_16x7_SSE 0
        movq    mm0, [eax]
        psadbw  mm0, [edx]
        movq    mm1, [eax+8]
        lea     eax, [eax+ecx]
        psadbw  mm1, [edx+8]
        add     edx, ecx
        paddusw mm6, mm0
        paddusw mm6, mm1
%endmacro

align 16
Skl_SAD_16x7_Self_SSE:                  ; 104c (original note; presumably a cycle count)
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        lea     edx, [eax+ecx]          ; edx = next row of the same buffer
        pxor    mm7, mm7                ; this is a NOP (mm7 is not read below)
        pxor    mm6, mm6                ; accum2

%rep 7                                  ; 7 row pairs
        SAD_16x7_SSE
%endrep

        movd    eax, mm6
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_8x8_SSE
;
; In:   [esp+4] = Src1, [esp+8] = Src2, [esp+12] = BpS
; Out:  eax = low dword of mm6, the accumulated psadbw results
; Uses: eax, ecx, edx, mm0, mm1, mm5, mm6; ebx is saved and restored
;//////////////////////////////////////////////////////////////////////

; Two 8-byte rows; expects ebx = 2*BpS and advances eax / edx by ebx.
%macro SAD_8x8_SSE 0
        movq    mm0, [eax]
        psadbw  mm0, [edx]
        movq    mm1, [eax+ecx]
        add     eax, ebx
        psadbw  mm1, [edx+ecx]
        lea     edx, [edx+ebx]
        paddusw mm6, mm0
        paddusw mm6, mm1
%endmacro

align 16
Skl_SAD_8x4_SSE:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        push    ebx
        lea     ebx, [ecx+ecx]          ; ebx = 2*BpS
        pxor    mm5, mm5                ; this is a NOP (mm5 is not read below)
        pxor    mm6, mm6                ; accum2

        SAD_8x8_SSE                     ; rows 0-1

        ; rows 2-3, written out so the final pointer updates are skipped
        movq    mm0, [eax]
        psadbw  mm0, [edx]
        movq    mm1, [eax+ecx]
        psadbw  mm1, [edx+ecx]
        pop     ebx
        paddusw mm6, mm0
        paddusw mm6, mm1
        movd    eax, mm6
        ret

; NOTE(review): the listing is cut off inside Skl_SAD_8x8_SSE; only the part
; visible in this chunk is reproduced below. The remainder of the routine
; (restoring ebx, the final accumulation and the return) is not shown here
; and must come from the continuation of the file.
align 16
Skl_SAD_8x8_SSE:                        ; 29c (original note; presumably a cycle count)
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        push    ebx
        lea     ebx, [ecx+ecx]          ; ebx = 2*BpS
        pxor    mm5, mm5                ; this is a NOP (mm5 is not read in the visible part)
        pxor    mm6, mm6                ; accum2

%rep 3                                  ; rows 0-5
        SAD_8x8_SSE
%endrep

        ; rows 6-7
        movq    mm0, [eax]
        psadbw  mm0, [edx]
        movq    mm1, [eax+ecx]
        psadbw  mm1, [edx+ecx]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -