; pixel-a.asm.svn-base
; (web code-viewer header residue; the following line read "字号:" — Chinese for "font size")
; NOTE(review): this chunk was recovered from a web viewer that collapsed all
; statements onto a few lines; line breaks below are restored, all instruction
; and macro tokens are unchanged. Macros SAD_START/SAD_INC_2x4P/SAD_END,
; SSD_INC_*, LOAD_DIFF_4P, LOAD_DIFF_INC_4x4, HADAMARD4x4_FIRST/NEXT and
; MMX_SUM_MM are defined elsewhere in the file (not visible here).
; All routines use the cdecl convention: (uint8_t *pix1, int stride1,
; uint8_t *pix2, int stride2), result returned in eax.

; --- tail of a SAD routine whose label precedes this chunk (incomplete here) ---
    SAD_INC_2x4P                        ; presumably accumulates SAD over 2 rows of 4 px — confirm against macro def
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_END                             ; presumably reduces the accumulator into eax and returns

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_4x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_4x4_mmxext:
    SAD_START                           ; prologue macro (defined earlier in file)
    SAD_INC_2x4P                        ; rows 0-1
    SAD_INC_2x4P                        ; rows 2-3
    SAD_END

; Shared prologue for the SSD routines: load the four cdecl args into
; registers, zero mm7 (byte-unpack helper) and mm0 (running sum).
%macro SSD_START 0
    push    ebx                         ; ebx is callee-saved in cdecl
    mov     eax, [esp+ 8]               ; pix1
    mov     ebx, [esp+12]               ; stride1
    mov     ecx, [esp+16]               ; pix2
    mov     edx, [esp+20]               ; stride2
    pxor    mm7, mm7                    ; zero
    pxor    mm0, mm0                    ; mm0 holds the sum
%endmacro

; Shared epilogue: mm0 holds two 32-bit partial sums; fold high dword into
; low, move the total into eax, restore ebx and return.
%macro SSD_END 0
    movq    mm1, mm0
    psrlq   mm1, 32                     ; mm1 = high dword of mm0
    paddd   mm0, mm1                    ; low dword = total SSD
    movd    eax, mm0
    pop     ebx
    ret
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_ssd_16x16_mmxext:
    SSD_START
    SSD_INC_8x16P                       ; presumably 8 rows of 16 px per invocation — confirm against macro def
    SSD_INC_8x16P
    SSD_END

ALIGN 16
x264_pixel_ssd_16x8_mmxext:
    SSD_START
    SSD_INC_8x16P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x16_mmxext:
    SSD_START
    SSD_INC_4x8P                        ; presumably 4 rows of 8 px per invocation
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x8_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x4_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_4x8_mmxext:
    SSD_START
    SSD_INC_4x4P                        ; presumably 4 rows of 4 px per invocation
    SSD_INC_4x4P
    SSD_END

ALIGN 16
x264_pixel_ssd_4x4_mmxext:
    SSD_START
    SSD_INC_4x4P
    SSD_END

;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_4x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_4x4_mmxext:
    push    ebx
    mov     eax, [esp+ 8]               ; pix1
    mov     ebx, [esp+12]               ; stride1
    mov     ecx, [esp+16]               ; pix2
    mov     edx, [esp+20]               ; stride2
    pxor    mm7, mm7
    ; Load the four rows of pix1-pix2 differences into mm0..mm3.
    LOAD_DIFF_4P  mm0, mm6, mm7, [eax],       [ecx]
    LOAD_DIFF_4P  mm1, mm6, mm7, [eax+ebx],   [ecx+edx]
    LOAD_DIFF_4P  mm2, mm6, mm7, [eax+2*ebx], [ecx+2*edx]
    add     eax, ebx                    ; advance one row so that 2*stride below addresses row 3
    add     ecx, edx
    LOAD_DIFF_4P  mm3, mm6, mm7, [eax+2*ebx], [ecx+2*edx]
    HADAMARD4x4_FIRST                   ; presumably 4x4 Hadamard transform + first accumulation into mm0
    MMX_SUM_MM mm0, mm7                 ; presumably reduces the SATD accumulator into eax
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_4x8_mmxext:
    push    ebx
    mov     eax, [esp+ 8]               ; pix1
    mov     ebx, [esp+12]               ; stride1
    mov     ecx, [esp+16]               ; pix2
    mov     edx, [esp+20]               ; stride2
    pxor    mm7, mm7
    ; Two vertically stacked 4x4 sub-blocks; the trailing macro argument is a
    ; column offset (0/4/8/12 across these routines) — confirm against macro def.
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT                    ; presumably transforms and adds into the mm0 accumulator
    MMX_SUM_MM mm0, mm7
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_8x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x4_mmxext:
    push    ebx
    mov     eax, [esp+ 8]               ; pix1
    mov     ebx, [esp+12]               ; stride1
    mov     ecx, [esp+16]               ; pix2
    mov     edx, [esp+20]               ; stride2
    pxor    mm7, mm7
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    mov     eax, [esp+ 8]               ; pix1 — rewind to top for the second column
    mov     ecx, [esp+16]               ; pix2
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_8x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x8_mmxext:
    push    ebx
    mov     eax, [esp+ 8]               ; pix1
    mov     ebx, [esp+12]               ; stride1
    mov     ecx, [esp+16]               ; pix2
    mov     edx, [esp+20]               ; stride2
    pxor    mm7, mm7
    ; Left 4-px column, rows 0-7:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    mov     eax, [esp+ 8]               ; pix1 — rewind to top for the right column
    mov     ecx, [esp+16]               ; pix2
    ; Right 4-px column, rows 0-7:
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_16x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_16x8_mmxext:
    push    ebx
    push    ebp                         ; ebp accumulates partial sums across MMX_SUM_MM calls
    mov     eax, [esp+12]               ; pix1 (offsets +4: two pushes)
    mov     ebx, [esp+16]               ; stride1
    mov     ecx, [esp+20]               ; pix2
    mov     edx, [esp+24]               ; stride2
    pxor    mm7, mm7
    xor     ebp, ebp                    ; clear accumulator (overwritten by mov ebp,eax below before first use)
    ; Columns 0-7, rows 0-7:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    mov     eax, [esp+12]               ; pix1 — rewind to top
    mov     ecx, [esp+20]               ; pix2
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7                 ; eax = SATD of left 8x8 half
    mov     ebp, eax                    ; stash partial sum
    mov     eax, [esp+12]               ; pix1 — rewind for right half
    mov     ecx, [esp+20]               ; pix2
    ; Columns 8-15, rows 0-7:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_NEXT
    mov     eax, [esp+12]               ; pix1 — rewind to top
    mov     ecx, [esp+20]               ; pix2
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7                 ; eax = SATD of right 8x8 half
    add     eax, ebp                    ; total = left + right
    pop     ebp
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_8x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x16_mmxext:
    push    ebx
    push    ebp                         ; ebp accumulates partial sums
    mov     eax, [esp+12]               ; pix1
    mov     ebx, [esp+16]               ; stride1
    mov     ecx, [esp+20]               ; pix2
    mov     edx, [esp+24]               ; stride2
    pxor    mm7, mm7
    xor     ebp, ebp                    ; clear accumulator (overwritten by mov ebp,eax below before first use)
    ; Left 4-px column, rows 0-15 (four stacked 4x4 sub-blocks):
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7                 ; eax = SATD of left 4x16 strip
    mov     ebp, eax                    ; stash partial sum
    mov     eax, [esp+12]               ; pix1 — rewind to top for the right column
    mov     ecx, [esp+20]               ; pix2
    ; Right 4-px column, rows 0-15:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7                 ; eax = SATD of right 4x16 strip
    add     eax, ebp                    ; total = left + right
    pop     ebp
    pop     ebx
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_satd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_16x16_mmxext:
    push    ebx
    push    ebp                         ; ebp accumulates partial sums across the four strips
    mov     eax, [esp+12]               ; pix1
    mov     ebx, [esp+16]               ; stride1
    mov     ecx, [esp+20]               ; pix2
    mov     edx, [esp+24]               ; stride2
    pxor    mm7, mm7
    xor     ebp, ebp                    ; clear accumulator (overwritten by mov ebp,eax below before first use)
    ; Strip 1: column offset 0, rows 0-15:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 0
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    mov     ebp, eax                    ; stash strip-1 sum
    mov     eax, [esp+12]               ; pix1 — rewind to top
    mov     ecx, [esp+20]               ; pix2
    ; Strip 2: column offset 4, rows 0-15:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 4
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    add     ebp, eax                    ; accumulate strip-2 sum
    mov     eax, [esp+12]               ; pix1 — rewind to top
    mov     ecx, [esp+20]               ; pix2
    ; Strip 3: column offset 8, rows 0-15:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 8
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    add     ebp, eax                    ; accumulate strip-3 sum
    mov     eax, [esp+12]               ; pix1 — rewind to top
    mov     ecx, [esp+20]               ; pix2
    ; Strip 4: column offset 12, rows 0-15:
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_FIRST
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_NEXT
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, eax, ebx, ecx, edx, 12
    HADAMARD4x4_NEXT
    MMX_SUM_MM mm0, mm7
    add     eax, ebp                    ; total = strip4 + (strip1 + strip2 + strip3)
    pop     ebp
    pop     ebx
    ret
; --- web code-viewer chrome (not part of the source), translated from Chinese ---
; Shortcut help: copy code Ctrl+C; search code Ctrl+F; fullscreen F11;
; toggle theme Ctrl+Shift+D; show shortcuts ?; larger font Ctrl+=; smaller font Ctrl+-