📄 pixel-a.asm.svn-base
字号:
;=============================================================================
; SSD and SATD entry points (MMXEXT), x86-64.
;
; Every function in this section has the C prototype
;     int f( uint8_t *pix1, int stride1, uint8_t *pix2, int stride2 );
; and reads its arguments from rdi, esi, rdx, ecx, i.e. the System V AMD64
; integer-argument order (not a stack-based __cdecl convention).
;
; Working registers once the arguments have been moved into place:
;     rax = pix1        rbx = stride1 (sign-extended to 64 bits)
;     rcx = pix2        rdx = stride2 (sign-extended to 64 bits)
;     mm7 = 0
; rbx (and rbp where used) are callee-saved and are pushed/popped here.
; r8, r9 (and r10, r11 in the wider SATD functions) are used as scratch.
; No emms is issued anywhere in this section.
; NOTE(review): presumably the caller is responsible for emms - confirm.
;
; SSD_INC_*, LOAD_DIFF_4P, LOAD_DIFF_INC_4x4, HADAMARD4x4_FIRST/NEXT and
; MMX_SUM_MM are defined outside this section.
;=============================================================================

;-----------------------------------------------------------------------------
; PIXEL_LOAD_ARGS
;   Moves the four incoming arguments into rax/rbx/rcx/rdx.  rdx and rcx are
;   both a source and a destination, so they are staged through r9 / r8d.
;   Clobbers: r8, r9.  The caller must already have saved rbx.
;-----------------------------------------------------------------------------
%macro PIXEL_LOAD_ARGS 0
    mov     r9,  rdx                ; stage pix2    (rdx is reused for stride2)
    mov     r8d, ecx                ; stage stride2 (rcx is reused for pix2)
    mov     rax, rdi                ; rax = pix1
    movsxd  rbx, esi                ; rbx = (int64_t)stride1
    mov     rcx, r9                 ; rcx = pix2
    movsxd  rdx, r8d                ; rdx = (int64_t)stride2
%endmacro

;-----------------------------------------------------------------------------
; SSD_START
;   Common SSD prologue: saves rbx, loads the arguments, clears mm7 and the
;   accumulator mm0.
;-----------------------------------------------------------------------------
%macro SSD_START 0
    push    rbx
    PIXEL_LOAD_ARGS
    pxor    mm7, mm7                ; zero
    pxor    mm0, mm0                ; mm0 holds the sum
%endmacro

;-----------------------------------------------------------------------------
; SSD_END
;   Common SSD epilogue: adds the high dword of mm0 to its low dword, returns
;   that 32-bit total in eax, restores rbx and returns.
;-----------------------------------------------------------------------------
%macro SSD_END 0
    movq    mm1, mm0
    psrlq   mm1, 32                 ; mm1 = high dword of the accumulator
    paddd   mm0, mm1                ; low dword = low + high
    movd    eax, mm0                ; 32-bit write also zeroes bits 63:32 of rax
    pop     rbx
    ret
%endmacro

;-----------------------------------------------------------------------------
; int x264_pixel_ssd_WxH_mmxext( uint8_t *pix1, int stride1,
;                                uint8_t *pix2, int stride2 )
;   Each entry point is SSD_START, a fixed number of SSD_INC_* steps, then
;   SSD_END.  The SSD_INC_* macros are defined elsewhere; they are expected
;   to accumulate into mm0 and advance the pixel pointers.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssd_16x16_mmxext:
    SSD_START
    SSD_INC_8x16P
    SSD_INC_8x16P
    SSD_END

ALIGN 16
x264_pixel_ssd_16x8_mmxext:
    SSD_START
    SSD_INC_8x16P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x16_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x8_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_8x4_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_END

ALIGN 16
x264_pixel_ssd_4x8_mmxext:
    SSD_START
    SSD_INC_4x4P
    SSD_INC_4x4P
    SSD_END

ALIGN 16
x264_pixel_ssd_4x4_mmxext:
    SSD_START
    SSD_INC_4x4P
    SSD_END

;-----------------------------------------------------------------------------
; SATD helper macros (local to this section; they emit no code by themselves)
;
; SATD_SAVE_BASE / SATD_RESTORE_BASE
;   Keep the starting pix1/pix2 pointers in r10/r11 and reload them.  The
;   functions reload rax/rcx before every change of the last
;   LOAD_DIFF_INC_4x4 argument.
;   NOTE(review): this implies LOAD_DIFF_INC_4x4 advances rax and rcx
;   (presumably by four rows) - confirm against its definition.
;
; SATD_BLOCK_FIRST off / SATD_BLOCK_NEXT off
;   One LOAD_DIFF_INC_4x4 followed by the matching HADAMARD4x4_* step.
;   FIRST loads into mm0-mm3, NEXT loads into mm1-mm4.  `off` is forwarded
;   unchanged as the last LOAD_DIFF_INC_4x4 argument (presumably a horizontal
;   byte offset; the values used are 0, 4, 8 and 12).
;-----------------------------------------------------------------------------
%macro SATD_SAVE_BASE 0
    mov     r10, rax                ; save pix1
    mov     r11, rcx                ; save pix2
%endmacro

%macro SATD_RESTORE_BASE 0
    mov     rax, r10                ; load pix1
    mov     rcx, r11                ; load pix2
%endmacro

%macro SATD_BLOCK_FIRST 1
    LOAD_DIFF_INC_4x4 mm0, mm1, mm2, mm3, mm6, mm7, rax, rbx, rcx, rdx, %1
    HADAMARD4x4_FIRST
%endmacro

%macro SATD_BLOCK_NEXT 1
    LOAD_DIFF_INC_4x4 mm1, mm2, mm3, mm4, mm6, mm7, rax, rbx, rcx, rdx, %1
    HADAMARD4x4_NEXT
%endmacro

;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x4_mmxext( uint8_t *pix1, int stride1,
;                                 uint8_t *pix2, int stride2 )
;   Loads the four row differences explicitly (rows 0-2 from the base
;   pointers, row 3 after stepping both pointers down one row), then runs a
;   single HADAMARD4x4_FIRST and reduces with MMX_SUM_MM.
;   The value returned is whatever MMX_SUM_MM leaves in rax.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_4x4_mmxext:
    push    rbx
    PIXEL_LOAD_ARGS
    pxor    mm7, mm7

    LOAD_DIFF_4P mm0, mm6, mm7, [rax], [rcx]
    LOAD_DIFF_4P mm1, mm6, mm7, [rax+rbx], [rcx+rdx]
    LOAD_DIFF_4P mm2, mm6, mm7, [rax+2*rbx], [rcx+2*rdx]
    add     rax, rbx                ; step one row so [base+2*stride] is row 3
    add     rcx, rdx
    LOAD_DIFF_4P mm3, mm6, mm7, [rax+2*rbx], [rcx+2*rdx]

    HADAMARD4x4_FIRST

    MMX_SUM_MM mm0, mm7
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x8_mmxext( uint8_t *pix1, int stride1,
;                                 uint8_t *pix2, int stride2 )
;   Two 4x4 steps at offset 0, back to back.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_4x8_mmxext:
    push    rbx
    PIXEL_LOAD_ARGS
    pxor    mm7, mm7

    SATD_BLOCK_FIRST 0
    SATD_BLOCK_NEXT  0

    MMX_SUM_MM mm0, mm7
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x4_mmxext( uint8_t *pix1, int stride1,
;                                 uint8_t *pix2, int stride2 )
;   One 4x4 step at offset 0 and one at offset 4, both starting from the
;   saved base pointers.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_8x4_mmxext:
    push    rbx
    PIXEL_LOAD_ARGS
    SATD_SAVE_BASE
    pxor    mm7, mm7

    SATD_BLOCK_FIRST 0
    SATD_RESTORE_BASE
    SATD_BLOCK_NEXT  4

    MMX_SUM_MM mm0, mm7
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x8_mmxext( uint8_t *pix1, int stride1,
;                                 uint8_t *pix2, int stride2 )
;   Two 4x4 steps at offset 0, then two at offset 4 from the saved base.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_8x8_mmxext:
    push    rbx
    PIXEL_LOAD_ARGS
    SATD_SAVE_BASE
    pxor    mm7, mm7

    SATD_BLOCK_FIRST 0
    SATD_BLOCK_NEXT  0
    SATD_RESTORE_BASE
    SATD_BLOCK_NEXT  4
    SATD_BLOCK_NEXT  4

    MMX_SUM_MM mm0, mm7
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x8_mmxext( uint8_t *pix1, int stride1,
;                                  uint8_t *pix2, int stride2 )
;   Processed as two groups of four 4x4 steps (offsets 0/4, then 8/12).
;   Each group is reduced separately with MMX_SUM_MM; the first group's
;   total is parked in rbp and added to the second before returning.
;   rbp needs no initialisation: its first use is the `mov rbp, rax` below.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_16x8_mmxext:
    push    rbx
    push    rbp
    PIXEL_LOAD_ARGS
    SATD_SAVE_BASE
    pxor    mm7, mm7

    ; ---- offsets 0 and 4 ----
    SATD_BLOCK_FIRST 0
    SATD_BLOCK_NEXT  0
    SATD_RESTORE_BASE
    SATD_BLOCK_NEXT  4
    SATD_BLOCK_NEXT  4
    MMX_SUM_MM mm0, mm7
    mov     rbp, rax                ; rbp = total of the first group

    ; ---- offsets 8 and 12 ----
    SATD_RESTORE_BASE
    SATD_BLOCK_FIRST 8
    SATD_BLOCK_NEXT  8
    SATD_RESTORE_BASE
    SATD_BLOCK_NEXT  12
    SATD_BLOCK_NEXT  12
    MMX_SUM_MM mm0, mm7

    add     rax, rbp                ; return first group + second group
    pop     rbp
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x16_mmxext( uint8_t *pix1, int stride1,
;                                  uint8_t *pix2, int stride2 )
;   Processed as two groups of four 4x4 steps: all four at offset 0, then
;   all four at offset 4.  Each group is reduced separately and the two
;   totals are added.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_8x16_mmxext:
    push    rbx
    push    rbp
    PIXEL_LOAD_ARGS
    SATD_SAVE_BASE
    pxor    mm7, mm7

    ; ---- offset 0 ----
    SATD_BLOCK_FIRST 0
%rep 3
    SATD_BLOCK_NEXT  0
%endrep
    MMX_SUM_MM mm0, mm7
    mov     rbp, rax                ; rbp = total of the first group

    ; ---- offset 4 ----
    SATD_RESTORE_BASE
    SATD_BLOCK_FIRST 4
%rep 3
    SATD_BLOCK_NEXT  4
%endrep
    MMX_SUM_MM mm0, mm7

    add     rax, rbp                ; return first group + second group
    pop     rbp
    pop     rbx
    ret

;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x16_mmxext( uint8_t *pix1, int stride1,
;                                   uint8_t *pix2, int stride2 )
;   Four groups of four 4x4 steps, one group per offset (0, 4, 8, 12).
;   Each group is reduced separately with MMX_SUM_MM and the running total
;   is kept in rbp.
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_satd_16x16_mmxext:
    push    rbx
    push    rbp
    PIXEL_LOAD_ARGS
    SATD_SAVE_BASE
    pxor    mm7, mm7

    ; ---- offset 0 ----
    SATD_BLOCK_FIRST 0
%rep 3
    SATD_BLOCK_NEXT  0
%endrep
    MMX_SUM_MM mm0, mm7
    mov     rbp, rax                ; running total starts with group 0

    ; ---- offset 4 ----
    SATD_RESTORE_BASE
    SATD_BLOCK_FIRST 4
%rep 3
    SATD_BLOCK_NEXT  4
%endrep
    MMX_SUM_MM mm0, mm7
    add     rbp, rax

    ; ---- offset 8 ----
    SATD_RESTORE_BASE
    SATD_BLOCK_FIRST 8
%rep 3
    SATD_BLOCK_NEXT  8
%endrep
    MMX_SUM_MM mm0, mm7
    add     rbp, rax

    ; ---- offset 12 ----
    SATD_RESTORE_BASE
    SATD_BLOCK_FIRST 12
%rep 3
    SATD_BLOCK_NEXT  12
%endrep
    MMX_SUM_MM mm0, mm7

    add     rax, rbp                ; return group 3 + running total
    pop     rbp
    pop     rbx
    ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -