; pixel-a.asm — x264 AMD64 pixel comparison primitives (SAD / SSD / SATD, MMXEXT)
; NOTE(review): this chunk was recovered from a web dump that had collapsed the
; file onto a handful of physical lines; NASM requires one statement per line,
; so the line structure below is restored.  Token stream is unchanged.
;
; Convention (from the rest of the file, not visible here): parm1q..parm4q are
; ABI-abstraction macros (SysV vs WIN64), and SAD_*/SSD_*/LOAD_DIFF_*/
; HADAMARD4x4_SUM are multi-line macros defined earlier in the file.
; All functions share the C signature:
;     int f( uint8_t *pix1, int stride1, uint8_t *pix2, int stride2 )

;-----------------------------------------------------------------------------
; int x264_pixel_sad_8x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_sad_8x4_mmxext:
    SAD_START
    SAD_INC_2x8P                        ; each SAD_INC_2x8P consumes 2 rows of 8 px
    SAD_INC_2x8P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_sad_4x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_sad_4x8_mmxext:
    SAD_START
    SAD_INC_2x4P                        ; 4 x (2 rows of 4 px) = 8 rows
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_sad_4x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_sad_4x4_mmxext:
    SAD_START
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_END

; Common prologue for the SSD functions: loads pointers/strides into
; rax/rbx/rcx/rdx and clears the accumulator.  rbx is callee-saved, hence
; the push (firstpush/pushreg/endprolog emit Win64 unwind info elsewhere).
%macro SSD_START 0
    firstpush rbx
    pushreg  rbx
    endprolog
    mov      rax, parm1q                ; pix1 = parm1
    movsxd   rbx, parm2d                ; stride1 = parm2 (sign-extend int)
%ifdef WIN64
    mov      rcx, parm3q                ; pix2
    movsxd   rdx, parm4d                ; stride2
%else
    xchg     rcx, rdx                   ; SysV: args already in rdx/rcx, swapped
%endif
    pxor     mm7, mm7                   ; zero
    pxor     mm0, mm0                   ; mm0 holds the sum
%endmacro

; Common epilogue: fold the two 32-bit partial sums in mm0 and return in eax.
%macro SSD_END 0
    movq     mm1, mm0
    psrlq    mm1, 32
    paddd    mm0, mm1                   ; low dword = high + low partial sums
    movd     eax, mm0
    pop      rbx
    ret
    endfunc
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_16x16_mmxext:
    SSD_START
    SSD_INC_8x16P                       ; 2 x (8 rows of 16 px)
    SSD_INC_8x16P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_16x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_16x8_mmxext:
    SSD_START
    SSD_INC_8x16P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_8x16_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x16_mmxext:
    SSD_START
    SSD_INC_4x8P                        ; 4 x (4 rows of 8 px)
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_8x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x8_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_INC_4x8P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_8x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x4_mmxext:
    SSD_START
    SSD_INC_4x8P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_4x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_4x8_mmxext:
    SSD_START
    SSD_INC_4x4P
    SSD_INC_4x4P
    SSD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_ssd_4x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_ssd_4x4_mmxext:
    SSD_START
    SSD_INC_4x4P
    SSD_END

; SATD prologue: nothing to do — args are already in the parm registers.
; The commented moves document the incoming register assignment.
%macro SATD_START 0
;   mov      rdi, rdi                   ; pix1
;   movsxd   rsi, esi                   ; stride1
;   mov      rdx, rdx                   ; pix2
;   movsxd   rcx, ecx                   ; stride2
%endmacro

; SATD epilogue: horizontal-add the four 16-bit lanes of mm0 and return the
; low 16 bits in eax (sum fits in 16 bits for these block sizes).
%macro SATD_END 0
    pshufw   mm1, mm0, 01001110b        ; swap dwords
    paddw    mm0, mm1
    pshufw   mm1, mm0, 10110001b        ; swap words within dwords
    paddw    mm0, mm1
    movd     eax, mm0
    and      eax, 0xffff
    ret
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_4x4_mmxext:
    SATD_START
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_4x8_mmxext:
    SATD_START
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm1
    paddw    mm0, mm1
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x4_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_8x4_mmxext:
    SATD_START
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    sub      parm1q, parm2q             ; step pointers back for the right half
    sub      parm3q, parm4q
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm1
    paddw    mm0, mm1
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_8x8_mmxext:
    SATD_START
    mov      r10, parm1q                ; pix1 (saved for the +4 column pass)
    mov      r11, parm3q                ; pix2
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x8_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_16x8_mmxext:
    SATD_START
    mov      r10, parm1q                ; keep block origin for column offsets
    mov      r11, parm3q
    ; columns 0..3
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm1
    ; columns 4..7
    mov      parm1q, r10
    mov      parm3q, r11
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; columns 8..11
    mov      parm1q, r10
    mov      parm3q, r11
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm2
    paddw    mm1, mm2
    ; columns 12..15
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x16_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_8x16_mmxext:
    SATD_START
    mov      r10, parm1q                ; keep block origin for column offset 4
    mov      r11, parm3q
    ; left 4-px column, 16 rows
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; right 4-px column, 16 rows (from saved origin, offset 4)
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm2
    paddw    mm1, mm2
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, r10, parm2q, r11, parm4q, 4
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    SATD_END

ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x16_mmxext (uint8_t *, int, uint8_t *, int );
;-----------------------------------------------------------------------------
x264_pixel_satd_16x16_mmxext:
    SATD_START
    mov      r10, parm1q                ; keep block origin for column offsets
    mov      r11, parm3q
    ; columns 0..3, 16 rows
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm0, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm0
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, parm1q, parm2q, parm3q, parm4q, 0
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; columns 4..7
    mov      parm1q, r10
    mov      parm3q, r11
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm2
    paddw    mm1, mm2
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, parm1q, parm2q, parm3q, parm4q, 4
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; columns 8..11
    mov      parm1q, r10
    mov      parm3q, r11
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm2
    paddw    mm1, mm2
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, parm1q, parm2q, parm3q, parm4q, 8
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; columns 12..15
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm1, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm1
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm2
    paddw    mm1, mm2
    LOAD_DIFF_INC_4x4 mm4, mm5, mm6, mm7, mm2, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm2
    LOAD_DIFF_4x4     mm4, mm5, mm6, mm7, mm3, r10, parm2q, r11, parm4q, 12
    HADAMARD4x4_SUM   mm3
    paddw    mm0, mm1
    paddw    mm2, mm3
    paddw    mm0, mm2
    ; 16x16 SATD can exceed 16 bits, so widen to dwords before the final fold
    ; (unlike SATD_END, which keeps 16-bit lanes).
    pxor     mm3, mm3
    pshufw   mm1, mm0, 01001110b        ; swap dwords
    paddw    mm0, mm1
    punpcklwd mm0, mm3                  ; zero-extend words to dwords
    pshufw   mm1, mm0, 01001110b
    paddd    mm0, mm1
    movd     eax, mm0
    ret
; (end of excerpt — trailing text here was web code-viewer UI chrome, keyboard-
;  shortcut help such as "copy: Ctrl+C, search: Ctrl+F, fullscreen: F11"; it is
;  not part of the source file and is neutralized as a comment.)