; pixel-a.asm (.svn-base) — x264 AMD64 pixel metrics: SAD / SSD / SATD MMX/MMXEXT kernels.
; NOTE(review): this chunk was recovered from a web code-viewer dump; line structure restored below.
lea parm3q, [parm3q+4*parm4q]%endif HADAMARD4x4_SUM %1%endmacro;=============================================================================; Code;=============================================================================SECTION .textcglobal x264_pixel_sad_16x16_mmxextcglobal x264_pixel_sad_16x8_mmxextcglobal x264_pixel_sad_8x16_mmxextcglobal x264_pixel_sad_8x8_mmxextcglobal x264_pixel_sad_8x4_mmxextcglobal x264_pixel_sad_4x8_mmxextcglobal x264_pixel_sad_4x4_mmxextcglobal x264_pixel_sad_x3_16x16_mmxextcglobal x264_pixel_sad_x3_16x8_mmxextcglobal x264_pixel_sad_x3_8x16_mmxextcglobal x264_pixel_sad_x3_8x8_mmxextcglobal x264_pixel_sad_x3_8x4_mmxextcglobal x264_pixel_sad_x3_4x8_mmxextcglobal x264_pixel_sad_x3_4x4_mmxextcglobal x264_pixel_sad_x4_16x16_mmxextcglobal x264_pixel_sad_x4_16x8_mmxextcglobal x264_pixel_sad_x4_8x16_mmxextcglobal x264_pixel_sad_x4_8x8_mmxextcglobal x264_pixel_sad_x4_8x4_mmxextcglobal x264_pixel_sad_x4_4x8_mmxextcglobal x264_pixel_sad_x4_4x4_mmxextcglobal x264_pixel_sad_pde_16x16_mmxextcglobal x264_pixel_sad_pde_16x8_mmxextcglobal x264_pixel_sad_pde_8x16_mmxextcglobal x264_pixel_ssd_16x16_mmxcglobal x264_pixel_ssd_16x8_mmxcglobal x264_pixel_ssd_8x16_mmxcglobal x264_pixel_ssd_8x8_mmxcglobal x264_pixel_ssd_8x4_mmxcglobal x264_pixel_ssd_4x8_mmxcglobal x264_pixel_ssd_4x4_mmxcglobal x264_pixel_satd_4x4_mmxextcglobal x264_pixel_satd_4x8_mmxextcglobal x264_pixel_satd_8x4_mmxextcglobal x264_pixel_satd_8x8_mmxextcglobal x264_pixel_satd_16x8_mmxextcglobal x264_pixel_satd_8x16_mmxextcglobal x264_pixel_satd_16x16_mmxextcglobal x264_intra_satd_x3_4x4_mmxextcglobal x264_intra_satd_x3_8x8c_mmxextcglobal x264_intra_satd_x3_16x16_mmxextcglobal x264_pixel_ads4_mmxextcglobal x264_pixel_ads2_mmxextcglobal x264_pixel_ads1_mmxext%macro SAD_START 0 pxor mm0, mm0%endmacro%macro SAD_END 0 movd eax, mm0 ret%endmacro;-----------------------------------------------------------------------------; int x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int 
);-----------------------------------------------------------------------------%macro SAD 2ALIGN 16x264_pixel_sad_%1x%2_mmxext: SAD_START%rep %2/2 SAD_INC_2x%1P%endrep SAD_END%endmacroSAD 16, 16SAD 16, 8SAD 8, 16SAD 8, 8SAD 8, 4SAD 4, 8SAD 4, 4;-----------------------------------------------------------------------------; void x264_pixel_sad_x3_16x16_mmxext( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1,; uint8_t *pix2, int i_stride, int scores[3] );-----------------------------------------------------------------------------%macro SAD_X 3ALIGN 16x264_pixel_sad_x%1_%2x%3_mmxext: SAD_X%1_2x%2P 1%rep %3/2-1 SAD_X%1_2x%2P 0%endrep SAD_X%1_END%endmacroSAD_X 3, 16, 16SAD_X 3, 16, 8SAD_X 3, 8, 16SAD_X 3, 8, 8SAD_X 3, 8, 4SAD_X 3, 4, 8SAD_X 3, 4, 4SAD_X 4, 16, 16SAD_X 4, 16, 8SAD_X 4, 8, 16SAD_X 4, 8, 8SAD_X 4, 8, 4SAD_X 4, 4, 8SAD_X 4, 4, 4%macro PDE_CHECK 0 movd eax, mm0 cmp eax, parm5d ; prev_score jl .continue retALIGN 4.continue:%endmacro;-----------------------------------------------------------------------------; int x264_pixel_sad_pde_16x16_mmxext (uint8_t *, int, uint8_t *, int, int );-----------------------------------------------------------------------------%macro SAD_PDE 2 ALIGN 16x264_pixel_sad_pde_%1x%2_mmxext: SAD_START%rep %2/4 SAD_INC_2x%1P%endrep movd eax, mm0 cmp eax, parm5d ; prev_score jl .continue retALIGN 4.continue:%rep %2/4 SAD_INC_2x%1P%endrep SAD_END%endmacroSAD_PDE 16, 16SAD_PDE 16 , 8SAD_PDE 8, 16%macro SSD_START 0 pxor mm7, mm7 ; zero pxor mm0, mm0 ; mm0 holds the sum%endmacro%macro SSD_END 0 movq mm1, mm0 psrlq mm1, 32 paddd mm0, mm1 movd eax, mm0 ret%endmacro;-----------------------------------------------------------------------------; int x264_pixel_ssd_16x16_mmx (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------%macro SSD 2ALIGN 16x264_pixel_ssd_%1x%2_mmx: SSD_START%rep %2 SSD_INC_1x%1P%endrep SSD_END%endmacroSSD 16, 16SSD 16, 8SSD 8, 16SSD 8, 8SSD 8, 4SSD 4, 8SSD 4, 4%macro 
SATD_START 0 lea r10, [3*parm2q] ; 3*stride1 lea r11, [3*parm4q] ; 3*stride2%endmacro%macro SATD_END 0 pshufw mm1, mm0, 01001110b paddw mm0, mm1 pshufw mm1, mm0, 10110001b paddw mm0, mm1 movd eax, mm0 and eax, 0xffff ret%endmacroALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_4x4_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_4x4_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_4x8_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 1 LOAD_DIFF_HADAMARD_SUM mm1, 0, 0 paddw mm0, mm1 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_8x4_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_8x4_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm1 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_8x8_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_8x8_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 LOAD_DIFF_HADAMARD_SUM mm1, 4, 1 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm2 paddw mm0, mm1 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_16x8_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_16x8_mmxext: SATD_START 
LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 12, 1 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 12, 1 paddw mm0, mm2 paddw mm0, mm1 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_8x16_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_8x16_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 LOAD_DIFF_HADAMARD_SUM mm1, 4, 1 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 1 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 1 paddw mm1, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 1 paddw mm0, mm2 paddw mm0, mm1 SATD_ENDALIGN 16;-----------------------------------------------------------------------------; int x264_pixel_satd_16x16_mmxext (uint8_t *, int, uint8_t *, int );-----------------------------------------------------------------------------x264_pixel_satd_16x16_mmxext: SATD_START LOAD_DIFF_HADAMARD_SUM mm0, 0, 0 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 12, 1 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 12, 1 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 12, 1 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 0, 0 paddw mm0, mm1 LOAD_DIFF_HADAMARD_SUM mm1, 4, 0 paddw mm0, mm2 LOAD_DIFF_HADAMARD_SUM mm2, 8, 0 paddw mm0, mm1 
LOAD_DIFF_HADAMARD_SUM mm1, 12, 0 paddw mm0, mm2 paddw mm0, mm1 pxor mm3, mm3 pshufw mm1, mm0, 01001110b paddw mm0, mm1 punpcklwd mm0, mm3 pshufw mm1, mm0, 01001110b paddd mm0, mm1 movd eax, mm0 ret; in: parm1 = fenc; out: mm0..mm3 = hadamard coefsALIGN 16load_hadamard: pxor mm7, mm7 movd mm0, [parm1q+0*FENC_STRIDE] movd mm4, [parm1q+1*FENC_STRIDE] movd mm3, [parm1q+2*FENC_STRIDE] movd mm1, [parm1q+3*FENC_STRIDE] punpcklbw mm0, mm7 punpcklbw mm4, mm7 punpcklbw mm3, mm7 punpcklbw mm1, mm7 HADAMARD4x4 mm0, mm4, mm3, mm1 TRANSPOSE4x4 mm0, mm4, mm3, mm1, mm2 HADAMARD4x4 mm0, mm1, mm2, mm3 ret%macro SCALAR_SUMSUB 4 add %1, %2 add %3, %4 add %2, %2 add %4, %4 sub %2, %1 sub %4, %3%endmacro%macro SUM_MM_X3 8 ; 3x sum, 4x tmp, op pxor %7, %7 pshufw %4, %1, 01001110b pshufw %5, %2, 01001110b pshufw %6, %3, 01001110b paddw %1, %4 paddw %2, %5 paddw %3, %6 punpcklwd %1, %7 punpcklwd %2, %7 punpcklwd %3, %7 pshufw %4, %1, 01001110b pshufw %5, %2, 01001110b
; (end of chunk — non-file residue removed: the original dump appended the hosting
;  web viewer's keyboard-shortcut help text here, which is not part of pixel-a.asm)