pixel-a.asm
;*****************************************************************************
;* pixel.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003 x264 project
;* $Id: pixel.asm,v 1.1 2004/06/03 19:27:07 fenrir Exp $
;*
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
;*****************************************************************************

BITS 64

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================

%include "amd64inc.asm"

; sad: sum of absolute differences, accumulated in mm0 via psadbw

; accumulate the SAD of two 16-pixel rows into mm0, advancing both pointers
%macro SAD_INC_2x16P 0
    movq    mm1,    [parm1q]
    movq    mm2,    [parm1q+8]
    movq    mm3,    [parm1q+parm2q]
    movq    mm4,    [parm1q+parm2q+8]
    psadbw  mm1,    [parm3q]
    psadbw  mm2,    [parm3q+8]
    psadbw  mm3,    [parm3q+parm4q]
    psadbw  mm4,    [parm3q+parm4q+8]
    lea     parm1q, [parm1q+2*parm2q]
    paddw   mm1,    mm2
    paddw   mm3,    mm4
    lea     parm3q, [parm3q+2*parm4q]
    paddw   mm0,    mm1
    paddw   mm0,    mm3
%endmacro

%macro SAD_INC_2x8P 0
    movq    mm1,    [parm1q]
    movq    mm2,    [parm1q+parm2q]
    psadbw  mm1,    [parm3q]
    psadbw  mm2,    [parm3q+parm4q]
    lea     parm1q, [parm1q+2*parm2q]
    paddw   mm0,    mm1
    paddw   mm0,    mm2
    lea     parm3q, [parm3q+2*parm4q]
%endmacro

%macro SAD_INC_2x4P 0
    movd      mm1,  [parm1q]
    movd      mm2,  [parm3q]
    punpckldq mm1,  [parm1q+parm2q]
    punpckldq mm2,  [parm3q+parm4q]
    psadbw    mm1,  mm2
    paddw     mm0,  mm1
    lea     parm1q, [parm1q+2*parm2q]
    lea     parm3q, [parm3q+2*parm4q]
%endmacro

; sad x3 / x4: one encoded block (FENC_STRIDE) against 3 or 4 reference blocks

%macro SAD_X3_START_1x8P 0
    movq    mm3,    [parm1q]
    movq    mm0,    [parm2q]
    movq    mm1,    [parm3q]
    movq    mm2,    [parm4q]
    psadbw  mm0,    mm3
    psadbw  mm1,    mm3
    psadbw  mm2,    mm3
%endmacro

%macro SAD_X3_1x8P 2
    movq    mm3,    [parm1q+%1]
    movq    mm4,    [parm2q+%2]
    movq    mm5,    [parm3q+%2]
    movq    mm6,    [parm4q+%2]
    psadbw  mm4,    mm3
    psadbw  mm5,    mm3
    psadbw  mm6,    mm3
    paddw   mm0,    mm4
    paddw   mm1,    mm5
    paddw   mm2,    mm6
%endmacro

%macro SAD_X3_START_2x4P 3
    movd      mm3,  [parm1q]
    movd      %1,   [parm2q]
    movd      %2,   [parm3q]
    movd      %3,   [parm4q]
    punpckldq mm3,  [parm1q+FENC_STRIDE]
    punpckldq %1,   [parm2q+parm5q]
    punpckldq %2,   [parm3q+parm5q]
    punpckldq %3,   [parm4q+parm5q]
    psadbw    %1,   mm3
    psadbw    %2,   mm3
    psadbw    %3,   mm3
%endmacro

%macro SAD_X3_2x16P 1
%if %1
    SAD_X3_START_1x8P
%else
    SAD_X3_1x8P 0, 0
%endif
    SAD_X3_1x8P 8, 8
    SAD_X3_1x8P FENC_STRIDE, parm5q
    SAD_X3_1x8P FENC_STRIDE+8, parm5q+8
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm5q]
    lea     parm3q, [parm3q+2*parm5q]
    lea     parm4q, [parm4q+2*parm5q]
%endmacro

%macro SAD_X3_2x8P 1
%if %1
    SAD_X3_START_1x8P
%else
    SAD_X3_1x8P 0, 0
%endif
    SAD_X3_1x8P FENC_STRIDE, parm5q
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm5q]
    lea     parm3q, [parm3q+2*parm5q]
    lea     parm4q, [parm4q+2*parm5q]
%endmacro

%macro SAD_X3_2x4P 1
%if %1
    SAD_X3_START_2x4P mm0, mm1, mm2
%else
    SAD_X3_START_2x4P mm4, mm5, mm6
    paddw   mm0,    mm4
    paddw   mm1,    mm5
    paddw   mm2,    mm6
%endif
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm5q]
    lea     parm3q, [parm3q+2*parm5q]
    lea     parm4q, [parm4q+2*parm5q]
%endmacro

%macro SAD_X4_START_1x8P 0
    movq    mm7,    [parm1q]
    movq    mm0,    [parm2q]
    movq    mm1,    [parm3q]
    movq    mm2,    [parm4q]
    movq    mm3,    [parm5q]
    psadbw  mm0,    mm7
    psadbw  mm1,    mm7
    psadbw  mm2,    mm7
    psadbw  mm3,    mm7
%endmacro

%macro SAD_X4_1x8P 2
    movq    mm7,    [parm1q+%1]
    movq    mm4,    [parm2q+%2]
    movq    mm5,    [parm3q+%2]
    movq    mm6,    [parm4q+%2]
    psadbw  mm4,    mm7
    psadbw  mm5,    mm7
    psadbw  mm6,    mm7
    psadbw  mm7,    [parm5q+%2]
    paddw   mm0,    mm4
    paddw   mm1,    mm5
    paddw   mm2,    mm6
    paddw   mm3,    mm7
%endmacro

%macro SAD_X4_START_2x4P 0
    movd      mm7,  [parm1q]
    movd      mm0,  [parm2q]
    movd      mm1,  [parm3q]
    movd      mm2,  [parm4q]
    movd      mm3,  [parm5q]
    punpckldq mm7,  [parm1q+FENC_STRIDE]
    punpckldq mm0,  [parm2q+parm6q]
    punpckldq mm1,  [parm3q+parm6q]
    punpckldq mm2,  [parm4q+parm6q]
    punpckldq mm3,  [parm5q+parm6q]
    psadbw    mm0,  mm7
    psadbw    mm1,  mm7
    psadbw    mm2,  mm7
    psadbw    mm3,  mm7
%endmacro

%macro SAD_X4_INC_2x4P 0
    movd      mm7,  [parm1q]
    movd      mm4,  [parm2q]
    movd      mm5,  [parm3q]
    punpckldq mm7,  [parm1q+FENC_STRIDE]
    punpckldq mm4,  [parm2q+parm6q]
    punpckldq mm5,  [parm3q+parm6q]
    psadbw    mm4,  mm7
    psadbw    mm5,  mm7
    paddw     mm0,  mm4
    paddw     mm1,  mm5
    movd      mm4,  [parm4q]
    movd      mm5,  [parm5q]
    punpckldq mm4,  [parm4q+parm6q]
    punpckldq mm5,  [parm5q+parm6q]
    psadbw    mm4,  mm7
    psadbw    mm5,  mm7
    paddw     mm2,  mm4
    paddw     mm3,  mm5
%endmacro

%macro SAD_X4_2x16P 1
%if %1
    SAD_X4_START_1x8P
%else
    SAD_X4_1x8P 0, 0
%endif
    SAD_X4_1x8P 8, 8
    SAD_X4_1x8P FENC_STRIDE, parm6q
    SAD_X4_1x8P FENC_STRIDE+8, parm6q+8
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm6q]
    lea     parm3q, [parm3q+2*parm6q]
    lea     parm4q, [parm4q+2*parm6q]
    lea     parm5q, [parm5q+2*parm6q]
%endmacro

%macro SAD_X4_2x8P 1
%if %1
    SAD_X4_START_1x8P
%else
    SAD_X4_1x8P 0, 0
%endif
    SAD_X4_1x8P FENC_STRIDE, parm6q
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm6q]
    lea     parm3q, [parm3q+2*parm6q]
    lea     parm4q, [parm4q+2*parm6q]
    lea     parm5q, [parm5q+2*parm6q]
%endmacro

%macro SAD_X4_2x4P 1
%if %1
    SAD_X4_START_2x4P
%else
    SAD_X4_INC_2x4P
%endif
    add     parm1q, 2*FENC_STRIDE
    lea     parm2q, [parm2q+2*parm6q]
    lea     parm3q, [parm3q+2*parm6q]
    lea     parm4q, [parm4q+2*parm6q]
    lea     parm5q, [parm5q+2*parm6q]
%endmacro

; store the three SADs to the result array and return
%macro SAD_X3_END 0
    movd    [parm6q+0], mm0
    movd    [parm6q+4], mm1
    movd    [parm6q+8], mm2
    ret
%endmacro

; store the four SADs through the result pointer (parm7q) and return
%macro SAD_X4_END 0
    mov     rax, parm7q
    movd    [rax+0],  mm0
    movd    [rax+4],  mm1
    movd    [rax+8],  mm2
    movd    [rax+12], mm3
    ret
%endmacro

; ssd: sum of squared differences

%macro SSD_INC_1x16P 0
    movq    mm1,    [parm1q]
    movq    mm2,    [parm3q]
    movq    mm3,    [parm1q+8]
    movq    mm4,    [parm3q+8]
    movq    mm5,    mm2
    movq    mm6,    mm4
    psubusb mm2,    mm1
    psubusb mm4,    mm3
    psubusb mm1,    mm5
    psubusb mm3,    mm6
    por     mm1,    mm2
    por     mm3,    mm4
    movq    mm2,    mm1
    movq    mm4,    mm3
    punpcklbw mm1,  mm7
    punpcklbw mm3,  mm7
    punpckhbw mm2,  mm7
    punpckhbw mm4,  mm7
    pmaddwd mm1,    mm1
    pmaddwd mm2,    mm2
    pmaddwd mm3,    mm3
    pmaddwd mm4,    mm4
    add     parm1q, parm2q
    add     parm3q, parm4q
    paddd   mm0,    mm1
    paddd   mm0,    mm2
    paddd   mm0,    mm3
    paddd   mm0,    mm4
%endmacro

%macro SSD_INC_1x8P 0
    movq    mm1,    [parm1q]
    movq    mm2,    [parm3q]
    movq    mm5,    mm2
    psubusb mm2,    mm1
    psubusb mm1,    mm5
    por     mm1,    mm2         ; mm1 = 8bit abs diff
    movq    mm2,    mm1
    punpcklbw mm1,  mm7
    punpckhbw mm2,  mm7         ; (mm1,mm2) = 16bit abs diff
    pmaddwd mm1,    mm1
    pmaddwd mm2,    mm2
    add     parm1q, parm2q
    add     parm3q, parm4q
    paddd   mm0,    mm1
    paddd   mm0,    mm2
%endmacro

%macro SSD_INC_1x4P 0
    movd    mm1,    [parm1q]
    movd    mm2,    [parm3q]
    movq    mm5,    mm2
    psubusb mm2,    mm1
    psubusb mm1,    mm5
    por     mm1,    mm2
    punpcklbw mm1,  mm7
    pmaddwd mm1,    mm1
    add     parm1q, parm2q
    add     parm3q, parm4q
    paddd   mm0,    mm1
%endmacro

; satd: sum of absolute transformed differences (4x4 Hadamard helpers)

; load 4 pixels from each source and leave pix1-pix2 as 16-bit words in %1
%macro LOAD_DIFF_4P 4  ; MMP, MMT, [pix1], [pix2]
    movd        %1, %3
    movd        %2, %4
    punpcklbw   %1, %2
    punpcklbw   %2, %2
    psubw       %1, %2
%endmacro

; butterfly stage: (a,b,c,d) -> (a+b, b-a, c+d, d-c)
%macro HADAMARD4_SUB_BADC 4
    paddw %1,   %2
    paddw %3,   %4
    paddw %2,   %2
    paddw %4,   %4
    psubw %2,   %1
    psubw %4,   %3
%endmacro

; two butterfly passes = 4-point Hadamard transform on each 16-bit lane
%macro HADAMARD4x4 4
    HADAMARD4_SUB_BADC %1, %2, %3, %4
    HADAMARD4_SUB_BADC %1, %3, %2, %4
%endmacro

; interleave words: %1 gets the low halves, %3 the high halves
%macro SBUTTERFLYwd 3
    movq        %3, %1
    punpcklwd   %1, %2
    punpckhwd   %3, %2
%endmacro

%macro SBUTTERFLYdq 3
    movq        %3, %1
    punpckldq   %1, %2
    punpckhdq   %3, %2
%endmacro
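
Note: this excerpt stops at the macro definitions; the function bodies that instantiate them (x264's exported pixel routines) are not shown here. As a reading aid, below is a minimal scalar sketch in C of the metrics these macros accumulate. The names and signatures are illustrative only, not the x264 API; the butterfly helper simply mirrors HADAMARD4_SUB_BADC.

#include <stdint.h>
#include <stdlib.h>

/* Sum of absolute differences: what the SAD_* macros build up with psadbw.
 * The sad_x3/x4 variants evaluate one encoded block against 3 or 4
 * candidate blocks that share a stride. (Illustrative sketch only.) */
static int sad( const uint8_t *pix1, intptr_t i_stride1,
                const uint8_t *pix2, intptr_t i_stride2, int w, int h )
{
    int sum = 0;
    for( int y = 0; y < h; y++, pix1 += i_stride1, pix2 += i_stride2 )
        for( int x = 0; x < w; x++ )
            sum += abs( pix1[x] - pix2[x] );
    return sum;
}

/* Sum of squared differences: what SSD_INC_1x*P accumulates via pmaddwd. */
static int ssd( const uint8_t *pix1, intptr_t i_stride1,
                const uint8_t *pix2, intptr_t i_stride2, int w, int h )
{
    int sum = 0;
    for( int y = 0; y < h; y++, pix1 += i_stride1, pix2 += i_stride2 )
        for( int x = 0; x < w; x++ )
        {
            int d = pix1[x] - pix2[x];
            sum += d * d;
        }
    return sum;
}

/* One butterfly pass of the 4-point Hadamard used for SATD:
 * (a,b,c,d) -> (a+b, b-a, c+d, d-c), matching HADAMARD4_SUB_BADC. */
static void hadamard4_badc( int16_t d[4] )
{
    int16_t a = d[0], b = d[1], c = d[2], e = d[3];
    d[0] = a + b;
    d[1] = b - a;
    d[2] = c + e;
    d[3] = e - c;
}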