⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pixel-a.asm

📁 linux下编译已经通过
💻 ASM
📖 第 1 页 / 共 3 页
字号:
;*****************************************************************************
;* pixel.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003 x264 project
;* $Id: pixel.asm,v 1.1 2004/06/03 19:27:07 fenrir Exp $
;*
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
;*****************************************************************************

BITS 64

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================
;
; Syntax: NASM (Intel operand order), 64-bit mode, MMX registers mm0-mm7.
; The names parm1q..parm7q and FENC_STRIDE are not defined here; they are
; expected to come from amd64inc.asm (not visible in this file).
; NOTE(review): parmNq presumably maps to the Nth integer argument of the
; target calling convention -- confirm against amd64inc.asm.

%include "amd64inc.asm"

;-----------------------------------------------------------------------------
; sad
;-----------------------------------------------------------------------------
; The SAD_INC_* macros add the sum of absolute byte differences of two rows
; to the running total in mm0 and then step both row pointers down two rows.
;   parm1q / parm2q : first row pointer and the offset added to reach its
;                     next row
;   parm3q / parm4q : second row pointer and the offset added to reach its
;                     next row
;   mm0             : accumulator (must be initialised by the user)

; Two rows of 16 bytes.
; Clobbers mm1-mm4; advances parm1q by 2*parm2q and parm3q by 2*parm4q.
%macro SAD_INC_2x16P 0
    movq        mm1, [parm1q]
    movq        mm2, [parm1q+8]
    movq        mm3, [parm1q+parm2q]
    movq        mm4, [parm1q+parm2q+8]
    psadbw      mm1, [parm3q]
    psadbw      mm2, [parm3q+8]
    psadbw      mm3, [parm3q+parm4q]
    psadbw      mm4, [parm3q+parm4q+8]
    lea         parm1q, [parm1q+2*parm2q]
    paddw       mm1, mm2
    paddw       mm3, mm4
    lea         parm3q, [parm3q+2*parm4q]
    paddw       mm0, mm1
    paddw       mm0, mm3
%endmacro

; Two rows of 8 bytes.
; Clobbers mm1-mm2; advances parm1q by 2*parm2q and parm3q by 2*parm4q.
%macro SAD_INC_2x8P 0
    movq        mm1, [parm1q]
    movq        mm2, [parm1q+parm2q]
    psadbw      mm1, [parm3q]
    psadbw      mm2, [parm3q+parm4q]
    lea         parm1q, [parm1q+2*parm2q]
    paddw       mm0, mm1
    paddw       mm0, mm2
    lea         parm3q, [parm3q+2*parm4q]
%endmacro

; Two rows of 4 bytes. Both rows of each source are packed into a single
; 64-bit register (row 0 in the low dword, row 1 in the high dword) so that
; one psadbw covers the pair.
; Clobbers mm1-mm2; advances parm1q by 2*parm2q and parm3q by 2*parm4q.
%macro SAD_INC_2x4P 0
    movd        mm1, [parm1q]
    movd        mm2, [parm3q]
    punpckldq   mm1, [parm1q+parm2q]
    punpckldq   mm2, [parm3q+parm4q]
    psadbw      mm1, mm2
    paddw       mm0, mm1
    lea         parm1q, [parm1q+2*parm2q]
    lea         parm3q, [parm3q+2*parm4q]
%endmacro

;-----------------------------------------------------------------------------
; sad x3 / x4
;-----------------------------------------------------------------------------
; These macros compare one reference row (read through parm1q, rows
; FENC_STRIDE bytes apart) against three (x3) or four (x4) other rows at once.
;   x3: rows read through parm2q, parm3q, parm4q, sharing the row offset in
;       parm5q; totals kept in mm0, mm1, mm2.
;   x4: rows read through parm2q, parm3q, parm4q, parm5q, sharing the row
;       offset in parm6q; totals kept in mm0, mm1, mm2, mm3.

; First 8-byte row of an x3 run: writes (does not accumulate into) mm0-mm2.
; Clobbers mm3.
%macro SAD_X3_START_1x8P 0
    movq        mm3, [parm1q]
    movq        mm0, [parm2q]
    movq        mm1, [parm3q]
    movq        mm2, [parm4q]
    psadbw      mm0, mm3
    psadbw      mm1, mm3
    psadbw      mm2, mm3
%endmacro

; One further 8-byte row of an x3 run, accumulated into mm0-mm2.
;   %1 = displacement added to parm1q
;   %2 = displacement added to parm2q, parm3q and parm4q
; Clobbers mm3-mm6.
%macro SAD_X3_1x8P 2
    movq        mm3, [parm1q+%1]
    movq        mm4, [parm2q+%2]
    movq        mm5, [parm3q+%2]
    movq        mm6, [parm4q+%2]
    psadbw      mm4, mm3
    psadbw      mm5, mm3
    psadbw      mm6, mm3
    paddw       mm0, mm4
    paddw       mm1, mm5
    paddw       mm2, mm6
%endmacro

; Two 4-byte rows of an x3 run, each pair packed into one register.
;   %1, %2, %3 = MMX registers receiving the three results (overwritten).
; Clobbers mm3.
%macro SAD_X3_START_2x4P 3
    movd        mm3, [parm1q]
    movd        %1,  [parm2q]
    movd        %2,  [parm3q]
    movd        %3,  [parm4q]
    punpckldq   mm3, [parm1q+FENC_STRIDE]
    punpckldq   %1,  [parm2q+parm5q]
    punpckldq   %2,  [parm3q+parm5q]
    punpckldq   %3,  [parm4q+parm5q]
    psadbw      %1,  mm3
    psadbw      %2,  mm3
    psadbw      %3,  mm3
%endmacro

; Two 16-byte rows of an x3 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (initialises mm0-mm2),
;        zero for later calls (accumulates into mm0-mm2).
%macro SAD_X3_2x16P 1
%if %1
    SAD_X3_START_1x8P
%else
    SAD_X3_1x8P 0, 0
%endif
    SAD_X3_1x8P 8, 8
    SAD_X3_1x8P FENC_STRIDE, parm5q
    SAD_X3_1x8P FENC_STRIDE+8, parm5q+8
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm5q]
    lea         parm3q, [parm3q+2*parm5q]
    lea         parm4q, [parm4q+2*parm5q]
%endmacro

; Two 8-byte rows of an x3 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (initialises mm0-mm2),
;        zero for later calls (accumulates into mm0-mm2).
%macro SAD_X3_2x8P 1
%if %1
    SAD_X3_START_1x8P
%else
    SAD_X3_1x8P 0, 0
%endif
    SAD_X3_1x8P FENC_STRIDE, parm5q
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm5q]
    lea         parm3q, [parm3q+2*parm5q]
    lea         parm4q, [parm4q+2*parm5q]
%endmacro

; Two 4-byte rows of an x3 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (results land directly in mm0-mm2),
;        zero for later calls (computed in mm4-mm6, then added to mm0-mm2).
%macro SAD_X3_2x4P 1
%if %1
    SAD_X3_START_2x4P mm0, mm1, mm2
%else
    SAD_X3_START_2x4P mm4, mm5, mm6
    paddw       mm0, mm4
    paddw       mm1, mm5
    paddw       mm2, mm6
%endif
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm5q]
    lea         parm3q, [parm3q+2*parm5q]
    lea         parm4q, [parm4q+2*parm5q]
%endmacro

; First 8-byte row of an x4 run: writes (does not accumulate into) mm0-mm3.
; Clobbers mm7.
%macro SAD_X4_START_1x8P 0
    movq        mm7, [parm1q]
    movq        mm0, [parm2q]
    movq        mm1, [parm3q]
    movq        mm2, [parm4q]
    movq        mm3, [parm5q]
    psadbw      mm0, mm7
    psadbw      mm1, mm7
    psadbw      mm2, mm7
    psadbw      mm3, mm7
%endmacro

; One further 8-byte row of an x4 run, accumulated into mm0-mm3.
;   %1 = displacement added to parm1q
;   %2 = displacement added to parm2q..parm5q
; Clobbers mm4-mm7.
%macro SAD_X4_1x8P 2
    movq        mm7, [parm1q+%1]
    movq        mm4, [parm2q+%2]
    movq        mm5, [parm3q+%2]
    movq        mm6, [parm4q+%2]
    psadbw      mm4, mm7
    psadbw      mm5, mm7
    psadbw      mm6, mm7
    ; The fourth result is formed in place in mm7, which destroys the
    ; reference row; this psadbw therefore has to be the last one.
    psadbw      mm7, [parm5q+%2]
    paddw       mm0, mm4
    paddw       mm1, mm5
    paddw       mm2, mm6
    paddw       mm3, mm7
%endmacro

; First two 4-byte rows of an x4 run, each pair packed into one register.
; Writes (does not accumulate into) mm0-mm3. Clobbers mm7.
%macro SAD_X4_START_2x4P 0
    movd        mm7, [parm1q]
    movd        mm0, [parm2q]
    movd        mm1, [parm3q]
    movd        mm2, [parm4q]
    movd        mm3, [parm5q]
    punpckldq   mm7, [parm1q+FENC_STRIDE]
    punpckldq   mm0, [parm2q+parm6q]
    punpckldq   mm1, [parm3q+parm6q]
    punpckldq   mm2, [parm4q+parm6q]
    punpckldq   mm3, [parm5q+parm6q]
    psadbw      mm0, mm7
    psadbw      mm1, mm7
    psadbw      mm2, mm7
    psadbw      mm3, mm7
%endmacro

; Two further 4-byte rows of an x4 run, accumulated into mm0-mm3.
; Only mm4 and mm5 are used as temporaries, so the four comparisons are done
; in two passes of two. Clobbers mm4, mm5, mm7.
%macro SAD_X4_INC_2x4P 0
    movd        mm7, [parm1q]
    movd        mm4, [parm2q]
    movd        mm5, [parm3q]
    punpckldq   mm7, [parm1q+FENC_STRIDE]
    punpckldq   mm4, [parm2q+parm6q]
    punpckldq   mm5, [parm3q+parm6q]
    psadbw      mm4, mm7
    psadbw      mm5, mm7
    paddw       mm0, mm4
    paddw       mm1, mm5
    movd        mm4, [parm4q]
    movd        mm5, [parm5q]
    punpckldq   mm4, [parm4q+parm6q]
    punpckldq   mm5, [parm5q+parm6q]
    psadbw      mm4, mm7
    psadbw      mm5, mm7
    paddw       mm2, mm4
    paddw       mm3, mm5
%endmacro

; Two 16-byte rows of an x4 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (initialises mm0-mm3),
;        zero for later calls (accumulates into mm0-mm3).
%macro SAD_X4_2x16P 1
%if %1
    SAD_X4_START_1x8P
%else
    SAD_X4_1x8P 0, 0
%endif
    SAD_X4_1x8P 8, 8
    SAD_X4_1x8P FENC_STRIDE, parm6q
    SAD_X4_1x8P FENC_STRIDE+8, parm6q+8
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm6q]
    lea         parm3q, [parm3q+2*parm6q]
    lea         parm4q, [parm4q+2*parm6q]
    lea         parm5q, [parm5q+2*parm6q]
%endmacro

; Two 8-byte rows of an x4 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (initialises mm0-mm3),
;        zero for later calls (accumulates into mm0-mm3).
%macro SAD_X4_2x8P 1
%if %1
    SAD_X4_START_1x8P
%else
    SAD_X4_1x8P 0, 0
%endif
    SAD_X4_1x8P FENC_STRIDE, parm6q
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm6q]
    lea         parm3q, [parm3q+2*parm6q]
    lea         parm4q, [parm4q+2*parm6q]
    lea         parm5q, [parm5q+2*parm6q]
%endmacro

; Two 4-byte rows of an x4 run, then step every pointer down two rows.
;   %1 = non-zero for the first call (initialises mm0-mm3),
;        zero for later calls (accumulates into mm0-mm3).
%macro SAD_X4_2x4P 1
%if %1
    SAD_X4_START_2x4P
%else
    SAD_X4_INC_2x4P
%endif
    add         parm1q, 2*FENC_STRIDE
    lea         parm2q, [parm2q+2*parm6q]
    lea         parm3q, [parm3q+2*parm6q]
    lea         parm4q, [parm4q+2*parm6q]
    lea         parm5q, [parm5q+2*parm6q]
%endmacro

; Finish an x3 function: store the low dwords of mm0, mm1, mm2 as three
; consecutive 32-bit values at [parm6q] and return.
%macro SAD_X3_END 0
    movd        [parm6q+0], mm0
    movd        [parm6q+4], mm1
    movd        [parm6q+8], mm2
    ret
%endmacro

; Finish an x4 function: store the low dwords of mm0..mm3 as four consecutive
; 32-bit values at the address held in parm7q and return. Clobbers rax.
; NOTE(review): parm7q is copied into rax before use, presumably because it
; is a memory operand rather than a register -- confirm in amd64inc.asm.
%macro SAD_X4_END 0
    mov         rax, parm7q
    movd        [rax+0],  mm0
    movd        [rax+4],  mm1
    movd        [rax+8],  mm2
    movd        [rax+12], mm3
    ret
%endmacro

;-----------------------------------------------------------------------------
; ssd
;-----------------------------------------------------------------------------
; The SSD_INC_* macros add the sum of squared byte differences of one row to
; the two dword lanes of mm0 and then step both row pointers down one row
; (parm1q += parm2q, parm3q += parm4q).
;
; Method: |a-b| for unsigned bytes is (a -sat b) | (b -sat a), because one of
; the two saturating subtractions is always zero. The bytes are then widened
; to words by interleaving with mm7, and pmaddwd of a register with itself
; squares each word and sums adjacent pairs into dwords.
; NOTE(review): mm7 is never written here; it is assumed to hold zero on
; entry so the interleave acts as a zero-extension -- confirm at call sites.

; One row of 16 bytes. Clobbers mm1-mm6.
%macro SSD_INC_1x16P 0
    movq        mm1, [parm1q]
    movq        mm2, [parm3q]
    movq        mm3, [parm1q+8]
    movq        mm4, [parm3q+8]
    movq        mm5, mm2
    movq        mm6, mm4
    psubusb     mm2, mm1
    psubusb     mm4, mm3
    psubusb     mm1, mm5
    psubusb     mm3, mm6
    por         mm1, mm2
    por         mm3, mm4
    movq        mm2, mm1
    movq        mm4, mm3
    punpcklbw   mm1, mm7
    punpcklbw   mm3, mm7
    punpckhbw   mm2, mm7
    punpckhbw   mm4, mm7
    pmaddwd     mm1, mm1
    pmaddwd     mm2, mm2
    pmaddwd     mm3, mm3
    pmaddwd     mm4, mm4
    add         parm1q, parm2q
    add         parm3q, parm4q
    paddd       mm0, mm1
    paddd       mm0, mm2
    paddd       mm0, mm3
    paddd       mm0, mm4
%endmacro

; One row of 8 bytes. Clobbers mm1, mm2, mm5.
%macro SSD_INC_1x8P 0
    movq        mm1, [parm1q]
    movq        mm2, [parm3q]
    movq        mm5, mm2
    psubusb     mm2, mm1
    psubusb     mm1, mm5
    por         mm1, mm2            ; mm1 = 8bit abs diff
    movq        mm2, mm1
    punpcklbw   mm1, mm7
    punpckhbw   mm2, mm7            ; (mm1,mm2) = 16bit abs diff
    pmaddwd     mm1, mm1
    pmaddwd     mm2, mm2
    add         parm1q, parm2q
    add         parm3q, parm4q
    paddd       mm0, mm1
    paddd       mm0, mm2
%endmacro

; One row of 4 bytes. Only the low dword is loaded, so a single low-half
; widening is enough. Clobbers mm1, mm2, mm5.
%macro SSD_INC_1x4P 0
    movd        mm1, [parm1q]
    movd        mm2, [parm3q]
    movq        mm5, mm2
    psubusb     mm2, mm1
    psubusb     mm1, mm5
    por         mm1, mm2
    punpcklbw   mm1, mm7
    pmaddwd     mm1, mm1
    add         parm1q, parm2q
    add         parm3q, parm4q
    paddd       mm0, mm1
%endmacro

;-----------------------------------------------------------------------------
; satd
;-----------------------------------------------------------------------------

; Load four bytes from each of two locations and leave their four 16-bit
; differences (pix1 - pix2) in the first register.
;   %1 = result register, %2 = scratch register,
;   %3 = memory operand for pix1, %4 = memory operand for pix2
; No zero register is needed: after the interleaves each word of %1 is
; pix1 + 256*pix2 and each word of %2 is pix2 + 256*pix2, so the high bytes
; cancel in the subtraction.
%macro LOAD_DIFF_4P 4  ; MMP, MMT, [pix1], [pix2]
    movd        %1, %3
    movd        %2, %4
    punpcklbw   %1, %2
    punpcklbw   %2, %2
    psubw       %1, %2
%endmacro

; Sum/difference step on two register pairs, using no temporaries.
; With inputs (a, b, c, d) in (%1, %2, %3, %4) the outputs are
;   %1 = a+b, %2 = b-a, %3 = c+d, %4 = d-c
; (each difference is formed as 2*x minus the already-computed sum).
%macro HADAMARD4_SUB_BADC 4
    paddw       %1, %2
    paddw       %3, %4
    paddw       %2, %2
    paddw       %4, %4
    psubw       %2, %1
    psubw       %4, %3
%endmacro

; Two sum/difference passes over four registers: first on the pairs
; (%1,%2) and (%3,%4), then on the pairs (%1,%3) and (%2,%4).
%macro HADAMARD4x4 4
    HADAMARD4_SUB_BADC %1, %2, %3, %4
    HADAMARD4_SUB_BADC %1, %3, %2, %4
%endmacro

; Interleave the 16-bit words of two registers.
;   %1 = first input; receives the interleaved low halves of %1 and %2
;   %2 = second input (unchanged)
;   %3 = receives the interleaved high halves of %1 and %2
%macro SBUTTERFLYwd 3
    movq        %3, %1
    punpcklwd   %1, %2
    punpckhwd   %3, %2
%endmacro

; Interleave the 32-bit dwords of two registers.
;   %1 = first input; receives the low dwords of %1 and %2
;   %2 = second input (unchanged)
;   %3 = receives the high dwords of %1 and %2
%macro SBUTTERFLYdq 3
    movq        %3, %1
    punpckldq   %1, %2
    punpckhdq   %3, %2
%endmacro

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -