
📄 pixel-a.asm.svn-base

📁 H.264 source codes
📖 Page 1 of 2
;*****************************************************************************
;* pixel.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003 x264 project
;* $Id: pixel.asm,v 1.1 2004/06/03 19:27:07 fenrir Exp $
;*
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
;*****************************************************************************

BITS 32

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================

%macro cglobal 1
    %ifdef PREFIX
        global _%1
        %define %1 _%1
    %else
        global %1
    %endif
%endmacro

%macro SAD_INC_2x16P 0
    movq    mm1,    [eax]
    movq    mm2,    [ecx]
    movq    mm3,    [eax+8]
    movq    mm4,    [ecx+8]
    psadbw  mm1,    mm2
    psadbw  mm3,    mm4
    paddw   mm0,    mm1
    paddw   mm0,    mm3
    movq    mm1,    [eax+ebx]
    movq    mm2,    [ecx+edx]
    movq    mm3,    [eax+ebx+8]
    movq    mm4,    [ecx+edx+8]
    psadbw  mm1,    mm2
    psadbw  mm3,    mm4
    paddw   mm0,    mm1
    paddw   mm0,    mm3
    lea     eax,    [eax+2*ebx]
    lea     ecx,    [ecx+2*edx]
%endmacro

%macro SAD_INC_2x8P 0
    movq    mm1,    [eax]
    movq    mm2,    [ecx]
    movq    mm3,    [eax+ebx]
    movq    mm4,    [ecx+edx]
    psadbw  mm1,    mm2
    psadbw  mm3,    mm4
    paddw   mm0,    mm1
    paddw   mm0,    mm3
    lea     eax,    [eax+2*ebx]
    lea     ecx,    [ecx+2*edx]
%endmacro

%macro SAD_INC_2x4P 0
    movd    mm1,    [eax]
    movd    mm2,    [ecx]
    movd    mm3,    [eax+ebx]
    movd    mm4,    [ecx+edx]
    psadbw  mm1,    mm2
    psadbw  mm3,    mm4
    paddw   mm0,    mm1
    paddw   mm0,    mm3
    lea     eax,    [eax+2*ebx]
    lea     ecx,    [ecx+2*edx]
%endmacro

%macro SSD_INC_1x16P 0
    movq    mm1,    [eax]
    movq    mm2,    [ecx]
    movq    mm3,    [eax+8]
    movq    mm4,    [ecx+8]
    movq    mm5,    mm2
    movq    mm6,    mm4
    psubusb mm2,    mm1
    psubusb mm4,    mm3
    psubusb mm1,    mm5
    psubusb mm3,    mm6
    por     mm1,    mm2
    por     mm3,    mm4
    movq    mm2,    mm1
    movq    mm4,    mm3
    punpcklbw mm1,  mm7
    punpcklbw mm3,  mm7
    punpckhbw mm2,  mm7
    punpckhbw mm4,  mm7
    pmaddwd mm1,    mm1
    pmaddwd mm2,    mm2
    pmaddwd mm3,    mm3
    pmaddwd mm4,    mm4
    add     eax,    ebx
    add     ecx,    edx
    paddd   mm0,    mm1
    paddd   mm0,    mm2
    paddd   mm0,    mm3
    paddd   mm0,    mm4
%endmacro

%macro SSD_INC_1x8P 0
    movq    mm1,    [eax]
    movq    mm2,    [ecx]
    movq    mm5,    mm2
    psubusb mm2,    mm1
    psubusb mm1,    mm5
    por     mm1,    mm2         ; mm1 = 8bit abs diff
    movq    mm2,    mm1
    punpcklbw mm1,  mm7
    punpckhbw mm2,  mm7         ; (mm1,mm2) = 16bit abs diff
    pmaddwd mm1,    mm1
    pmaddwd mm2,    mm2
    add     eax,    ebx
    add     ecx,    edx
    paddd   mm0,    mm1
    paddd   mm0,    mm2
%endmacro

%macro SSD_INC_1x4P 0
    movd    mm1,    [eax]
    movd    mm2,    [ecx]
    movq    mm5,    mm2
    psubusb mm2,    mm1
    psubusb mm1,    mm5
    por     mm1,    mm2
    punpcklbw mm1,  mm7
    pmaddwd mm1,    mm1
    add     eax,    ebx
    add     ecx,    edx
    paddd   mm0,    mm1
%endmacro

%macro SSD_INC_8x16P 0
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
    SSD_INC_1x16P
%endmacro

%macro SSD_INC_4x8P 0
    SSD_INC_1x8P
    SSD_INC_1x8P
    SSD_INC_1x8P
    SSD_INC_1x8P
%endmacro

%macro SSD_INC_4x4P 0
    SSD_INC_1x4P
    SSD_INC_1x4P
    SSD_INC_1x4P
    SSD_INC_1x4P
%endmacro

%macro LOAD_DIFF_4P 5  ; MMP, MMT, MMZ, [pix1], [pix2]
    movd        %1, %4
    punpcklbw   %1, %3
    movd        %2, %5
    punpcklbw   %2, %3
    psubw       %1, %2
%endmacro

%macro LOAD_DIFF_INC_4x4 11 ; p1,p2,p3,p4, t, z, pix1, i_pix1, pix2, i_pix2, offset
    LOAD_DIFF_4P %1, %5, %6, [%7+%11],    [%9+%11]
    LOAD_DIFF_4P %2, %5, %6, [%7+%8+%11], [%9+%10+%11]
    lea %7, [%7+2*%8]
    lea %9, [%9+2*%10]
    LOAD_DIFF_4P %3, %5, %6, [%7+%11],    [%9+%11]
    LOAD_DIFF_4P %4, %5, %6, [%7+%8+%11], [%9+%10+%11]
    lea %7, [%7+2*%8]
    lea %9, [%9+2*%10]
%endmacro

%macro HADAMARD4_SUB_BADC 4
    paddw %1,   %2
    paddw %3,   %4
    paddw %2,   %2
    paddw %4,   %4
    psubw %2,   %1
    psubw %4,   %3
%endmacro

%macro HADAMARD4x4 4
    HADAMARD4_SUB_BADC %1, %2, %3, %4
    HADAMARD4_SUB_BADC %1, %3, %2, %4
%endmacro

%macro SBUTTERFLYwd 3
    movq        %3, %1
    punpcklwd   %1, %2
    punpckhwd   %3, %2
%endmacro

%macro SBUTTERFLYdq 3
    movq        %3, %1
    punpckldq   %1, %2
    punpckhdq   %3, %2
%endmacro

%macro TRANSPOSE4x4 5   ; abcd-t -> adtc
    SBUTTERFLYwd %1, %2, %5
    SBUTTERFLYwd %3, %4, %2
    SBUTTERFLYdq %1, %3, %4
    SBUTTERFLYdq %5, %2, %3
%endmacro

%macro MMX_ABS 2        ; mma, mmt
    pxor    %2, %2
    psubw   %2, %1
    pmaxsw  %1, %2
%endmacro

%macro MMX_ABS_SUM 3    ; mma, mmt, mms
    pxor    %2, %2
    psubw   %2, %1
    pmaxsw  %1, %2
    paddusw %3, %1
%endmacro

%macro MMX_SUM_MM 2     ; mmv, mmt
    movq    %2, %1
    psrlq   %1, 32
    paddusw %1, %2
    movq    %2, %1
    psrlq   %1, 16
    paddusw %1, %2
    movd    eax,%1
    and     eax,0xffff
    shr     eax,1
%endmacro

%macro HADAMARD4x4_FIRST 0
    HADAMARD4x4 mm0, mm1, mm2, mm3
    TRANSPOSE4x4 mm0, mm1, mm2, mm3, mm4
    HADAMARD4x4 mm0, mm3, mm4, mm2
    MMX_ABS     mm0, mm7
    MMX_ABS_SUM mm3, mm7, mm0
    MMX_ABS_SUM mm4, mm7, mm0
    MMX_ABS_SUM mm2, mm7, mm0
%endmacro

%macro HADAMARD4x4_NEXT 0
    HADAMARD4x4 mm1, mm2, mm3, mm4
    TRANSPOSE4x4 mm1, mm2, mm3, mm4, mm5
    HADAMARD4x4 mm1, mm4, mm5, mm3
    MMX_ABS_SUM mm1, mm7, mm0
    MMX_ABS_SUM mm4, mm7, mm0
    MMX_ABS_SUM mm5, mm7, mm0
    MMX_ABS_SUM mm3, mm7, mm0
%endmacro

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal x264_pixel_sad_16x16_mmxext
cglobal x264_pixel_sad_16x8_mmxext
cglobal x264_pixel_sad_8x16_mmxext
cglobal x264_pixel_sad_8x8_mmxext
cglobal x264_pixel_sad_8x4_mmxext
cglobal x264_pixel_sad_4x8_mmxext
cglobal x264_pixel_sad_4x4_mmxext
cglobal x264_pixel_ssd_16x16_mmxext
cglobal x264_pixel_ssd_16x8_mmxext
cglobal x264_pixel_ssd_8x16_mmxext
cglobal x264_pixel_ssd_8x8_mmxext
cglobal x264_pixel_ssd_8x4_mmxext
cglobal x264_pixel_ssd_4x8_mmxext
cglobal x264_pixel_ssd_4x4_mmxext
cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext
cglobal x264_pixel_satd_8x4_mmxext
cglobal x264_pixel_satd_8x8_mmxext
cglobal x264_pixel_satd_16x8_mmxext
cglobal x264_pixel_satd_8x16_mmxext
cglobal x264_pixel_satd_16x16_mmxext

%macro SAD_START 0
    push    ebx
    mov     eax,    [esp+ 8]    ; pix1
    mov     ebx,    [esp+12]    ; stride1
    mov     ecx,    [esp+16]    ; pix2
    mov     edx,    [esp+20]    ; stride2
    pxor    mm0,    mm0
%endmacro

%macro SAD_END 0
    movd eax,    mm0
    pop ebx
    ret
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_16x16_mmxext:
    SAD_START
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_16x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_16x8_mmxext:
    SAD_START
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_8x16_mmxext:
    SAD_START
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_8x8_mmxext:
    SAD_START
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_8x4_mmxext:
    SAD_START
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END

ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_4x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sad_4x8_mmxext:
    SAD_START
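The listing breaks off at the start of x264_pixel_sad_4x8_mmxext, where page 1 of 2 ends. For readers cross-checking the MMX code, the plain-C reference below (not part of pixel.asm; function and parameter names are illustrative) computes the same values the SAD and SSD entry points accumulate with psadbw and pmaddwd. Argument order follows the prototypes in the comments above: pixel pointer and stride for each of the two blocks.

#include <stdint.h>
#include <stdlib.h>

/* Scalar equivalent of x264_pixel_sad_WxH_mmxext: sum of absolute
 * differences over a w x h block. */
static int sad_wxh(const uint8_t *pix1, int stride1,
                   const uint8_t *pix2, int stride2, int w, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++, pix1 += stride1, pix2 += stride2)
        for (int x = 0; x < w; x++)
            sum += abs(pix1[x] - pix2[x]);   /* what psadbw accumulates */
    return sum;
}

/* Scalar equivalent of x264_pixel_ssd_WxH_mmxext: sum of squared
 * differences over a w x h block. */
static int ssd_wxh(const uint8_t *pix1, int stride1,
                   const uint8_t *pix2, int stride2, int w, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++, pix1 += stride1, pix2 += stride2)
        for (int x = 0; x < w; x++) {
            int d = pix1[x] - pix2[x];   /* asm builds |d| via psubusb/por */
            sum += d * d;                /* squared and summed via pmaddwd */
        }
    return sum;
}

For example, sad_wxh(p1, s1, p2, s2, 16, 16) corresponds to x264_pixel_sad_16x16_mmxext(p1, s1, p2, s2); in the assembly, each SAD_INC_2xNP invocation covers two rows and each SSD_INC_1xNP covers one, which is why the 16-row functions unroll the macro eight times.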

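The SATD entry points, whose bodies fall beyond this page, score a block by a 4x4 Hadamard transform of the pixel differences (the HADAMARD4x4, TRANSPOSE4x4 and MMX_ABS_SUM macros above) rather than by the raw differences. A plain-C sketch of the 4x4 primitive follows, again illustrative and not part of the original file. Its butterfly signs and coefficient ordering differ from the MMX macros, but since only absolute values are summed the total is the same, including the final halving that MMX_SUM_MM performs with shr eax, 1.

#include <stdint.h>
#include <stdlib.h>

/* Illustrative scalar 4x4 SATD: 4-point Hadamard on rows, then on
 * columns, of the pixel difference block; sum of |coefficients| / 2. */
static int satd_4x4(const uint8_t *pix1, int stride1,
                    const uint8_t *pix2, int stride2)
{
    int d[4][4], t[4][4], sum = 0;

    /* pixel differences, as LOAD_DIFF_4P builds them */
    for (int y = 0; y < 4; y++)
        for (int x = 0; x < 4; x++)
            d[y][x] = pix1[y * stride1 + x] - pix2[y * stride2 + x];

    /* 4-point Hadamard along each row */
    for (int y = 0; y < 4; y++) {
        int s01 = d[y][0] + d[y][1], d01 = d[y][0] - d[y][1];
        int s23 = d[y][2] + d[y][3], d23 = d[y][2] - d[y][3];
        t[y][0] = s01 + s23;  t[y][1] = s01 - s23;
        t[y][2] = d01 + d23;  t[y][3] = d01 - d23;
    }

    /* 4-point Hadamard along each column, accumulating |coefficients| */
    for (int x = 0; x < 4; x++) {
        int s01 = t[0][x] + t[1][x], d01 = t[0][x] - t[1][x];
        int s23 = t[2][x] + t[3][x], d23 = t[2][x] - t[3][x];
        sum += abs(s01 + s23) + abs(s01 - s23)
             + abs(d01 + d23) + abs(d01 - d23);
    }
    return sum >> 1;   /* same halving as shr eax, 1 in MMX_SUM_MM */
}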