📄 sad_mmx.asm

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 ASM
字号:
;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - K7 optimized SAD operators -
; *
; *  Copyright(C) 2001 Peter Ross <pross@xvid.org>
; *               2002 Pascal Massimino <skal@planet-d.net>
; *               2004 Andre Werthmann <wertmann@aei.mpg.de>
; *
; *  This program is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: sad_mmx.asm,v 1.1 2005/01/05 23:02:15 edgomez Exp $
; *
; ***************************************************************************/

; Syntax : NASM (Intel operand order), 64-bit mode.
; ABI    : the routines below read their arguments from rdi, rsi and rdx,
;          i.e. the System V AMD64 integer argument registers.
; Content: 8x8 "sum of squared errors" (SSE) kernels implemented with MMX.
;          None of the routines executes EMMS; the MMX registers alias the
;          x87 register stack, so the caller has to issue EMMS before any
;          x87 floating point code runs.

BITS 64

;-----------------------------------------------------------------------------
; cglobal name
;
; Exports `name` as a global symbol.
;   PREFIX     : the exported symbol gets a leading underscore and `name`
;                is redefined so that the label below resolves to it.
;   MARK_FUNCS : the symbol is tagged as a function whose size is the
;                distance between `name` and its local label `.endfunc`.
;-----------------------------------------------------------------------------
%macro cglobal 1
    %ifdef PREFIX
        %ifdef MARK_FUNCS
            ; NOTE(review): this %define also carries the ":function ..."
            ; decoration into every later use of the name - confirm that
            ; PREFIX and MARK_FUNCS are never enabled together.
            global _%1:function %1.endfunc-%1
            %define %1 _%1:function %1.endfunc-%1
        %else
            global _%1
            %define %1 _%1
        %endif
    %else
        %ifdef MARK_FUNCS
            global %1:function %1.endfunc-%1
        %else
            global %1
        %endif
    %endif
%endmacro

;=============================================================================
; Read only data
;=============================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

;=============================================================================
; Code
;=============================================================================

SECTION .text align=16

cglobal sse8_16bit_x86_64
cglobal sse8_8bit_x86_64

;-----------------------------------------------------------------------------
;
; uint32_t sse8_16bit_x86_64(const int16_t *b1,
;                            const int16_t *b2,
;                            const uint32_t stride);
;
; Sum of squared differences between two blocks of 8 rows x 8 int16_t.
;
; In:    rdi = b1, rsi = b2, edx = stride
;        stride is added to the row pointers as-is, i.e. it is a byte offset.
; Out:   eax = sum of (b1[i] - b2[i])^2 over the 64 coefficients
; Clobb: rsi, rdi, rdx, mm0-mm3 (no EMMS is executed here)
;
;-----------------------------------------------------------------------------

; ROW_SSE_16Bit_MMX ptr_a, ptr_b
; Adds the squared differences of one row (8 words = 16 bytes) to the two
; dword lanes of mm2.  Clobbers mm0 and mm1.
%macro ROW_SSE_16Bit_MMX 2
        movq    mm0, [%1]               ; words 0..3 of row a
        movq    mm1, [%1+8]             ; words 4..7 of row a
        psubw   mm0, [%2]               ; a - b, words 0..3
        psubw   mm1, [%2+8]             ; a - b, words 4..7
        pmaddwd mm0, mm0                ; square and add adjacent pairs
        pmaddwd mm1, mm1
        paddd   mm2, mm0                ; accumulate
        paddd   mm2, mm1
%endmacro

sse8_16bit_x86_64:
        ; rdx is stride
        ; rsi is b2
        ; rdi is b1

        ; stride is a 32-bit argument: bits 63:32 of rdx are not guaranteed
        ; to be zero on entry, so zero-extend before using rdx as an offset.
        mov     edx, edx

        ;; Reset the sse accumulator
        pxor    mm2, mm2

        ;; Let's go
%rep 8
        ROW_SSE_16Bit_MMX rsi, rdi
        lea     rsi, [rsi+rdx]          ; next row of b2
        lea     rdi, [rdi+rdx]          ; next row of b1
%endrep

        ;; Finish adding each dword of the accumulator
        movq    mm3, mm2
        psrlq   mm2, 32                 ; high dword -> low dword
        paddd   mm2, mm3
        movd    eax, mm2

        ;; All done
        ret
.endfunc:

;-----------------------------------------------------------------------------
;
; uint32_t sse8_8bit_x86_64(const uint8_t *b1,
;                           const uint8_t *b2,
;                           const uint32_t stride);
;
; Sum of squared differences between two blocks of 8 rows x 8 bytes.
; The bytes are widened by interleaving with zero, so they are treated as
; unsigned values.
;
; In:    rdi = b1, rsi = b2, edx = stride (byte offset between rows)
; Out:   eax = sum of (b1[i] - b2[i])^2 over the 64 samples
; Clobb: rsi, rdi, rdx, mm0-mm3, mm6, mm7 (no EMMS is executed here)
;
;-----------------------------------------------------------------------------

; ROW_SSE_8bit_MMX ptr_a, ptr_b
; Adds the squared differences of one row (8 bytes) to the two dword lanes
; of mm6.  Expects mm7 == 0.  Clobbers mm0-mm3.
%macro ROW_SSE_8bit_MMX 2
        movq      mm0, [%1]             ; load a row
        movq      mm2, [%2]             ; load a row
        movq      mm1, mm0              ; copy row
        movq      mm3, mm2              ; copy row
        punpcklbw mm0, mm7              ; turn the 4 low elements into 16bit
        punpckhbw mm1, mm7              ; turn the 4 high elements into 16bit
        punpcklbw mm2, mm7              ; turn the 4 low elements into 16bit
        punpckhbw mm3, mm7              ; turn the 4 high elements into 16bit
        psubw     mm0, mm2              ; low  part of src-dst
        psubw     mm1, mm3              ; high part of src-dst
        pmaddwd   mm0, mm0              ; compute the square sum
        pmaddwd   mm1, mm1              ; compute the square sum
        paddd     mm6, mm0              ; add to the accumulator
        paddd     mm6, mm1              ; add to the accumulator
%endmacro

sse8_8bit_x86_64:
        ; rdx is stride
        ; rsi is b2
        ; rdi is b1

        ; stride is a 32-bit argument: bits 63:32 of rdx are not guaranteed
        ; to be zero on entry, so zero-extend before using rdx as an offset.
        mov     edx, edx

        ;; Reset the sse accumulator
        pxor    mm6, mm6

        ;; Used to interleave 8bit data with 0x00 values
        pxor    mm7, mm7

        ;; Let's go
%rep 8
        ROW_SSE_8bit_MMX rsi, rdi
        lea     rsi, [rsi+rdx]          ; next row of b2
        lea     rdi, [rdi+rdx]          ; next row of b1
%endrep

        ;; Finish adding each dword of the accumulator
        movq    mm7, mm6
        psrlq   mm6, 32                 ; high dword -> low dword
        paddd   mm6, mm7
        movd    eax, mm6

        ;; All done
        ret
.endfunc:
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -