📄 sad_sse2.asm

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 ASM
字号:
;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - SSE2 optimized SAD operators -
; *
; *  Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
; *
; *
; *  This program is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: sad_sse2.asm,v 1.12 2004/08/29 10:02:38 edgomez Exp $
; *
; ***************************************************************************/

; Syntax: NASM (Intel operand order), 32-bit code.
; Both routines read their arguments from the stack at [esp+4].. and return
; with a plain `ret`, so the caller is responsible for removing the arguments.

BITS 32

;-----------------------------------------------------------------------------
; cglobal <name>
;
; Exports <name> as a global symbol.
;   PREFIX     defined: the exported symbol is `_<name>`, and <name> is
;                       redefined so later uses of it expand to that form.
;   MARK_FUNCS defined: the `global` directive additionally carries NASM's
;                       `:function` type and the size expression
;                       `<name>.endfunc - <name>`; every exported routine
;                       therefore ends with a local `.endfunc` label.
;-----------------------------------------------------------------------------

%macro cglobal 1
    %ifdef PREFIX
        %ifdef MARK_FUNCS
            global _%1:function %1.endfunc-%1
            %define %1 _%1:function %1.endfunc-%1
        %else
            global _%1
            %define %1 _%1
        %endif
    %else
        %ifdef MARK_FUNCS
            global %1:function %1.endfunc-%1
        %else
            global %1
        %endif
    %endif
%endmacro

; One macro expansion below handles two rows, so a 16-row block needs 8.
%define SAD16_ROW_PAIRS     8

; pshufd selector: destination dword 0 <- source dword 2 (the remaining
; destination dwords receive source dword 0 and are never read afterwards).
%define SHUF_DWORD2_TO_LOW  00000010b

;=============================================================================
; Read only data
;=============================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

ALIGN 64
zero:
        times 4 dd 0                    ; 16 zero bytes (not referenced by the code in this file)

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal sad16_sse2
cglobal dev16_sse2

;-----------------------------------------------------------------------------
; uint32_t sad16_sse2(const uint8_t * const cur,   <- assumed aligned!
;                     const uint8_t * const ref,
;                     const uint32_t stride,
;                     const uint32_t /*ignored*/);
;
; Sum of absolute differences between two 16x16 byte blocks.
;
; In:    [esp+ 4] = cur    (read with movdqa: cur and cur+stride must be
;                           16-byte aligned)
;        [esp+ 8] = ref    (read with movdqu: any alignment)
;        [esp+12] = stride (byte distance between consecutive rows)
;        [esp+16] = not read
; Out:   eax = SAD (16-bit sum, zero-extended by pextrw)
; Clobb: eax, ecx, edx, xmm0-xmm3, xmm5, xmm6
;-----------------------------------------------------------------------------

; Accumulate the SAD of two rows into xmm6 and step both pointers by
; 2*stride.  psadbw produces one partial sum per 64-bit half, so xmm6 carries
; two running sums (word 0 and word 4).
;   eax = cur row, edx = ref row, ecx = stride
%macro SAD_16x16_SSE2 0
        movdqu  xmm0, [edx]             ; ref row n
        movdqu  xmm1, [edx + ecx]       ; ref row n+1
        lea     edx, [edx + 2*ecx]      ; ref += 2*stride
        movdqa  xmm2, [eax]             ; cur row n
        movdqa  xmm3, [eax + ecx]       ; cur row n+1
        lea     eax, [eax + 2*ecx]      ; cur += 2*stride
        psadbw  xmm0, xmm2
        paddusw xmm6, xmm0
        psadbw  xmm1, xmm3
        paddusw xmm6, xmm1
%endmacro

ALIGN 16
sad16_sse2:
        mov     eax, [esp +  4]         ; cur (assumed aligned)
        mov     edx, [esp +  8]         ; ref
        mov     ecx, [esp + 12]         ; stride

        pxor    xmm6, xmm6              ; running sums = 0

%rep SAD16_ROW_PAIRS
        SAD_16x16_SSE2
%endrep

        ; Fold the upper-half partial sum onto the lower one.
        pshufd  xmm5, xmm6, SHUF_DWORD2_TO_LOW
        paddusw xmm6, xmm5
        pextrw  eax, xmm6, 0            ; return word 0
        ret
.endfunc:

;-----------------------------------------------------------------------------
; uint32_t dev16_sse2(const uint8_t * const cur, const uint32_t stride);
;
; Sum over a 16x16 byte block of |pixel - mean|, where mean is the pixel sum
; shifted right by 8 (i.e. divided by 256, truncating).
;
; In:    [esp+4] = cur    (read with movdqu: any alignment)
;        [esp+8] = stride (byte distance between consecutive rows)
; Out:   eax = deviation (16-bit sum, zero-extended by pextrw)
; Clobb: eax, ecx, xmm0, xmm1, xmm6, xmm7
;-----------------------------------------------------------------------------

; Accumulate, for two rows, the sum of |byte - xmm7 byte| into xmm6 and step
; the pointer by 2*stride.  With xmm7 = 0 this is a plain byte sum; with
; xmm7 = replicated mean it is the absolute deviation.
;   eax = src row, ecx = stride, xmm7 = zero or mean  =>  xmm6 = result
%macro MEAN_16x16_SSE2 0
        movdqu  xmm0, [eax]             ; row n
        movdqu  xmm1, [eax + ecx]       ; row n+1
        lea     eax, [eax + 2*ecx]      ; src += 2*stride
        psadbw  xmm0, xmm7
        paddusw xmm6, xmm0
        psadbw  xmm1, xmm7
        paddusw xmm6, xmm1
%endmacro

ALIGN 16
dev16_sse2:
        mov     eax, [esp + 4]          ; src
        mov     ecx, [esp + 8]          ; stride

        ; Pass 1: sum of all 256 bytes.
        pxor    xmm6, xmm6              ; running sums = 0
        pxor    xmm7, xmm7              ; compare against zero

%rep SAD16_ROW_PAIRS
        MEAN_16x16_SSE2
%endrep

        mov     eax, [esp + 4]          ; rewind src for the second pass

        ; Build a register holding the mean in every byte.
        pshufd  xmm7, xmm6, SHUF_DWORD2_TO_LOW
        paddusw xmm7, xmm6              ; word 0 = total pixel sum
        pxor    xmm6, xmm6              ; reset accumulator for pass 2
        psrlw   xmm7, 8                 ; sum / 256 => mean
        pshuflw xmm7, xmm7, 0           ; mean in the four low words
        packuswb xmm7, xmm7             ; words -> bytes; dword 0 = 4 x mean
        pshufd  xmm7, xmm7, 00000000b   ; broadcast dword 0 to all dwords

        ; Pass 2: sum of |byte - mean|.
%rep SAD16_ROW_PAIRS
        MEAN_16x16_SSE2
%endrep

        ; Fold the upper-half partial sum onto the lower one.
        pshufd  xmm7, xmm6, SHUF_DWORD2_TO_LOW
        paddusw xmm7, xmm6
        pextrw  eax, xmm7, 0            ; return word 0
        ret
.endfunc:
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -