📄 colorspace_yuv_mmx.asm

📁 这是一个压缩解压包,用C语言进行编程的,里面有详细的源代码.
💻 ASM
字号:
;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - MMX and XMM YV12->YV12 conversion -
; *
; *  Copyright(C) 2001 Michael Militzer <isibaar@xvid.org>
; *
; *  This program is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: colorspace_yuv_mmx.asm,v 1.5 2004/08/29 10:02:38 edgomez Exp $
; *
; ***************************************************************************/

; Syntax: NASM (Intel operand order). Target: 32-bit x86, arguments on the stack.

BITS 32

;------------------------------------------------------------------------------
; cglobal ( NAME )
; Exports NAME as a global symbol.
;   PREFIX defined      -> the exported symbol gets a leading underscore and
;                          NAME is redefined to the underscored form
;   MARK_FUNCS defined  -> the symbol is declared as ":function" with its size
;                          computed from the NAME.endfunc label
;------------------------------------------------------------------------------

%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			global _%1:function %1.endfunc-%1
			%define %1 _%1:function %1.endfunc-%1
		%else
			global _%1
			%define %1 _%1
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
		%else
			global %1
		%endif
	%endif
%endmacro

;=============================================================================
; Helper macros
;=============================================================================

;------------------------------------------------------------------------------
; PLANE_COPY ( DST, DST_DIF, SRC, SRC_DIF, WIDTH, HEIGHT, OPT )
; DST		dst buffer
; DST_DIF	dst stride difference (e.g. stride - width)
; SRC		src buffer
; SRC_DIF	src stride difference (e.g. stride - width)
; WIDTH		width
; HEIGHT	height
; OPT		0=plain mmx, 1=xmm
;
; Copies HEIGHT rows of WIDTH bytes from SRC to DST. Each row is copied in
; 64-byte chunks, then 16-byte chunks, then byte-wise (rep movsb); after each
; row SRC_DIF / DST_DIF are added to the pointers to reach the next row.
;
; Clobbers: eax, ebx, ecx, edx, esi, edi, ebp, mm0-mm7, flags.
; The macro never changes esp, so esp-relative memory operands may be passed
; as arguments.
; NOTE: rows are counted down with dec/jnz after the first row has been
; copied, so HEIGHT must be >= 1 (a zero HEIGHT would wrap the counter).
;------------------------------------------------------------------------------

%macro	PLANE_COPY	7
%define DST			%1
%define DST_DIF		%2
%define SRC			%3
%define SRC_DIF		%4
%define WIDTH		%5
%define HEIGHT		%6
%define OPT			%7

  mov eax, WIDTH
  mov ebp, HEIGHT           ; ebp = rows left to copy
  mov esi, SRC
  mov edi, DST

  mov ebx, eax
  shr eax, 6                ; eax = width / 64
  and ebx, 63               ; remainder = width % 64
  mov edx, ebx
  shr ebx, 4                ; ebx = remainder / 16
  and edx, 15               ; edx = remainder % 16

%%loop64_start:             ; ---- start of one row ----
  or eax, eax
  jz %%loop16_start         ; no 64-byte chunks in this row
  mov ecx, eax              ; ecx = number of 64-byte chunks
%%loop64:
%if OPT == 1                ; xmm
  prefetchnta [esi + 64]    ; non temporal prefetch
  prefetchnta [esi + 96]
%endif
  movq mm1, [esi]           ; read 64 bytes from src
  movq mm2, [esi + 8]
  movq mm3, [esi + 16]
  movq mm4, [esi + 24]
  movq mm5, [esi + 32]
  movq mm6, [esi + 40]
  movq mm7, [esi + 48]
  movq mm0, [esi + 56]

%if OPT == 0                ; plain mmx
  movq [edi], mm1           ; write 64 bytes to dst
  movq [edi + 8], mm2
  movq [edi + 16], mm3
  movq [edi + 24], mm4
  movq [edi + 32], mm5
  movq [edi + 40], mm6
  movq [edi + 48], mm7
  movq [edi + 56], mm0
%else
  movntq [edi], mm1         ; write 64 bytes to dst (non temporal store)
  movntq [edi + 8], mm2
  movntq [edi + 16], mm3
  movntq [edi + 24], mm4
  movntq [edi + 32], mm5
  movntq [edi + 40], mm6
  movntq [edi + 48], mm7
  movntq [edi + 56], mm0
%endif

  add esi, 64
  add edi, 64
  dec ecx
  jnz %%loop64

%%loop16_start:
  or ebx, ebx
  jz %%loop1_start          ; no 16-byte chunks in this row
  mov ecx, ebx              ; ecx = number of 16-byte chunks
%%loop16:
  movq mm1, [esi]
  movq mm2, [esi + 8]
%if OPT == 0                ; plain mmx
  movq [edi], mm1
  movq [edi + 8], mm2
%else
  movntq [edi], mm1
  movntq [edi + 8], mm2
%endif

  add esi, 16
  add edi, 16
  dec ecx
  jnz %%loop16

%%loop1_start:
  mov ecx, edx              ; ecx = trailing bytes (width % 16)
  rep movsb                 ; copies nothing when ecx == 0

  add esi, SRC_DIF          ; advance both pointers to the next row
  add edi, DST_DIF
  dec ebp
  jnz near %%loop64_start
%endmacro

;------------------------------------------------------------------------------
; MAKE_YV12_TO_YV12( NAME, OPT )
; NAME	function name
; OPT	0=plain mmx, 1=xmm
;
; yv12_to_yv12_mmx(uint8_t * y_dst, uint8_t * u_dst, uint8_t * v_dst,
; 				int y_dst_stride, int uv_dst_stride,
; 				uint8_t * y_src, uint8_t * u_src, uint8_t * v_src,
; 				int y_src_stride, int uv_src_stride,
; 				int width, int height, int vflip)
;
; Copies the Y plane (width x height) and the U and V planes
; (width/2 x height/2) from the source buffers to the destination buffers.
; When vflip is non-zero the source planes are read bottom-up: each source
; pointer is moved to its last row and the source stride is negated.
;
; All arguments are read from the stack. ebx, esi, edi and ebp are saved and
; restored; eax, ecx, edx and mm0-mm7 are clobbered. The source pointer and
; source stride argument slots on the stack are overwritten when vflip is set.
; NOTE(review): the routine executes no emms (and no sfence after the movntq
; stores of the xmm variant); presumably the caller handles that - confirm.
;------------------------------------------------------------------------------

%macro	MAKE_YV12_TO_YV12	2
%define	NAME		%1
%define	OPT			%2
ALIGN 16
cglobal NAME
NAME:
%define pushsize	16
%define localsize	24

; Stack arguments, addressed relative to esp *after* the four pushes and the
; "sub esp, localsize" below.
%define vflip			esp + localsize + pushsize + 52
%define height			esp + localsize + pushsize + 48
%define width        	esp + localsize + pushsize + 44
%define uv_src_stride	esp + localsize + pushsize + 40
%define y_src_stride	esp + localsize + pushsize + 36
%define v_src			esp	+ localsize + pushsize + 32
%define u_src   		esp + localsize + pushsize + 28
%define y_src		    esp + localsize + pushsize + 24
%define uv_dst_stride	esp + localsize + pushsize + 20
%define y_dst_stride	esp + localsize + pushsize + 16
%define v_dst			esp	+ localsize + pushsize + 12
%define u_dst   		esp + localsize + pushsize + 8
%define y_dst		    esp + localsize + pushsize + 4
%define _ip				esp + localsize + pushsize + 0

  push ebx	;	esp + localsize + 12
  push esi	;	esp + localsize + 8
  push edi	;	esp + localsize + 4
  push ebp	;	esp + localsize + 0

; Local variables
%define width2			esp + localsize - 4
%define height2			esp + localsize - 8
%define y_src_dif		esp + localsize - 12
%define y_dst_dif		esp + localsize - 16
%define uv_src_dif		esp + localsize - 20
%define uv_dst_dif		esp + localsize - 24

  sub esp, localsize

  mov eax, [width]
  mov ebx, [height]
  shr eax, 1                    ; calculate width/2, height/2
  shr ebx, 1
  mov [width2], eax
  mov [height2], ebx

  mov ebp, [vflip]
  or ebp, ebp
  jz near .dont_flip

; flipping support
; While edx is pushed, the esp-relative names above are off by 4, so no
; argument or local may be accessed between each push/pop pair.
  mov eax, [height]
  mov esi, [y_src]
  mov edx, [y_src_stride]
  sub eax, 1                    ; eax = height - 1 (index of the last row)
  push edx                      ; mul overwrites edx with the high dword
  mul edx
  pop edx
  add esi, eax                  ; y_src += (height-1) * y_src_stride
  neg edx
  mov [y_src], esi
  mov [y_src_stride], edx       ; y_src_stride = -y_src_stride

  mov eax, [height2]
  mov esi, [u_src]
  mov edi, [v_src]
  mov edx, [uv_src_stride]
  sub eax, 1                    ; eax = height2 - 1
  push edx                      ; mul overwrites edx with the high dword
  mul edx
  pop edx
  add esi, eax                  ; u_src += (height2-1) * uv_src_stride
  add edi, eax                  ; v_src += (height2-1) * uv_src_stride
  neg edx
  mov [u_src], esi
  mov [v_src], edi
  mov [uv_src_stride], edx      ; uv_src_stride = -uv_src_stride

.dont_flip:

  mov eax, [y_src_stride]
  mov ebx, [y_dst_stride]
  mov ecx, [uv_src_stride]
  mov edx, [uv_dst_stride]
  sub eax, [width]
  sub ebx, [width]
  sub ecx, [width2]
  sub edx, [width2]
  mov [y_src_dif], eax      ; y_src_dif = y_src_stride - width
  mov [y_dst_dif], ebx      ; y_dst_dif = y_dst_stride - width
  mov [uv_src_dif], ecx     ; uv_src_dif = uv_src_stride - width2
  mov [uv_dst_dif], edx     ; uv_dst_dif = uv_dst_stride - width2

  PLANE_COPY [y_dst], [y_dst_dif],  [y_src], [y_src_dif],  [width],  [height], OPT
  PLANE_COPY [u_dst], [uv_dst_dif], [u_src], [uv_src_dif], [width2], [height2], OPT
  PLANE_COPY [v_dst], [uv_dst_dif], [v_src], [uv_src_dif], [width2], [height2], OPT

  add esp, localsize
  pop ebp
  pop edi
  pop esi
  pop ebx

  ret
.endfunc:
%endmacro

;=============================================================================
; Code
;=============================================================================

SECTION .text

MAKE_YV12_TO_YV12	yv12_to_yv12_mmx, 0

MAKE_YV12_TO_YV12	yv12_to_yv12_xmm, 1
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -