⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 colorspace_yuyv_mmx.asm

📁 mpeg4 video codec mpeg4 video codec
💻 ASM
字号:
;/****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  - MMX and XMM YUYV<->YV12 conversion -
; *
; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
; *
; *  This program is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; * $Id: colorspace_yuyv_mmx.asm,v 1.1 2006/02/23 15:13:28 kevin-fu Exp $
; *
; ***************************************************************************/

; NASM syntax, 32-bit x86 target (cdecl-style stack arguments).
BITS 32

; Declare a global symbol, optionally prefixed with '_' (PREFIX) and/or
; marked as an ELF function with explicit size (MARK_FUNCS, using the
; per-function .endfunc label emitted by MAKE_COLORSPACE below).
%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			global _%1:function %1.endfunc-%1
			%define %1 _%1:function %1.endfunc-%1
		%else
			global _%1
			%define %1 _%1
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
		%else
			global %1
		%endif
	%endif
%endmacro

;=============================================================================
; Read only data
;=============================================================================

%ifdef FORMAT_COFF
SECTION .rodata
%else
SECTION .rodata align=16
%endif

;-----------------------------------------------------------------------------
; yuyv/uyvy mask for extracting yuv components
;-----------------------------------------------------------------------------

;				y     u     y     v     y     u     y     v
ALIGN 16
yuyv_mask:	db 0xff,  0,  0xff,   0,   0xff,  0,   0xff,  0

; four words of 1, used as the +1 rounding term before the halving shift
mmx_one:    dw 1, 1, 1, 1

;=============================================================================
; helper macros used with colorspace_mmx.inc
;=============================================================================

;-----------------------------------------------------------------------------
; YUYV_TO_YV12( TYPE, PAVG )
;
; Convert two rows of 8 packed YUYV/UYVY pixels to planar YV12,
; averaging the two rows' chroma samples (vertical 2:1 subsample).
;
; TYPE	0=yuyv, 1=uyvy
; PAVG  0=mmx, pavgusb=3dnow, pavgb=xmm
;
; bytes=2, pixels = 8, vpixels=2
;
; Register roles (set up by MAKE_COLORSPACE): edi=x_ptr (packed src),
; esi=y_ptr, ebx=u_ptr, ecx=v_ptr, edx=x_stride, eax=y_stride.
;-----------------------------------------------------------------------------

%macro YUYV_TO_YV12_INIT		2
  movq mm7, [yuyv_mask]         ; keep the byte mask resident in mm7
%endmacro

%macro YUYV_TO_YV12             2
  movq mm0, [edi]               ; x_ptr[0]
  movq mm1, [edi + 8]           ; x_ptr[8]
  movq mm2, [edi + edx]         ; x_ptr[x_stride + 0]
  movq mm3, [edi + edx + 8]     ; x_ptr[x_stride + 8]

    ; average uv-components
;---[ plain mmx ]----------------------------------------------------
%ifidn %2,0     ; if (%2 eq "0")
  movq mm4, mm0
  movq mm5, mm2
%if %1 == 0         ; yuyv: chroma is in the odd bytes, shift it down
  psrlw mm4, 8
  psrlw mm5, 8
%endif
  pand mm4, mm7                 ; isolate uv bytes as words
  pand mm5, mm7
  paddw mm4, mm5                ; sum of row0+row1 chroma (left half)
  movq mm5, mm1
  movq mm6, mm3
%if %1 == 0         ; yuyv
  psrlw mm5, 8
  psrlw mm6, 8
%endif
  pand mm5, mm7
  pand mm6, mm7
  paddw mm5, mm6                ; sum of row0+row1 chroma (right half)
  paddw mm4, [mmx_one]      ; +1 rounding
  paddw mm5, [mmx_one]      ;
  psrlw mm4, 1                  ; (a+b+1)/2 rounded average
  psrlw mm5, 1
;---[ 3dnow/xmm ]----------------------------------------------------
%else
  movq mm4, mm0
  movq mm5, mm1
  %2 mm4, mm2           ;pavgb/pavgusb mm4, mm2
  %2 mm5, mm3           ;pavgb/pavgusb mm5, mm3
  ; disabled "0 rounding" correction (pavg rounds up); kept for reference
  ;;movq mm6, mm0       ; 0 rounding
  ;;pxor mm6, mm2       ;
  ;;psubb mm4, mm6      ;
  ;;movq mm6, mm1       ;
  ;;pxor mm6, mm3       ;
  ;;psubb mm5, mm5      ;
  ; NOTE(review): in the disabled line above, `psubb mm5, mm5` looks like a
  ; typo for `psubb mm5, mm6` (it would zero mm5) — confirm before re-enabling.
%if %1 == 0             ; yuyv
  psrlw mm4, 8
  psrlw mm5, 8
%endif
  pand mm4, mm7
  pand mm5, mm7
%endif
;--------------------------------------------------------------------

    ; write y-component
%if %1 == 1         ; uyvy: luma is in the odd bytes, shift it down
  psrlw mm0, 8
  psrlw mm1, 8
  psrlw mm2, 8
  psrlw mm3, 8
%endif
  pand mm0, mm7
  pand mm1, mm7
  pand mm2, mm7
  pand mm3, mm7
  packuswb mm0, mm1             ; 8 luma bytes, row 0
  packuswb mm2, mm3             ; 8 luma bytes, row 1
%ifidn %2,pavgb         ; xmm: non-temporal store bypasses the cache
  movntq [esi], mm0
  movntq [esi+eax], mm2
%else                   ; plain mmx,3dnow
  movq [esi], mm0
  movq [esi+eax], mm2
%endif

    ; write uv-components
  packuswb mm4, mm5             ; [vuvu|vuvu] averaged chroma
  movq mm5, mm4
  psrlq mm4, 8                  ; align v bytes with the mask
  pand mm5, mm7                 ; u bytes
  pand mm4, mm7                 ; v bytes
  packuswb mm5,mm5              ; 4 u bytes in low dword
  packuswb mm4,mm4              ; 4 v bytes in low dword
  movd [ebx],mm5                ; *u_ptr
  movd [ecx],mm4                ; *v_ptr
%endmacro

;-----------------------------------------------------------------------------
; YV12_TO_YUYV( TYPE )
;
; Convert two rows of 8 planar YV12 pixels to packed YUYV/UYVY; the single
; chroma row is replicated into both output rows.
;
; TYPE  0=yuyv, 1=uyvy
;
; bytes=2, pixels = 8, vpixels=2
;-----------------------------------------------------------------------------

%macro YV12_TO_YUYV_INIT        2
%endmacro

%macro YV12_TO_YUYV             2
  movd mm4, [ebx]               ; [    |uuuu]
  movd mm5, [ecx]               ; [    |vvvv]
  movq mm0, [esi]               ; [yyyy|yyyy] ; y row 0
  movq mm1, [esi+eax]           ; [yyyy|yyyy] ; y row 1
  punpcklbw mm4, mm5            ; [vuvu|vuvu] ; uv row 0
%if %1 == 0     ; YUYV
  movq mm2, mm0
  movq mm3, mm1
  punpcklbw mm0, mm4            ; [vyuy|vyuy] ; y row 0 + 0
  punpckhbw mm2, mm4            ; [vyuy|vyuy] ; y row 0 + 8
  punpcklbw mm1, mm4            ; [vyuy|vyuy] ; y row 1 + 0
  punpckhbw mm3, mm4            ; [vyuy|vyuy] ; y row 1 + 8
  movq [edi], mm0
  movq [edi+8], mm2
  movq [edi+edx], mm1
  movq [edi+edx+8], mm3
%else           ; UYVY
  movq mm5, mm4
  movq mm6, mm4
  movq mm7, mm4
  punpcklbw mm4, mm0            ; [yvyu|yvyu]   ; y row 0 + 0
  punpckhbw mm5, mm0            ; [yvyu|yvyu]   ; y row 0 + 8
  punpcklbw mm6, mm1            ; [yvyu|yvyu]   ; y row 1 + 0
  punpckhbw mm7, mm1            ; [yvyu|yvyu]   ; y row 1 + 8
  movq [edi], mm4
  movq [edi+8], mm5
  movq [edi+edx], mm6
  movq [edi+edx+8], mm7
%endif
%endmacro

;------------------------------------------------------------------------------
; YV12_TO_YUYVI( TYPE )
;
; Interlaced variant: converts four rows at a time, pairing chroma row 0
; with luma rows 0/2 and chroma row 1 with luma rows 1/3 (field order).
;
; TYPE  0=yuyv, 1=uyvy
;
; bytes=2, pixels = 8, vpixels=4
;------------------------------------------------------------------------------

%macro YV12_TO_YUYVI_INIT       2
%endmacro

%macro YV12_TO_YUYVI                2
  ; ebp normally holds the width counter; temporarily swap it with the
  ; stacked uv_stride so a second chroma row can be addressed, then restore.
  xchg ebp, [uv_stride]
  movd mm0, [ebx]               ; [    |uuuu]
  movd mm1, [ebx+ebp]           ; [    |uuuu]
  punpcklbw mm0, [ecx]          ; [vuvu|vuvu] ; uv row 0
  punpcklbw mm1, [ecx+ebp]      ; [vuvu|vuvu] ; uv row 1
  xchg ebp, [uv_stride]
%if %1 == 0     ; YUYV
  movq mm4, [esi]               ; [yyyy|yyyy] ; y row 0
  movq mm6, [esi+eax]           ; [yyyy|yyyy] ; y row 1
  movq mm5, mm4
  movq mm7, mm6
  punpcklbw mm4, mm0            ; [yuyv|yuyv] ; y row 0 + 0
  punpckhbw mm5, mm0            ; [yuyv|yuyv] ; y row 0 + 8
  punpcklbw mm6, mm1            ; [yuyv|yuyv] ; y row 1 + 0
  punpckhbw mm7, mm1            ; [yuyv|yuyv] ; y row 1 + 8
  movq [edi], mm4
  movq [edi+8], mm5
  movq [edi+edx], mm6
  movq [edi+edx+8], mm7
  push esi
  push edi
  add esi, eax                  ; advance to rows 2/3; pointers restored below
  add edi, edx
  movq mm4, [esi+eax]           ; [yyyy|yyyy] ; y row 2
  movq mm6, [esi+2*eax]         ; [yyyy|yyyy] ; y row 3
  movq mm5, mm4
  movq mm7, mm6
  punpcklbw mm4, mm0            ; [yuyv|yuyv] ; y row 2 + 0
  punpckhbw mm5, mm0            ; [yuyv|yuyv] ; y row 2 + 8
  punpcklbw mm6, mm1            ; [yuyv|yuyv] ; y row 3 + 0
  punpckhbw mm7, mm1            ; [yuyv|yuyv] ; y row 3 + 8
  movq [edi+edx], mm4
  movq [edi+edx+8], mm5
  movq [edi+2*edx], mm6
  movq [edi+2*edx+8], mm7
  pop edi
  pop esi
%else           ; UYVY
  movq mm2, [esi]               ; [yyyy|yyyy] ; y row 0
  movq mm3, [esi+eax]           ; [yyyy|yyyy] ; y row 1
  movq mm4, mm0
  movq mm5, mm0
  movq mm6, mm1
  movq mm7, mm1
  punpcklbw mm4, mm2            ; [uyvy|uyvy] ; y row 0 + 0
  punpckhbw mm5, mm2            ; [uyvy|uyvy] ; y row 0 + 8
  punpcklbw mm6, mm3            ; [uyvy|uyvy] ; y row 1 + 0
  punpckhbw mm7, mm3            ; [uyvy|uyvy] ; y row 1 + 8
  movq [edi], mm4
  movq [edi+8], mm5
  movq [edi+edx], mm6
  movq [edi+edx+8], mm7
  push esi
  push edi
  add esi, eax                  ; advance to rows 2/3; pointers restored below
  add edi, edx
  movq mm2, [esi+eax]           ; [yyyy|yyyy] ; y row 2
  movq mm3, [esi+2*eax]         ; [yyyy|yyyy] ; y row 3
  movq mm4, mm0
  movq mm5, mm0
  movq mm6, mm1
  movq mm7, mm1
  punpcklbw mm4, mm2            ; [uyvy|uyvy] ; y row 2 + 0
  punpckhbw mm5, mm2            ; [uyvy|uyvy] ; y row 2 + 8
  punpcklbw mm6, mm3            ; [uyvy|uyvy] ; y row 3 + 0
  punpckhbw mm7, mm3            ; [uyvy|uyvy] ; y row 3 + 8
  movq [edi+edx], mm4
  movq [edi+edx+8], mm5
  movq [edi+2*edx], mm6
  movq [edi+2*edx+8], mm7
  pop edi
  pop esi
%endif
%endmacro

;=============================================================================
; Code
;=============================================================================

SECTION .text

;------------------------------------------------------------------------------
;
; MAKE_COLORSPACE(NAME,STACK, BYTES,PIXELS,ROWS, FUNC, ARG1)
;
; This macro provides a assembler width/height scroll loop
; NAME		function name
; STACK		additional stack bytes required by FUNC
; BYTES		bytes-per-pixel for the given colorspace
; PIXELS	pixels (columns) operated on per FUNC call
; VPIXELS	vpixels (rows) operated on per FUNC call
; FUNC		conversion macro name; we expect to find FUNC_INIT and FUNC macros
; ARG1		argument passed to FUNC
; ARG2		second argument passed to FUNC (pavg instruction name, or 0/-1)
;
; throughout the FUNC the registers mean:
; eax		y_stride
; ebx		u_ptr
; ecx		v_ptr
; edx		x_stride
; esi		y_ptr
; edi		x_ptr
; ebp		width
;
; The emitted function takes (cdecl, left to right on the stack):
; x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width,
; height, vflip — see the %define block below.
;
;------------------------------------------------------------------------------
%macro		MAKE_COLORSPACE			8
%define NAME		%1
%define STACK		%2
%define BYTES		%3
%define PIXELS		%4
%define VPIXELS		%5
%define FUNC		%6
%define ARG1		%7
%define ARG2		%8
	; --- define function global/symbol
ALIGN 16
cglobal NAME
NAME:
	; --- init stack ---
%define pushsize	16
%define localsize	20 + STACK

; stack-argument addressing, valid after the 4 pushes + localsize sub below
%define vflip           esp + localsize + pushsize + 40
%define height          esp + localsize + pushsize + 36
%define width           esp + localsize + pushsize + 32
%define uv_stride       esp + localsize + pushsize + 28
%define y_stride        esp + localsize + pushsize + 24
%define v_ptr           esp + localsize + pushsize + 20
%define u_ptr           esp + localsize + pushsize + 16
%define y_ptr           esp + localsize + pushsize + 12
%define x_stride        esp + localsize + pushsize + 8
%define x_ptr           esp + localsize + pushsize + 4
%define _ip             esp + localsize + pushsize + 0

  push ebx    ;   esp + localsize + 16
  push esi    ;   esp + localsize + 8
  push edi    ;   esp + localsize + 4
  push ebp    ;   esp + localsize + 0

; local variables, below the saved registers
%define x_dif           esp + localsize - 4
%define y_dif           esp + localsize - 8
%define uv_dif          esp + localsize - 12
%define fixed_width     esp + localsize - 16
%define tmp_height      esp + localsize - 20

  sub esp, localsize

    ; --- init varibles ---

  mov eax, [width]          ; fixed width
  add eax, 15               ; round width up to a multiple of 16
  and eax, ~15              ;
  mov [fixed_width],eax     ;

  mov ebx, [x_stride]       ;
%rep BYTES
  sub ebx, eax              ; subtract fixed_width once per byte-per-pixel
%endrep
  mov [x_dif], ebx          ; x_dif = x_stride - BYTES*fixed_width

  mov ebx, [y_stride]       ;
  sub ebx, eax              ;
  mov [y_dif], ebx          ; y_dif = y_stride - fixed_width

  mov ebx, [uv_stride]      ;
  mov ecx, eax              ;
  shr ecx, 1                ;
  sub ebx, ecx              ;
  mov [uv_dif], ebx         ; uv_dif = uv_stride - fixed_width/2

  mov esi, [y_ptr]          ; $esi$ = y_ptr
  mov edi, [x_ptr]          ; $edi$ = x_ptr
  mov edx, [x_stride]       ; $edx$ = x_stride
  mov ebp, [height]         ; $ebp$ = height

  mov ebx, [vflip]
  or ebx, ebx
  jz .dont_flip

    ; --- do flipping ---
    ; walk the packed image bottom-up: start at the last row and negate
    ; the packed stride so the main loop below is unchanged

  xor ebx,ebx
%rep BYTES
  sub ebx, eax
%endrep
  sub ebx, edx
  mov [x_dif], ebx          ; x_dif = -BYTES*fixed_width - x_stride

  mov eax, ebp
  sub eax, 1
  push edx                  ; mul clobbers edx (high half of the product)
  mul edx
  pop edx
  add edi, eax              ; $edi$ += (height-1) * x_stride

  neg edx                   ; x_stride = -x_stride

.dont_flip

    ; --- begin loop ---

  mov eax, [y_stride]       ; $eax$ = y_stride
  mov ebx, [u_ptr]          ; $ebx$ = u_ptr
  mov ecx, [v_ptr]          ; $ecx$ = v_ptr

  FUNC %+ _INIT ARG1, ARG2  ; call FUNC_INIT

.y_loop
  mov [tmp_height], ebp     ; ebp switches role: rows left -> columns left
  mov ebp, [fixed_width]

.x_loop
  FUNC ARG1, ARG2           ; call FUNC

  add edi, BYTES*PIXELS     ; x_ptr += BYTES*PIXELS
  add esi, PIXELS           ; y_ptr += PIXELS
  add ebx, PIXELS/2         ; u_ptr += PIXELS/2
  add ecx, PIXELS/2         ; v_ptr += PIXELS/2

  sub ebp, PIXELS           ; $ebp$ -= PIXELS
  jg .x_loop                ; if ($ebp$ > 0) goto .x_loop

  mov ebp, [tmp_height]     ; restore the row counter

  add edi, [x_dif]          ; x_ptr += x_dif + (VPIXELS-1)*x_stride
  add esi, [y_dif]          ; y_ptr += y_dif + (VPIXELS-1)*y_stride
%rep VPIXELS-1
  add edi, edx
  add esi, eax
%endrep

  add ebx, [uv_dif]         ; u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
  add ecx, [uv_dif]         ; v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
%rep (VPIXELS/2)-1
  add ebx, [uv_stride]
  add ecx, [uv_stride]
%endrep

  sub ebp, VPIXELS          ; $ebp$ -= VPIXELS
  jg .y_loop                ; if ($ebp$ > 0) goto .y_loop

  ; cleanup stack & undef everything
  add esp, localsize
  pop ebp
  pop edi
  pop esi
  pop ebx

%undef vflip
%undef height
%undef width
%undef uv_stride
%undef y_stride
%undef v_ptr
%undef u_ptr
%undef y_ptr
%undef x_stride
%undef x_ptr
%undef _ip
%undef x_dif
%undef y_dif
%undef uv_dif
%undef fixed_width
%undef tmp_height
  ret
.endfunc

; NOTE(review): ARG2 is never %undef'd here (unlike ARG1 and the others);
; harmless since each MAKE_COLORSPACE invocation re-%defines it, but
; inconsistent — consider adding `%undef ARG2` for symmetry.
%undef NAME
%undef STACK
%undef BYTES
%undef PIXELS
%undef VPIXELS
%undef FUNC
%undef ARG1
%endmacro

;------------------------------------------------------------------------------

; input (packed -> planar YV12); _mmx/_3dn/_xmm select the chroma-averaging path

MAKE_COLORSPACE	 yuyv_to_yv12_mmx,0,    2,8,2,  YUYV_TO_YV12, 0, 0
MAKE_COLORSPACE	 yuyv_to_yv12_3dn,0,    2,8,2,  YUYV_TO_YV12, 0, pavgusb
MAKE_COLORSPACE	 yuyv_to_yv12_xmm,0,    2,8,2,  YUYV_TO_YV12, 0, pavgb
MAKE_COLORSPACE  uyvy_to_yv12_mmx,0,    2,8,2,  YUYV_TO_YV12, 1, 0
MAKE_COLORSPACE  uyvy_to_yv12_3dn,0,    2,8,2,  YUYV_TO_YV12, 1, pavgusb
MAKE_COLORSPACE  uyvy_to_yv12_xmm,0,    2,8,2,  YUYV_TO_YV12, 1, pavgb

; output (planar YV12 -> packed); *_yuyvi/*_uyvyi are the interlaced variants

MAKE_COLORSPACE  yv12_to_yuyv_mmx,0,    2,8,2,  YV12_TO_YUYV, 0, -1
MAKE_COLORSPACE  yv12_to_uyvy_mmx,0,    2,8,2,  YV12_TO_YUYV, 1, -1
MAKE_COLORSPACE  yv12_to_yuyvi_mmx,0,   2,8,4,  YV12_TO_YUYVI, 0, -1
MAKE_COLORSPACE  yv12_to_uyvyi_mmx,0,   2,8,4,  YV12_TO_YUYVI, 1, -1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -