📄 p2pyuv.asm
字号:
;
; ***** BEGIN LICENSE BLOCK *****
; Source last modified: $Id: p2pyuv.asm,v 1.1.1.1.50.1 2004/07/09 02:00:18 hubbe Exp $
;
; Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
;
; The contents of this file, and the files included with this file,
; are subject to the current version of the RealNetworks Public
; Source License (the "RPSL") available at
; http://www.helixcommunity.org/content/rpsl unless you have licensed
; the file under the current version of the RealNetworks Community
; Source License (the "RCSL") available at
; http://www.helixcommunity.org/content/rcsl, in which case the RCSL
; will apply. You may also obtain the license terms directly from
; RealNetworks. You may not use this file except in compliance with
; the RPSL or, if you have a valid RCSL with RealNetworks applicable
; to this file, the RCSL. Please see the applicable RPSL or RCSL for
; the rights, obligations and limitations governing use of the
; contents of the file.
;
; Alternatively, the contents of this file may be used under the
; terms of the GNU General Public License Version 2 or later (the
; "GPL") in which case the provisions of the GPL are applicable
; instead of those above. If you wish to allow use of your version of
; this file only under the terms of the GPL, and not to allow others
; to use your version of this file under the terms of either the RPSL
; or RCSL, indicate your decision by deleting the provisions above
; and replace them with the notice and other provisions required by
; the GPL. If you do not delete the provisions above, a recipient may
; use your version of this file under the terms of any one of the
; RPSL, the RCSL or the GPL.
;
; This file is part of the Helix DNA Technology. RealNetworks is the
; developer of the Original Code and owns the copyrights in the
; portions it created.
;
; This file, and the files included with this file, is distributed
; and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
; KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
; ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
; ENJOYMENT OR NON-INFRINGEMENT.
;
; Technology Compatibility Kit Test Suite(s) Location:
; http://www.helixcommunity.org/content/tck
;
; Contributor(s):
;
; ***** END LICENSE BLOCK *****
;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Packed YUV To PlanarYUV MMX converters.
;;
;; Syntax: NASM (Intel operand order), 32-bit x86 with MMX.
;; Arguments are read from the stack relative to esp (see 'parms').
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;
;; Map the public names onto the symbol decoration of the selected
;; object format: COFF and WIN32 get a leading underscore, ELF and
;; AOUTB use the plain name.
;;
;; YUY2ToPlanarYUV_MMX
;; UYVYToPlanarYUV_MMX
;;
%ifdef COFF
        %define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef WIN32
        %define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef ELF
        %define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%elifdef AOUTB
        %define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%else
        %error linking format currently not supported by p2pyuv.asm
%endif

        ;; Export the functions implemented here.
        global YUY2ToPlanarYUV_MMX
        global UYVYToPlanarYUV_MMX

;========================= DATA SEGMENT ============================
section .data
align 8

;; 64-bit byte-select masks, applied to each 16-bit word of an MMX
;; register: MaskChroma keeps the high byte, MaskLuma the low byte.
;; (The names match the YUY2 byte order, where luma is the low byte.)
MaskChroma      dd 0xFF00FF00, 0xFF00FF00
MaskLuma        dd 0x00FF00FF, 0x00FF00FF

;============================= CODE SEGMENT =======================
section .text

;;
;; This is our stack params definition. It is used for both
;; YUY2 and UYVY routines as they both take the same parms.
;;
;; The layout is valid only after the prologue has pushed six
;; registers and reserved numtemps DWORDs, i.e. offset 0 is [esp]
;; inside the routine body. var(x) turns a field into [esp+offset].
;;
%assign numtemps 4
%define var(a) [esp+a]

struc parms
        ;Temps on stack
        .tmp1       resd 1      ;General DWORD temp.
        .tmp2       resd 1      ;General DWORD temp.
        .tmp3       resd 1      ;General DWORD temp.
        .tmp4       resd 1      ;General DWORD temp.

        ; Space for reg pushes and return address.
        .registers  resd 6      ;pushed registers
        .return     resd 1      ;return address

        ; input params
        .d1:        resd 1      ;unsigned char* d1,
        .d2:        resd 1      ;unsigned char* d2,
        .du:        resd 1      ;unsigned char* du,
        .dv:        resd 1      ;unsigned char* dv,
        .dyPitch:   resd 1      ;INT32 dyPitch,
        .duPitch:   resd 1      ;INT32 duPitch,
        .dvPitch:   resd 1      ;INT32 dvPitch,
        .dest_dx:   resd 1      ;INT32 dest_dx,
        .dest_dy:   resd 1      ;INT32 dest_dy,
        .s1:        resd 1      ;unsigned char* s1,
        .s2:        resd 1      ;unsigned char* s2,
        .src_pitch: resd 1      ;INT32 src_pitch
endstruc

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Our UYVY to Planar YUV MMX
;;
;; UYVYToPlanarYUV_MMX(unsigned char* d1, unsigned char* d2,
;;                     unsigned char* du, unsigned char* dv,
;;                     INT32 dyPitch, INT32 duPitch, INT32 dvPitch,
;;                     INT32 dest_dx, INT32 dest_dy,
;;                     unsigned char* s1, unsigned char* s2,
;;                     INT32 src_pitch)
;;
;; Converts two packed UYVY source lines (s1, s2) at a time into two
;; planar luma lines (d1, d2) and one line each of U and V (du, dv);
;; the chroma written is the truncating average of the two source
;; lines.
;;
;; In:    all arguments on the stack, in the order listed in 'parms'
;;        (presumably a cdecl-style call -- confirm against the C
;;        prototype).
;; Saved: ebx, edi, esi, ebp, ecx, edx are pushed on entry.
;; Stack: numtemps*4 bytes of scratch:
;;          tmp1 = dest_dx/4 (column-loop trip count)
;;          tmp2 = dest_dy/2 (row-pair count)
;;          tmp3 = 1 if one macro pixel is left over per line, else 0
;;
;; Register roles inside the loop:
;;   edi = s1      esi = s2 (borrowed for du/dv, restored from mm3)
;;   ebx = d1      ebp = d2
;;   ecx = column index, one step = 2 macro pixels = 8 source bytes
;;   mm4 = du      mm5 = dv
;;   mm6 = MaskLuma                mm7 = 0
;;
;; NOTE(review): the column loop tests its count only at the bottom,
;; so dest_dx < 4 (tmp1 == 0) enters the loop with a trip count of 0.
;; Confirm callers never pass such a width.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
UYVYToPlanarYUV_MMX:
        ;; Save some stuff...
        push    ebx
        push    edi
        push    esi
        push    ebp
        push    ecx
        push    edx

        ; Make room for temps on stack
        sub     esp, numtemps*4

        ;; Load our dest chroma pointers.
        movd    mm4, var(parms.du)          ; mm4 = du
        movd    mm5, var(parms.dv)          ; mm5 = dv

        ;; Set up the loops
        mov     eax, var(parms.dest_dy)
        mov     ecx, var(parms.dest_dx)
        shr     eax, 1                      ; eax = dest_dy/2
        shr     ecx, 2                      ; ecx = dest_dx/4, CF = bit 1 of dest_dx
        jnc     even1                       ; Was dest_dx divisible by 4?
odd1:
        ; We have 1 macro pixel left over
        mov     DWORD var(parms.tmp3), 1    ; Remember that one macro pixel is left over.
        jmp     cont1
even1:
        mov     DWORD var(parms.tmp3), 0    ; No leftover macro pixel.
cont1:
        mov     var(parms.tmp1), ecx        ; save dx loop count
        mov     var(parms.tmp2), eax        ; save dy loop count
        xor     ecx, ecx                    ; column index starts at 0

        mov     edi, var(parms.s1)          ; s1
        mov     esi, var(parms.s2)          ; s2
        mov     ebx, var(parms.d1)          ; d1
        mov     ebp, var(parms.d2)          ; d2

        pxor    mm7, mm7                    ; mm7 = 00000000 00000000
        movq    mm6, [MaskLuma]             ; mm6 = low-byte-of-each-word mask
        xor     eax, eax

DYLOOP1:
DXLOOP1:
        ;; Process 2 macro pixels at a time, 2 lines at a time.
        movq    mm0, [edi+ecx*8]            ; mm0 = y4v2y3u2 y2v1y1u1 (s1)
        movq    mm1, [esi+ecx*8]            ; mm1 = y4v2y3u2 y2v1y1u1 (s2)

        ;; The byte swap below is the only difference between the
        ;; YUY2 and UYVY conversion routines. I really should make
        ;; time to combine the 2 routines to save a little download
        ;; space.

        ;; Byte swap the 2 MMX registers so the data is in YUY2 order.
        ;; s1
        movq    mm2, mm0
        movq    mm3, mm0
        psrlq   mm2, 8                      ; mm2 = 00y4v2y3 u2y2v1y1
        psllq   mm3, 8                      ; mm3 = v2y3u2y2 v1y1u100
        pand    mm3, [MaskChroma]           ; mm3 = v200u200 v100u100
        pand    mm2, mm6                    ; mm2 = 00y400y3 00y200y1
        por     mm2, mm3                    ; mm2 = v2y4u2y3 v1y2u1y1
        movq    mm0, mm2

        ;; s2
        movq    mm2, mm1
        movq    mm3, mm1
        psrlq   mm2, 8                      ; mm2 = 00y4v2y3 u2y2v1y1
        psllq   mm3, 8                      ; mm3 = v2y3u2y2 v1y1u100
        pand    mm3, [MaskChroma]           ; mm3 = v200u200 v100u100
        pand    mm2, mm6                    ; mm2 = 00y400y3 00y200y1
        por     mm2, mm3                    ; mm2 = v2y4u2y3 v1y2u1y1
        movq    mm1, mm2

        ;; Store luma values in planar YUV space
        movq    mm2, mm0
        movq    mm3, mm1
        pand    mm2, mm6                    ; mm2 = 00Y400Y3 00Y200Y1 of s1
        pand    mm3, mm6                    ; mm3 = 00Y400Y3 00Y200Y1 of s2
        packuswb mm2, mm7                   ; mm2 = 00000000 Y4Y3Y2Y1 of s1
        packuswb mm3, mm7                   ; mm3 = 00000000 Y4Y3Y2Y1 of s2
        movd    [ebx+ecx*4], mm2            ; d1 = luma of s1
        psrlw   mm0, 8                      ; mm0 = 00v200u2 00v100u1 of s1
        movd    [ebp+ecx*4], mm3            ; d2 = luma of s2

        ;; Compute averaged chroma values
        psrlw   mm1, 8                      ; mm1 = 00v200u2 00v100u1 of s2
        paddw   mm0, mm1                    ; mm0 = per-word sums s1+s2
        psrlw   mm0, 1                      ; mm0 = 00v200u2 00v100u1, (s1+s2)/2

        ;;
        ;; Store chromas in du and dv.
        ;;
        movd    mm3, esi                    ; save esi (s2); it is borrowed for du/dv

        ;; unpack and pack: gather both U bytes, then both V bytes
        packuswb mm0, mm0                   ; mm0 = v2u2v1u1 v2u2v1u1
        punpcklbw mm0, mm0                  ; mm0 = v2v2u2u2 v1v1u1u1
        movd    esi, mm4                    ; esi = du
        movq    mm1, mm0
        psrlq   mm1, 32                     ; mm1 = 00000000 v2v2u2u2
        punpcklbw mm0, mm1                  ; mm0 = v2v1v2v1 u2u1u2u1

        ;; Store the U data
        movd    edx, mm0                    ; edx = u2u1u2u1 (dx = u2u1)
        psrlq   mm0, 32                     ; mm0 = 00000000 v2v1v2v1
        mov     WORD [esi+ecx*2], dx        ; store 2 bytes of U data.

        ;; Store the V data
        movd    esi, mm5                    ; esi = dv
        movd    edx, mm0                    ; edx = v2v1v2v1 (dx = v2v1)
        mov     WORD [esi+ecx*2], dx        ; store 2 bytes of V data.

        movd    esi, mm3                    ; restore esi (s2)

        ;; inc DX counter and loop
        inc     ecx
        cmp     ecx, var(parms.tmp1)
        jne     NEAR DXLOOP1

        ;; Now we have to check for any pixels left over if dest_dx
        ;; is not divisible by 4. Since dest_dx must be at least even
        ;; we can only have 0 or two (1 macro pixel).
        ;;
        ;; NOTE(review): 'mov' does not modify the flags, so the 'jz'
        ;; below still sees ZF=1 from the 'cmp' that ended DXLOOP1 and
        ;; is always taken: the odd-macro-pixel code that follows is
        ;; unreachable as written. The 'mov' also replaces the column
        ;; index in ecx with the tmp3 flag, although the code below
        ;; uses ecx as an index. Behaviour is left unchanged here;
        ;; fix together with the rest of the odd path.
        mov     ecx, var(parms.tmp3)
        jz      nextline1

        ;;
        ;; Do odd macro pixel here. ===========
        ;;
        ;; NOTE(review): unlike the main loop there is no UYVY byte
        ;; swap here, so the layouts in the comments below describe
        ;; YUY2-ordered data -- confirm before enabling this path.
        ;;
        movd    mm0, [edi+ecx*8]            ; mm0 = 00000000 v1y2u1y1
        movd    mm1, [esi+ecx*8]            ; mm1 = 00000000 v1y2u1y1

        ;; Store luma values in planar YUV space
        movq    mm2, mm0
        movq    mm3, mm1
        pand    mm2, mm6                    ; mm2 = 00000000 00Y200Y1 of s1
        pand    mm3, mm6                    ; mm3 = 00000000 00Y200Y1 of s2
        packuswb mm2, mm7                   ; mm2 = 00000000 0000Y2Y1 of s1
        packuswb mm3, mm7                   ; mm3 = 00000000 0000Y2Y1 of s2
        movd    edx, mm2                    ; grab lower 32 bits of mm2
        mov     WORD [ebx+ecx*4], dx        ; just store the low 16 bits (2 luma bytes)
        movd    edx, mm3                    ; grab lower 32 bits of mm3
        mov     WORD [ebp+ecx*4], dx        ; just store the low 16 bits (2 luma bytes)

        ;; Compute averaged chroma values
        psrlw   mm0, 8                      ; mm0 = 00000000 00v100u1 of s1
        psrlw   mm1, 8                      ; mm1 = 00000000 00v100u1 of s2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -