📄 p2pyuv.asm
字号:
;
; ***** BEGIN LICENSE BLOCK *****
; Version: RCSL 1.0/RPSL 1.0
;
; Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
;
; The contents of this file, and the files included with this file, are
; subject to the current version of the RealNetworks Public Source License
; Version 1.0 (the "RPSL") available at
; http://www.helixcommunity.org/content/rpsl unless you have licensed
; the file under the RealNetworks Community Source License Version 1.0
; (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
; in which case the RCSL will apply. You may also obtain the license terms
; directly from RealNetworks. You may not use this file except in
; compliance with the RPSL or, if you have a valid RCSL with RealNetworks
; applicable to this file, the RCSL. Please see the applicable RPSL or
; RCSL for the rights, obligations and limitations governing use of the
; contents of the file.
;
; This file is part of the Helix DNA Technology. RealNetworks is the
; developer of the Original Code and owns the copyrights in the portions
; it created.
;
; This file, and the files included with this file, is distributed and made
; available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
; EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
; INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
; FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
;
; Technology Compatibility Kit Test Suite(s) Location:
; http://www.helixcommunity.org/content/tck
;
; Contributor(s):
;
; ***** END LICENSE BLOCK *****
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Packed YUV To PlanarYUV MMX converters.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; YUY2ToPlanarYUV_MMX
%ifdef COFF
%define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
%define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef WIN32
%define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
%define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef ELF
%define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
%define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%elifdef AOUTB
%define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
%define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%else
%error linking format currently not supported by alphbablend.asm
%endif
;; Export the functions implemented here.
global YUY2ToPlanarYUV_MMX
global UYVYToPlanarYUV_MMX
;========================= DATA SEGMENT ============================
section .data
align 8
MaskChroma
dd 0xFF00FF00, 0xFF00FF00
MaskLuma
dd 0x00FF00FF, 0x00FF00FF
;============================= CODE SEGMENT =======================
section .text
;;
;; This is our stack params definition. It is used for both
;; YUY2 and UYVY routines as they both take the same parms.
;;
%assign numtemps 4
%define var(a) [esp+a]
struc parms
;Temps on stack
.tmp1 resd 1 ;General DWORD temp.
.tmp2 resd 1 ;General DWORD temp.
.tmp3 resd 1 ;General DWORD temp.
.tmp4 resd 1 ;General DWORD temp.
; Space for reg pushes and return address.
.registers resd 6 ;pushed registers
.return resd 1 ;return address
; input params
.d1: resd 1 ;unsigned char* d1,
.d2: resd 1 ;unsigned char* d2,
.du: resd 1 ;unsigned char* du,
.dv: resd 1 ;unsigned char* dv,
.dyPitch: resd 1 ;INT32 dyPitch,
.duPitch: resd 1 ;INT32 duPitch,
.dvPitch: resd 1 ;INT32 dvPitch,
.dest_dx: resd 1 ;INT32 dest_dx,
.dest_dy: resd 1 ;INT32 dest_dy,
.s1: resd 1 ;unsigned char* s1,
.s2: resd 1 ;unsigned char* s2,
.src_pitch: resd 1 ;INT32 src_pitch
endstruc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Our UYVY to Planar YUV MMX
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
UYVYToPlanarYUV_MMX:
;; Save some stuff...
push ebx
push edi
push esi
push ebp
push ecx
push edx
; Make room for temps on stack
sub esp, numtemps*4;
;; Load our dest chroma pointers.
movd mm4, var(parms.du) ; mm4 = du
movd mm5, var(parms.dv) ; mm5 = dv
;; Set up the loops
mov eax, var(parms.dest_dy)
mov ecx, var(parms.dest_dx)
shr eax, 1 ; eax = dest_dy/2
shr ecx, 2 ; ecx = dest_dx/4
jnc even1 ; Was dest_dx divisible by 4?
odd1:
; We have 1 macro pixel left over
mov DWORD var(parms.tmp3), 1 ; Store the fact that we got odd marco pixels.
jmp cont1
even1:
mov DWORD var(parms.tmp3), 0 ; We have even pixels...
cont1:
mov var(parms.tmp1), ecx ; save dx loop count
mov var(parms.tmp2), eax ; save dx loop count
xor ecx, ecx
mov edi, var(parms.s1) ;s1
mov esi, var(parms.s2) ;s2
mov ebx, var(parms.d1) ;d1
mov ebp, var(parms.d2) ;d2
pxor mm7, mm7 ; mm7 = 00000000 00000000
movq mm6, [MaskLuma] ; mm6 = Mask out Luma value.
xor eax, eax
DYLOOP1:
DXLOOP1:
;; Process 2 macro pixels at a time, 2 lines at a time.
movq mm0, [edi+ecx*8] ; mm0= y4v2y3u2 y2v1y1u1
movq mm1, [esi+ecx*8] ; mm1= y4v2y3u2 y2v1y1u1
;; These two lines are the only difference between the two
;; YUY2 and UYVY conversion routines. I really should make
;; time to combine the 2 routines to save a little download
;; space.
;; Byte swap the 2 MMX registers.
;; s1
movq mm2, mm0
movq mm3, mm0
psrlq mm2, 8 ; mm2 = 00y4v2y3 u2y2v1y1
psllq mm3, 8 ; mm3 = v2y3u2y2 v1y1u100
pand mm3, [MaskChroma] ; mm3 = v200u200 v100u100
pand mm2, mm6 ; mm2 = 00y400y3 00y200y1
por mm2, mm3 ; mm2 = v2y4u2y3 v1y2u1y1
movq mm0, mm2
;; s2
movq mm2, mm1
movq mm3, mm1
psrlq mm2, 8 ; mm2 = 00y4v2y3 u2y2v1y1
psllq mm3, 8 ; mm3 = v2y3u2y2 v1y1u100
pand mm3, [MaskChroma] ; mm3 = v200u200 v100u100
pand mm2, mm6 ; mm2 = 00y400y3 00y200y1
por mm2, mm3 ; mm2 = v2y4u2y3 v1y2u1y1
movq mm1, mm2
;; Store luma values in planar YUV space
movq mm2, mm0
movq mm3, mm1
pand mm2, mm6 ; mm2 = 00Y400Y3 00Y200Y1 of s1
pand mm3, mm6 ; mm3 = 00Y400Y3 00Y200Y1 of s1
packuswb mm2, mm7 ; mm2 = 00000000 Y4Y3Y2Y1 of s1
packuswb mm3, mm7 ; mm3 = 00000000 Y4Y3Y2Y1 of s2
movd [ebx+ecx*4], mm2 ; d1=s1
psrlw mm0, 8 ; mm0 = 00v200u2 00v100u1 of S1
movd [ebp+ecx*4], mm3 ; d2=s2
;;Compute averaged chroma values
psrlw mm1, 8 ; mm1 = 00v200u2 00v100u1 of S2
paddw mm0, mm1 ; mm0 = v2v2u2u2 v1v1u1u1 s1+s2
psrlw mm0, 1 ; mm0 = 00v200u2 00v100u1 (s1+s2)/2
;;
;;Store chromas in du and dv.
;;
movd mm3, esi ;save esi
;; unpack and pack
packuswb mm0, mm0 ; mm0 = 00000000 v2u2v2u1
punpcklbw mm0, mm0 ; mm0 = v2v2u2u2 v1v1u1u1
movd esi, mm4 ; esi = dU
movq mm1, mm0
psrlq mm1, 32 ; mm1 = 0000000 v2v2u2u2
punpcklbw mm0, mm1 ; mm0 = v2v1v2v1 u2u1u2u1
;; Store the U data
movd edx, mm0 ; edx = u1u2u1u2
psrlq mm0, 32 ; mm0 = 00000000 v1v2v1v2
mov WORD [esi+ecx*2], dx ; store 2 bytes of U data.
;; Store the V data
movd esi, mm5 ; esi = dV
movd edx, mm0 ; ecx = v1v2v1v2
mov WORD [esi+ecx*2], dx ; store 2 bytes of V data.
movd esi, mm3
;; inc DX counter and loop
inc ecx
cmp ecx, var(parms.tmp1)
jne NEAR DXLOOP1
;; Now we have to check for any pixels left over if dest_dx
;; is not divisible by 4. Since dest_dx must be at least even
;; we can only have 0 or two(1 macro pixel).
mov ecx, var(parms.tmp3)
jz nextline1
;;
;; Do odd marco pixel here. ===========
;;
movd mm0, [edi+ecx*8] ; mm0= 00000000 v1y2u1y1
movd mm1, [esi+ecx*8] ; mm1= 00000000 v1y2u1y1
;; Store luma values in planar YUV space
movq mm2, mm0
movq mm3, mm1
pand mm2, mm6 ; mm2 = 00000000 00Y100Y2 of s1
pand mm3, mm6 ; mm3 = 00000000 00Y100Y2 of s1
packuswb mm2, mm7 ; mm2 = 00000000 0000Y2Y1 of s1
packuswb mm3, mm7 ; mm3 = 00000000 0000Y2Y1 of s2
movd edx, mm2 ; grab lower 32 bits of mm2
mov WORD [ebx+ecx*4], dx ; Just store 16 bits of ecx
movd edx, mm3 ; grab lower 32 bits of mm3
mov WORD [ebp+ecx*4], dx ; just store 16 bits of ecx
;;Compute averaged chroma values
psrlw mm0, 8 ; mm0 = 00000000 00v100u1 of S1
psrlw mm1, 8 ; mm1 = 00000000 00v100u1 of S2
paddw mm0, mm1 ; mm0 = 00000000 v1v1u1u1 s1+s2
psrlw mm0, 1 ; mm0 = 00000000 00v100u1 (s1+s2)/2
;;
;;Store chromas in du and dv.
;;
movd mm3, esi ;save esi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -