⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 p2pyuv.asm

📁 著名的 helix realplayer 基于手机 symbian 系统的 播放器全套源代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;
; ***** BEGIN LICENSE BLOCK ***** 
; Version: RCSL 1.0/RPSL 1.0 
;  
; Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 
;      
; The contents of this file, and the files included with this file, are 
; subject to the current version of the RealNetworks Public Source License 
; Version 1.0 (the "RPSL") available at 
; http://www.helixcommunity.org/content/rpsl unless you have licensed 
; the file under the RealNetworks Community Source License Version 1.0 
; (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 
; in which case the RCSL will apply. You may also obtain the license terms 
; directly from RealNetworks.  You may not use this file except in 
; compliance with the RPSL or, if you have a valid RCSL with RealNetworks 
; applicable to this file, the RCSL.  Please see the applicable RPSL or 
; RCSL for the rights, obligations and limitations governing use of the 
; contents of the file.  
;  
; This file is part of the Helix DNA Technology. RealNetworks is the 
; developer of the Original Code and owns the copyrights in the portions 
; it created. 
;  
; This file, and the files included with this file, is distributed and made 
; available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
; EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
; INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS 
; FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
; 
; Technology Compatibility Kit Test Suite(s) Location: 
;    http://www.helixcommunity.org/content/tck 
; 
; Contributor(s): 
;  
; ***** END LICENSE BLOCK *****
;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;  Packed YUV To PlanarYUV MMX converters.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Linker-visible names for YUY2ToPlanarYUV_MMX and UYVYToPlanarYUV_MMX.
;;;
;;; One of COFF, WIN32, ELF or AOUTB must already be defined when this
;;; point is reached (presumably via the assembler command line -- confirm
;;; against the build files). The first one found, in that order, wins:
;;;   COFF / WIN32 : both names are mapped to the same name with a leading
;;;                  underscore.
;;;   ELF  / AOUTB : both names are mapped to themselves (no decoration).
;;; If none of the four is defined, assembly stops with the %error below.
%ifdef   COFF
        %define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef WIN32
        %define YUY2ToPlanarYUV_MMX _YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX _UYVYToPlanarYUV_MMX
%elifdef ELF
        %define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%elifdef AOUTB
        %define YUY2ToPlanarYUV_MMX YUY2ToPlanarYUV_MMX
        %define UYVYToPlanarYUV_MMX UYVYToPlanarYUV_MMX
%else
        ; Message names this file so a failed build points at the right source.
        %error linking format currently not supported by p2pyuv.asm
%endif

        ;; Export both converter entry points to the linker. Each name is a
        ;; macro set by the %ifdef chain above, so the symbol that is actually
        ;; exported is the decorated one (leading underscore on COFF/WIN32,
        ;; plain name on ELF/AOUTB).
        global YUY2ToPlanarYUV_MMX
        global UYVYToPlanarYUV_MMX

;========================= DATA SEGMENT ============================
section .data
align 8                         ; masks are used as 64-bit MMX memory operands

;; 64-bit constant 0xFF00FF00FF00FF00: ANDing with it keeps the high (odd)
;; byte of every 16-bit word and clears the low (even) byte.
MaskChroma:
    dd  0xFF00FF00, 0xFF00FF00

;; 64-bit constant 0x00FF00FF00FF00FF: ANDing with it keeps the low (even)
;; byte of every 16-bit word and clears the high (odd) byte.
MaskLuma:
    dd  0x00FF00FF, 0x00FF00FF
        

;============================= CODE SEGMENT =======================                        
section .text


;;
;; This is our stack params definition. It is used for both
;; YUY2 and UYVY routines as they both take the same parms.
;;                                        
;; Count of DWORD scratch slots a routine reserves below its pushed
;; registers ("sub esp, numtemps*4"). Must equal the number of .tmpN
;; fields at the start of parms.
%assign numtemps 4

;; var(field): memory operand located `field` bytes above the current esp.
%define var(offs) [esp+offs]

;; Stack frame as seen from esp after a routine has pushed six registers
;; and reserved numtemps scratch DWORDs. Offsets grow upward from esp:
;; scratch slots first, then the saved registers, the return address, and
;; finally the caller-supplied arguments in declaration order.
struc parms
        ; ---- scratch area (numtemps DWORDs) ----
        .tmp1:      resd 1  ; scratch DWORD #1
        .tmp2:      resd 1  ; scratch DWORD #2
        .tmp3:      resd 1  ; scratch DWORD #3
        .tmp4:      resd 1  ; scratch DWORD #4

        ; ---- saved state between scratch area and arguments ----
        .registers: resd 6  ; six pushed registers
        .return:    resd 1  ; caller's return address

        ; ---- arguments, first to last ----
        .d1:        resd 1  ; unsigned char* d1
        .d2:        resd 1  ; unsigned char* d2
        .du:        resd 1  ; unsigned char* du
        .dv:        resd 1  ; unsigned char* dv
        .dyPitch:   resd 1  ; INT32          dyPitch
        .duPitch:   resd 1  ; INT32          duPitch
        .dvPitch:   resd 1  ; INT32          dvPitch
        .dest_dx:   resd 1  ; INT32          dest_dx
        .dest_dy:   resd 1  ; INT32          dest_dy
        .s1:        resd 1  ; unsigned char* s1
        .s2:        resd 1  ; unsigned char* s2
        .src_pitch: resd 1  ; INT32          src_pitch

endstruc


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Our UYVY to Planar YUV MMX
;;                        
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
UYVYToPlanarYUV_MMX:
        ;; Save some stuff...
        push ebx
        push edi
        push esi
        push ebp
        push ecx
        push edx

        ; Make room for temps on stack
        sub esp, numtemps*4;

        ;; Load our dest chroma pointers.
        movd  mm4, var(parms.du)      ; mm4 = du
        movd  mm5, var(parms.dv)      ; mm5 = dv
                
        ;; Set up the loops
        mov eax, var(parms.dest_dy)
        mov ecx, var(parms.dest_dx)
        shr eax, 1                    ; eax = dest_dy/2
        
        shr ecx, 2                    ; ecx = dest_dx/4
        jnc even1                      ; Was dest_dx divisible by 4?
odd1:            
        ; We have 1 macro pixel left over
        mov DWORD var(parms.tmp3), 1  ; Store the fact that we got odd marco pixels.
        jmp cont1
even1:   
        mov DWORD var(parms.tmp3), 0  ; We have even pixels...        
cont1:           
        mov var(parms.tmp1), ecx      ; save dx loop count
        mov var(parms.tmp2), eax      ; save dx loop count
        
        xor ecx, ecx

        mov edi, var(parms.s1)        ;s1
        mov esi, var(parms.s2)        ;s2
        mov ebx, var(parms.d1)        ;d1
        mov ebp, var(parms.d2)        ;d2

        pxor  mm7, mm7                ; mm7 = 00000000 00000000
        movq  mm6, [MaskLuma]         ; mm6 = Mask out Luma value.
        xor eax, eax
DYLOOP1: 
DXLOOP1:
        ;; Process 2 macro pixels at a time, 2 lines at a time.
        movq mm0, [edi+ecx*8]      ; mm0= y4v2y3u2 y2v1y1u1
        movq mm1, [esi+ecx*8]      ; mm1= y4v2y3u2 y2v1y1u1

        ;; These two lines are the only difference between the two
        ;; YUY2 and UYVY conversion routines. I really should make
        ;; time to combine the 2 routines to save a little download
        ;; space.
        ;; Byte swap the 2 MMX registers.
        ;; s1
        movq     mm2, mm0
        movq     mm3, mm0
        psrlq    mm2, 8            ; mm2 = 00y4v2y3 u2y2v1y1
        psllq    mm3, 8            ; mm3 = v2y3u2y2 v1y1u100
        pand     mm3, [MaskChroma] ; mm3 = v200u200 v100u100
        pand     mm2, mm6          ; mm2 = 00y400y3 00y200y1
        por      mm2, mm3          ; mm2 = v2y4u2y3 v1y2u1y1
        movq     mm0, mm2

        ;; s2
        movq     mm2, mm1
        movq     mm3, mm1
        psrlq    mm2, 8            ; mm2 = 00y4v2y3 u2y2v1y1
        psllq    mm3, 8            ; mm3 = v2y3u2y2 v1y1u100
        pand     mm3, [MaskChroma] ; mm3 = v200u200 v100u100
        pand     mm2, mm6          ; mm2 = 00y400y3 00y200y1
        por      mm2, mm3          ; mm2 = v2y4u2y3 v1y2u1y1
        movq     mm1, mm2
        
        ;; Store luma values in planar YUV space
        movq     mm2, mm0
        movq     mm3, mm1
        pand     mm2, mm6          ; mm2 = 00Y400Y3 00Y200Y1 of s1
        pand     mm3, mm6          ; mm3 = 00Y400Y3 00Y200Y1 of s1

        packuswb mm2, mm7          ; mm2 = 00000000 Y4Y3Y2Y1 of s1
        packuswb mm3, mm7          ; mm3 = 00000000 Y4Y3Y2Y1 of s2

        movd     [ebx+ecx*4], mm2  ; d1=s1
        psrlw    mm0, 8            ; mm0 = 00v200u2 00v100u1 of S1
        movd     [ebp+ecx*4], mm3  ; d2=s2

        ;;Compute averaged chroma values
        psrlw      mm1, 8            ; mm1 = 00v200u2 00v100u1 of S2
        paddw      mm0, mm1          ; mm0 = v2v2u2u2 v1v1u1u1 s1+s2
        psrlw      mm0, 1            ; mm0 = 00v200u2 00v100u1 (s1+s2)/2

        ;;
        ;;Store chromas in du and dv.
        ;;
        movd mm3, esi ;save esi

        ;; unpack and pack
        packuswb   mm0, mm0          ; mm0 = 00000000 v2u2v2u1
        punpcklbw  mm0, mm0          ; mm0 = v2v2u2u2 v1v1u1u1
        movd       esi, mm4          ; esi = dU
        movq       mm1, mm0
        psrlq      mm1, 32           ; mm1 = 0000000 v2v2u2u2
        punpcklbw  mm0, mm1          ; mm0 = v2v1v2v1 u2u1u2u1

        ;; Store the U data
        movd edx, mm0                ; edx = u1u2u1u2
        psrlq mm0, 32                ; mm0 = 00000000 v1v2v1v2

        mov  WORD [esi+ecx*2], dx    ; store 2 bytes of U data.

        ;; Store the V data
        movd  esi, mm5                ; esi = dV
        movd  edx, mm0                ; ecx = v1v2v1v2
        mov   WORD [esi+ecx*2], dx    ; store 2 bytes of V data.

        movd esi, mm3
        
        ;; inc DX counter and loop
        inc  ecx
        cmp  ecx, var(parms.tmp1)
        jne  NEAR DXLOOP1
        
        ;; Now we have to check for any pixels left over if dest_dx
        ;; is not divisible by 4. Since dest_dx must be at least even
        ;; we can only have 0 or two(1 macro pixel).
        mov ecx, var(parms.tmp3)
        jz  nextline1

        ;;
        ;; Do odd marco pixel here. ===========
        ;;
        movd mm0, [edi+ecx*8]      ; mm0= 00000000 v1y2u1y1
        movd mm1, [esi+ecx*8]      ; mm1= 00000000 v1y2u1y1

        ;; Store luma values in planar YUV space
        movq     mm2, mm0
        movq     mm3, mm1
        pand     mm2, mm6          ; mm2 = 00000000 00Y100Y2 of s1
        pand     mm3, mm6          ; mm3 = 00000000 00Y100Y2 of s1

        packuswb mm2, mm7          ; mm2 = 00000000 0000Y2Y1 of s1
        packuswb mm3, mm7          ; mm3 = 00000000 0000Y2Y1 of s2

        movd     edx, mm2              ; grab lower 32 bits of mm2
        mov      WORD [ebx+ecx*4], dx  ; Just store 16 bits of ecx
        movd     edx, mm3              ; grab lower 32 bits of mm3
        mov      WORD [ebp+ecx*4], dx  ; just store 16 bits of ecx

        ;;Compute averaged chroma values
        psrlw      mm0, 8            ; mm0 = 00000000 00v100u1 of S1
        psrlw      mm1, 8            ; mm1 = 00000000 00v100u1 of S2
        paddw      mm0, mm1          ; mm0 = 00000000 v1v1u1u1 s1+s2
        psrlw      mm0, 1            ; mm0 = 00000000 00v100u1 (s1+s2)/2

        ;;
        ;;Store chromas in du and dv.
        ;;
        movd mm3, esi ;save esi

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -