⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mmxlines.asm

📁 著名的 helix realplayer 基于手机 symbian 系统的 播放器全套源代码
💻 ASM
字号:
;
; ***** BEGIN LICENSE BLOCK ***** 
; Version: RCSL 1.0/RPSL 1.0 
;  
; Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved. 
;      
; The contents of this file, and the files included with this file, are 
; subject to the current version of the RealNetworks Public Source License 
; Version 1.0 (the "RPSL") available at 
; http://www.helixcommunity.org/content/rpsl unless you have licensed 
; the file under the RealNetworks Community Source License Version 1.0 
; (the "RCSL") available at http://www.helixcommunity.org/content/rcsl, 
; in which case the RCSL will apply. You may also obtain the license terms 
; directly from RealNetworks.  You may not use this file except in 
; compliance with the RPSL or, if you have a valid RCSL with RealNetworks 
; applicable to this file, the RCSL.  Please see the applicable RPSL or 
; RCSL for the rights, obligations and limitations governing use of the 
; contents of the file.  
;  
; This file is part of the Helix DNA Technology. RealNetworks is the 
; developer of the Original Code and owns the copyrights in the portions 
; it created. 
;  
; This file, and the files included with this file, is distributed and made 
; available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
; EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
; INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS 
; FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
; 
; Technology Compatibility Kit Test Suite(s) Location: 
;    http://www.helixcommunity.org/content/tck 
; 
; Contributor(s): 
;  
; ***** END LICENSE BLOCK *****
;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;  _MMX_lineI420toYUY2 and _MMX_lineI420toUYVY
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; Symbol naming for _MMX_lineI420toYUY2 / _MMX_lineI420toUYVY, per object format
;; Select the symbol names under which the two routines are assembled.
;; Exactly one of COFF, WIN32, ELF or AOUTB is expected to be defined by
;; the build; the first one that is defined wins.
;;   COFF / WIN32 : the names gain one extra leading underscore.
;;   ELF / AOUTB  : the names are mapped onto themselves (left unchanged).
;; If none of the four is defined, assembly stops with the %error below.
%ifdef   COFF
        %define _MMX_lineI420toYUY2 __MMX_lineI420toYUY2
        %define _MMX_lineI420toUYVY __MMX_lineI420toUYVY
%elifdef WIN32
        %define _MMX_lineI420toYUY2 __MMX_lineI420toYUY2
        %define _MMX_lineI420toUYVY __MMX_lineI420toUYVY
%elifdef ELF
        %define _MMX_lineI420toYUY2 _MMX_lineI420toYUY2
        %define _MMX_lineI420toUYVY _MMX_lineI420toUYVY
%elifdef AOUTB
        %define _MMX_lineI420toYUY2 _MMX_lineI420toYUY2
        %define _MMX_lineI420toUYVY _MMX_lineI420toUYVY
%else
        %error linking format currently not supported by _MMX_lineI420to*
%endif

        ;; Export the two entry points.  The names go through the %define
        ;; mapping above, so the exported symbols carry the decoration
        ;; chosen for the selected object format.
        global _MMX_lineI420toYUY2
        global _MMX_lineI420toUYVY

;========================= DATA SEGMENT ============================
;; The data section is opened and aligned to 8 bytes, but nothing is
;; defined in it before the switch to the code section below.
section .data
align 8        


;============================= CODE SEGMENT ========================                  
;; Everything from here on is emitted into .text.
section .text

;;
;; Stack parameter layout shared by the YUY2 and UYVY routines.
;;
;; var(x) addresses [esp+x].  The offsets in parms are only correct once a
;; routine has pushed exactly six dwords (matching .registers below) and
;; for as long as esp is not changed again before the matching pops.
;;
;; Layout, from esp upward:
;;   6 dwords  registers saved by the routine's prologue
;;   1 dword   return address
;;   5 dwords  the caller's arguments, first argument at the lowest address
;;                                        
%define var(a) [esp+a]       
                
struc parms
        ; Space for reg pushes and return address.
        .registers  resd 6  ;pushed registers
        .return     resd 1  ;return address

        ; input params
        .sy:        resd 1  ;unsigned char* sy,    luma source, 2 bytes read per pixel pair
        .su:        resd 1  ;unsigned char* su,    U source, 1 byte read per pixel pair
        .sv:        resd 1  ;unsigned char* sv,    V source, 1 byte read per pixel pair
        .d:         resd 1  ;unsigned char* d,     packed destination, 4 bytes written per pixel pair
        .count:     resd 1  ;int            count  number of pixels to convert
endstruc


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; _MMX_lineI420toYUY2
;;
;; Packs one line of planar Y, U and V samples into YUY2 byte order:
;;     Y0 U0 Y1 V0  Y2 U1 Y3 V1  ...
;;
;; Stack arguments (see parms): sy, su, sv, d, count.
;; Returns 0 in eax.  Every register pushed in the prologue is restored.
;; emms is NOT executed here; the original source marks that as
;; deliberate (presumably the caller issues it -- confirm).
;;
;; Register roles:
;;     esi = luma read pointer        ebx = U read pointer
;;     ecx = V read pointer           edi = destination write pointer
;;     ebp = biased pixel counter     eax, edx = scratch
;;     mm0..mm4 = scratch in the 16-pixel loop
;;
;; Pixels are consumed two at a time, so an odd trailing pixel is never
;; written.  A count below 2 (including negative values) writes nothing.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Pack one pixel pair: read Y0, Y1, U, V; write Y0 U Y1 V; advance all
;; four pointers.  Clobbers eax, edx and flags.
%macro YUY2_PACK_PAIR 0
        movzx   eax, byte [esi]         ; eax = 00:00:00:Y0
        movzx   edx, byte [esi + 1]     ; edx = 00:00:00:Y1
        mov     ah, [ebx]               ; eax = 00:00:U :Y0
        mov     dh, [ecx]               ; edx = 00:00:V :Y1
        shl     edx, 16                 ; edx = V :Y1:00:00
        or      eax, edx                ; eax = V :Y1:U :Y0
        mov     dword [edi], eax        ; memory order: Y0 U Y1 V
        add     esi, 2
        add     ebx, 1
        add     ecx, 1
        add     edi, 4
%endmacro

;; Interleave 8 pixels.  On entry mm0 holds 8 luma bytes and the low
;; halves of mm1 / mm2 hold the 4 U / 4 V bytes that go with them.
;; Writes 16 bytes at [edi + %1].  Clobbers mm0, mm3, mm4.
%macro YUY2_WEAVE8 1
        movq      mm3, mm1
        punpcklbw mm3, mm2              ; mm3 = v3,u3,v2,u2,v1,u1,v0,u0
        movq      mm4, mm0
        punpcklbw mm4, mm3              ; mm4 = v1,y3,u1,y2,v0,y1,u0,y0
        movq      [edi + %1], mm4
        psrlq     mm3, 32               ; bring the upper chroma pairs down
        psrlq     mm0, 32               ; bring the upper 4 luma bytes down
        punpcklbw mm0, mm3              ; mm0 = v3,y7,u3,y6,v2,y5,u2,y4
        movq      [edi + %1 + 8], mm0
%endmacro

_MMX_lineI420toYUY2:
        ;; Prologue: six pushes, as the parms layout requires.
        push    ebx
        push    edi
        push    esi
        push    ebp
        push    ecx
        push    edx

        ;; Fetch the arguments.
        mov     esi, var(parms.sy)
        mov     ebx, var(parms.su)
        mov     ecx, var(parms.sv)
        mov     edi, var(parms.d)
        mov     ebp, var(parms.count)

        ;; Phase 1: pack single pairs until d sits on a 16-byte boundary.
        ;; Invariant inside the loop: ebp = pixels remaining - 2.
        ;; d advances 4 bytes per pair, so a d that is not 4-byte aligned
        ;; never reaches the boundary and the whole line is packed here.
        sub     ebp, 2
        jl      .bulk
.head:
        test    edi, 15
        jz      .bulk
        YUY2_PACK_PAIR
        sub     ebp, 2
        jge     .head

        ;; Phase 2: 16 pixels per pass (16 Y + 8 U + 8 V in, 32 bytes out).
        ;; Re-bias the counter so that ebp = pixels remaining - 16.
.bulk:
        sub     ebp, 16 - 2
        jl      .tail
.bulk_loop:
        movq    mm0, [esi]              ; mm0 = y7..y0
        movq    mm1, [ebx]              ; mm1 = u7..u0
        movq    mm2, [ecx]              ; mm2 = v7..v0
        YUY2_WEAVE8 0                   ; pixels 0..7  -> d[0..15]

        movq    mm0, [esi + 8]          ; mm0 = y15..y8
        psrlq   mm1, 32                 ; expose u7..u4 in the low half
        psrlq   mm2, 32                 ; expose v7..v4 in the low half
        YUY2_WEAVE8 16                  ; pixels 8..15 -> d[16..31]

        add     esi, 16
        add     ebx, 8
        add     ecx, 8
        add     edi, 32

        sub     ebp, 16
        jge     .bulk_loop

        ;; Phase 3: leftover pairs.  Undo the phase-2 bias so that once
        ;; again ebp = pixels remaining - 2.
.tail:
        sub     ebp, 2 - 16
        jl      .exit
.tail_loop:
        YUY2_PACK_PAIR
        sub     ebp, 2
        jge     .tail_loop

.exit:
        ;; Epilogue: restore in reverse push order.
        pop     edx
        pop     ecx
        pop     ebp
        pop     esi
        pop     edi
        pop     ebx

        ;; No emms on purpose (kept from the original source).

        xor     eax, eax                ; return 0 (success)
        ret



        
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; _MMX_lineI420toUYVY
;;
;; Packs one line of planar Y, U and V samples into UYVY byte order:
;;     U0 Y0 V0 Y1  U1 Y2 V1 Y3  ...
;;
;; Stack arguments (see parms): sy, su, sv, d, count.
;; Returns 0 in eax.  Every register pushed in the prologue is restored.
;; emms is NOT executed here; the original source marks that as
;; deliberate (presumably the caller issues it -- confirm).
;;
;; Register roles:
;;     esi = luma read pointer        ebx = U read pointer
;;     ecx = V read pointer           edi = destination write pointer
;;     ebp = biased pixel counter     eax, edx = scratch
;;     mm0..mm4 = scratch in the 16-pixel loop
;;
;; Pixels are consumed two at a time, so an odd trailing pixel is never
;; written.  A count below 2 (including negative values) writes nothing.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Pack one pixel pair: read U, V, Y0, Y1; write U Y0 V Y1; advance all
;; four pointers.  Clobbers eax, edx and flags.
%macro UYVY_PACK_PAIR 0
        movzx   eax, byte [ebx]         ; eax = 00:00:00:U
        movzx   edx, byte [ecx]         ; edx = 00:00:00:V
        mov     ah, byte [esi]          ; eax = 00:00:Y0:U
        mov     dh, byte [esi + 1]      ; edx = 00:00:Y1:V
        shl     edx, 16                 ; edx = Y1:V :00:00
        or      eax, edx                ; eax = Y1:V :Y0:U
        mov     dword [edi], eax        ; memory order: U Y0 V Y1
        add     ebx, 1
        add     ecx, 1
        add     esi, 2
        add     edi, 4
%endmacro

;; Interleave 8 pixels.  On entry the low halves of mm1 / mm2 hold the
;; 4 U / 4 V bytes for this group.  Loads 8 luma bytes from [esi + %1]
;; and writes 16 bytes at [edi + %2].  Clobbers mm0, mm3, mm4.
%macro UYVY_WEAVE8 2
        movq      mm3, mm1
        punpcklbw mm3, mm2              ; mm3 = v3,u3,v2,u2,v1,u1,v0,u0
        movq      mm0, qword [esi + %1] ; mm0 = 8 luma bytes
        movq      mm4, mm3
        punpcklbw mm4, mm0              ; mm4 = y3,v1,y2,u1,y1,v0,y0,u0
        movq      qword [edi + %2], mm4
        psrlq     mm3, 32               ; bring the upper chroma pairs down
        psrlq     mm0, 32               ; bring the upper 4 luma bytes down
        punpcklbw mm3, mm0              ; mm3 = y7,v3,y6,u3,y5,v2,y4,u2
        movq      qword [edi + %2 + 8], mm3
%endmacro

_MMX_lineI420toUYVY:
        ;; Prologue: six pushes, as the parms layout requires.
        push    ebx
        push    edi
        push    esi
        push    ebp
        push    ecx
        push    edx

        ;; Fetch the arguments.
        mov     esi, var(parms.sy)
        mov     ebx, var(parms.su)
        mov     ecx, var(parms.sv)
        mov     edi, var(parms.d)
        mov     ebp, var(parms.count)

        ;; Phase 1: pack single pairs until d sits on a 16-byte boundary.
        ;; Invariant inside the loop: ebp = pixels remaining - 2.
        ;; d advances 4 bytes per pair, so a d that is not 4-byte aligned
        ;; never reaches the boundary and the whole line is packed here.
        sub     ebp, 2
        jl      .bulk
.head:
        test    edi, 15
        jz      .bulk
        UYVY_PACK_PAIR
        sub     ebp, 2
        jge     .head

        ;; Phase 2: 16 pixels per pass (16 Y + 8 U + 8 V in, 32 bytes out).
        ;; Re-bias the counter so that ebp = pixels remaining - 16.
.bulk:
        sub     ebp, 16 - 2
        jl      .tail
.bulk_loop:
        movq    mm1, qword [ebx]        ; mm1 = u7..u0
        movq    mm2, qword [ecx]        ; mm2 = v7..v0
        UYVY_WEAVE8 0, 0                ; pixels 0..7  -> d[0..15]

        psrlq   mm1, 32                 ; expose u7..u4 in the low half
        psrlq   mm2, 32                 ; expose v7..v4 in the low half
        UYVY_WEAVE8 8, 16               ; pixels 8..15 -> d[16..31]

        add     esi, 16
        add     ebx, 8
        add     ecx, 8
        add     edi, 32

        sub     ebp, 16
        jge     .bulk_loop

        ;; Phase 3: leftover pairs.  Undo the phase-2 bias so that once
        ;; again ebp = pixels remaining - 2.
.tail:
        sub     ebp, 2 - 16
        jl      .exit
.tail_loop:
        UYVY_PACK_PAIR
        sub     ebp, 2
        jge     .tail_loop

.exit:
        ;; Epilogue: restore in reverse push order.
        pop     edx
        pop     ecx
        pop     ebp
        pop     esi
        pop     edi
        pop     ebx

        ;; No emms on purpose (kept from the original source).

        xor     eax, eax                ; return 0 (success)
        ret


;;; Ident.
;;; NUL-terminated identification string.  No section directive follows the
;;; routines above, so these bytes are emitted into the current (.text)
;;; section.  Nothing in the visible source references the label.
version: db '$(cl13n7c0r3) Copyright 2002 RealNetworks Inc. Revision:1.0 $',0

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -