; yuvammx.asm
;
; (Code-viewer page header, not part of the original source:
;  from "Helix player source code for Symbian" - assembly code -
;  2,177 lines in total - page 1 of 5)
;
;
; ***** BEGIN LICENSE BLOCK *****
; Source last modified: $Id: yuvammx.asm,v 1.1.1.1.50.1 2004/07/09 02:00:19 hubbe Exp $
; 
; Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
; 
; The contents of this file, and the files included with this file,
; are subject to the current version of the RealNetworks Public
; Source License (the "RPSL") available at
; http://www.helixcommunity.org/content/rpsl unless you have licensed
; the file under the current version of the RealNetworks Community
; Source License (the "RCSL") available at
; http://www.helixcommunity.org/content/rcsl, in which case the RCSL
; will apply. You may also obtain the license terms directly from
; RealNetworks.  You may not use this file except in compliance with
; the RPSL or, if you have a valid RCSL with RealNetworks applicable
; to this file, the RCSL.  Please see the applicable RPSL or RCSL for
; the rights, obligations and limitations governing use of the
; contents of the file.
; 
; Alternatively, the contents of this file may be used under the
; terms of the GNU General Public License Version 2 or later (the
; "GPL") in which case the provisions of the GPL are applicable
; instead of those above. If you wish to allow use of your version of
; this file only under the terms of the GPL, and not to allow others
; to use your version of this file under the terms of either the RPSL
; or RCSL, indicate your decision by deleting the provisions above
; and replace them with the notice and other provisions required by
; the GPL. If you do not delete the provisions above, a recipient may
; use your version of this file under the terms of any one of the
; RPSL, the RCSL or the GPL.
; 
; This file is part of the Helix DNA Technology. RealNetworks is the
; developer of the Original Code and owns the copyrights in the
; portions it created.
; 
; This file, and the files included with this file, is distributed
; and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
; KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
; ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
; ENJOYMENT OR NON-INFRINGEMENT.
; 
; Technology Compatibility Kit Test Suite(s) Location:
;    http://www.helixcommunity.org/content/tck
; 
; Contributor(s):
; 
; ***** END LICENSE BLOCK *****
;

;;--------------------------------------------
;; yuvammx.asm   
;;
;;  mmx alpha blender routines
;;
;;  NASM 0.98
;;  target machine = Pentium II
;----------------------------------------------------------
; Note:  Output buffer can be the same as one of the input
;        buffers for I420 and YV12 output only.  Common 
;        input/output buffer must have same pitch, lines,
;        width and height.
;=========================================================
;; data segment should be qword aligned for best performance

;; Per-object-format configuration.
;;
;; Exactly one of COFF, WIN32, ELF or AOUTB is expected to be defined when
;; this file is assembled; any other case stops at the final %else branch.
;;
;; ALIGNMENT is appended to the `segment .data` line further down.  It is
;; empty by default and only the WIN32 branch sets it to `align=8`; the
;; other branches instead emit the message "data segment possibly not
;; qword aligned".
;;
;; The %define lists map the underscore-prefixed names used throughout
;; this file onto the symbol names that are actually exported:
;;   COFF, WIN32 : name is kept as written (leading underscore retained)
;;   ELF, AOUTB  : leading underscore is dropped
;;
;; NOTE(review): the file header names NASM 0.98 as the assembler.  Confirm
;; how the assembler actually in use treats an unquoted %error (warning vs.
;; hard error), since that decides whether the COFF/ELF/AOUTB branches can
;; be built at all.
%define ALIGNMENT

%ifdef   COFF
    ;; COFF: exported names keep the leading underscore.
    %define _I420andYUVAtoI420_MMX        _I420andYUVAtoI420_MMX        
    %define _I420andYUVAtoYV12_MMX        _I420andYUVAtoYV12_MMX        
    %define _I420andYUVAtoYUY2_MMX        _I420andYUVAtoYUY2_MMX        
    %define _I420andYUVAtoUYVY_MMX        _I420andYUVAtoUYVY_MMX        
    %define _I420andI420toI420_MMX_sub        _I420andI420toI420_MMX_sub        
    %error data segment possibly not qword aligned
%elifdef WIN32
    ;; WIN32: exported names keep the leading underscore; this is the only
    ;; branch that requests 8-byte alignment for the data segment.
    %define _I420andYUVAtoI420_MMX        _I420andYUVAtoI420_MMX        
    %define _I420andYUVAtoYV12_MMX        _I420andYUVAtoYV12_MMX        
    %define _I420andYUVAtoYUY2_MMX        _I420andYUVAtoYUY2_MMX        
    %define _I420andYUVAtoUYVY_MMX        _I420andYUVAtoUYVY_MMX        
    %define _I420andI420toI420_MMX_sub        _I420andI420toI420_MMX_sub        
    %define ALIGNMENT align=8
%elifdef ELF
    ;; ELF: exported names have no leading underscore.
    %define _I420andYUVAtoI420_MMX        I420andYUVAtoI420_MMX        
    %define _I420andYUVAtoYV12_MMX        I420andYUVAtoYV12_MMX        
    %define _I420andYUVAtoYUY2_MMX        I420andYUVAtoYUY2_MMX        
    %define _I420andYUVAtoUYVY_MMX        I420andYUVAtoUYVY_MMX        
    %define _I420andI420toI420_MMX_sub        I420andI420toI420_MMX_sub        
    %error data segment possibly not qword aligned
%elifdef AOUTB
    ;; AOUTB: exported names have no leading underscore.
    %define _I420andYUVAtoI420_MMX        I420andYUVAtoI420_MMX        
    %define _I420andYUVAtoYV12_MMX        I420andYUVAtoYV12_MMX        
    %define _I420andYUVAtoYUY2_MMX        I420andYUVAtoYUY2_MMX        
    %define _I420andYUVAtoUYVY_MMX        I420andYUVAtoUYVY_MMX        
    %define _I420andI420toI420_MMX_sub        I420andI420toI420_MMX_sub        
    %error data segment possibly not qword aligned
%else
        ;; None of the supported object-format symbols was defined.
        %error linking format currently not supported 
%endif



;; Entry points exported from this file.  The names written here pass
;; through the per-format %defines above, so the symbol that is actually
;; exported may or may not carry the leading underscore.
global _I420andYUVAtoI420_MMX
global _I420andYUVAtoYV12_MMX
global _I420andYUVAtoYUY2_MMX 
global _I420andYUVAtoUYVY_MMX 
global _I420andI420toI420_MMX_sub 

;; Colour-format identifiers, one per output format handled by the
;; exported routines.  Value 2 is not assigned here.
%assign CID_I420   0   ;;/* planar YCrCb 4:2:0 format (CCIR)     */
%assign CID_YV12   1   ;;/* planar YVU 4:2:0 (ATI)               */
%assign CID_YUY2   3   ;;/* packed YVU 4:2:2 (ATI,MATROX,etc.)   */
%assign CID_UYVY   4   ;;/* yet another packed 4:2:2 (ATI)       */

;;----------------------------------------------------------
;; make_labels <suffix>
;;
;; Rebinds the generic label names used inside a routine so that each
;; one expands to the same name with <suffix> appended.  Invoking the
;; macro once at the top of every routine lets all routines be written
;; with the same short label names while the assembled labels stay
;; unique per routine.
;;----------------------------------------------------------
%macro  make_labels 1
    ;; routine exits
    %define exit            exit%1
    %define fail_exit       fail_exit%1

    ;; per-line / per-pel loop labels
    %define line_done       line_done%1
    %define two_pels        two_pels%1
    %define y100            y100%1
    %define y_by_fours      y_by_fours%1
    %define y_one_two_three y_one_two_three%1

    ;; numbered branch targets
    %define a100            a100%1
    %define a200            a200%1
    %define a300            a300%1
    %define a400            a400%1
%endmacro


;=========================================================
;; Constant tables.  ALIGNMENT comes from the per-format configuration
;; block (only WIN32 supplies a section alignment); the explicit
;; `align 8` places the first table on a qword boundary within the
;; segment, and each table is exactly 8 bytes, so all three are
;; qword-sized and contiguous.
segment .data data ALIGNMENT

align 8

;; Each label now carries a trailing colon: a bare label alone on a line
;; is reported by NASM as a possible mistake (orphan label).  The symbols
;; and the emitted bytes are unchanged.

;; FF 00 FF 00 FF 00 FF 00 : as four words, 0x00FF in each (low byte set)
mask3:
    db  -1, 0, -1, 0, -1, 0, -1, 0
;; 00 FF 00 FF 00 FF 00 FF : as four words, 0xFF00 in each (high byte set)
mask3b:
    db  0, -1, 0, -1, 0, -1, 0, -1

;; four words, each holding the value 1
con1:
    dw 1, 1, 1, 1

;;_DATA ENDS
;=========================================================
;=========================================================
segment .text code 
;;==========================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;  I420andYUVAtoYV12
;;
;;  Alpha-blends two I420 buffers into a third, YV12, buffer using
;;  the alpha info tacked to the end of the second I420 buffer.
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;  YV12 stores its chroma planes in the opposite order to I420:
;;
;;      [ Y Y ]
;;        [V]
;;        [U]
;;
;;  so this routine only builds the stack frame, fills in the four
;;  destination slots (dy, du, doffsetv, duvpitch) with u and v
;;  swapped, and then continues inside the I420 routine at
;;  I420andYUVAtoI420_MMX_entry.  That code is responsible for
;;  everything else, including tearing the frame down again.
;;
;;  int I420andYUVAtoYV12_MMX(
;;          unsigned char* src, int src_pels, int src_lines, int src_pitch,
;;          int src_startx, int src_starty,
;;          unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;;          int yuva_startx, int yuva_starty,
;;          unsigned char* dst, int dst_pels, int dst_lines, int dst_pitch,
;;          int dst_startx, int dst_starty,
;;          int width,  int height);
;;
;;  All arguments are read from the stack.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoYV12_MMX:

;; frame geometry: 5 saved registers plus ntmps dword temporaries lie
;; between esp and the return address once the prologue has run
%assign ntmps 16
%assign npush (5+ntmps)

;; incoming arguments (argument n lives at esp + 4*(n+npush))
%define src            dword [esp+4*(1+npush)]
%define src_pels       dword [esp+4*(2+npush)]
%define src_lines      dword [esp+4*(3+npush)]
%define src_pitch      dword [esp+4*(4+npush)]
%define src_startx     dword [esp+4*(5+npush)]
%define src_starty     dword [esp+4*(6+npush)]

%define yuva           dword [esp+4*(7+npush)]
%define yuva_pels      dword [esp+4*(8+npush)]
%define yuva_lines     dword [esp+4*(9+npush)]
%define yuva_pitch     dword [esp+4*(10+npush)]
%define yuva_startx    dword [esp+4*(11+npush)]
%define yuva_starty    dword [esp+4*(12+npush)]

%define dst            dword [esp+4*(13+npush)]
%define dst_pels       dword [esp+4*(14+npush)]
%define dst_lines      dword [esp+4*(15+npush)]
%define dst_pitch      dword [esp+4*(16+npush)]
%define dst_startx     dword [esp+4*(17+npush)]
%define dst_starty     dword [esp+4*(18+npush)]

%define width          dword [esp+4*(19+npush)]
%define height         dword [esp+4*(20+npush)]

;; temporaries at the bottom of the frame
%define yuvay        dword [esp + 0*4]
%define yuvau        dword [esp + 1*4]
%define yuvaoffsetv  dword [esp + 2*4]
%define yuvaa        dword [esp + 3*4]
%define yuvaauv      dword [esp + 4*4]
%define yuvauvpitch  dword [esp + 5*4]

%define sy           dword [esp + 6*4]
%define su           dword [esp + 7*4]
%define soffsetv     dword [esp + 8*4]
%define suvpitch     dword [esp + 9*4]

%define dy           dword [esp + 10*4]
%define du           dword [esp + 11*4]
%define doffsetv     dword [esp + 12*4]
%define duvpitch     dword [esp + 13*4]

%define dtmp0        dword [esp + 14*4]
%define dtmp1        dword [esp + 15*4]

;; prologue: same register order and frame size as the I420 routine,
;; because control continues inside that routine
    push    ebp
    push    esi
    push    edi
    push    ecx
    push    ebx
    sub     esp, ntmps*4

;; destination plane pointers, chroma order reversed for YV12
    mov     esi, dst
    mov     ecx, dst_pitch
    mov     edx, dst_lines
    mov     dy, esi                 ;; y plane starts at the buffer base

    mov     eax, ecx
    imul    eax, edx                ;; pitch*lines = size of the y plane

    shr     ecx, 1                  ;; pitch/2
    shr     edx, 1                  ;; lines/2
    mov     duvpitch, ecx
    imul    edx, ecx                ;; (pitch/2)*(lines/2) = one chroma plane

    add     eax, edx                ;; y plane + first chroma plane (v)
    add     esi, eax
    mov     du, esi                 ;; u is the second chroma plane

    neg     edx                     ;; v sits one chroma plane below u
    mov     doffsetv, edx

;; the rest is common with the I420 output routine
    jmp     I420andYUVAtoI420_MMX_entry



;_I420andYUVAtoYV12_MMX endp
;=============================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;	I420andYUVAtoI420
;;
;;	This function alpha-blends two I420 buffers into a third
;;	I420 buffer using the alpha info tacked to the 
;;	end of the second I420 buffer
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoI420_MMX:
;;
;; int I420andYUVAtoI420_MMX(
;;          unsigned char* src, int src_pels, int src_lines, int src_pitch,
;;          int src_startx, int src_starty,
;;          unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;;          int yuva_startx, int yuva_starty,
;;          unsigned char* dst, int dst_pels, int dst_lines, int dst_pitch,
;;          int dst_startx, int dst_starty,
;;          int width,  int height);
;;
;%define exit      _I420andYUVAtoI420_MMX_exit
;%define fail_exit _I420andYUVAtoI420_MMX_fail_exit
;%define a100      _I420andYUVAtoI420_MMX_a100
;%define a200      _I420andYUVAtoI420_MMX_a200
;%define a300      _I420andYUVAtoI420_MMX_a300
;%define a400      _I420andYUVAtoI420_MMX_a400

make_labels _I420andYUVAtoI420_MMX

;; arguments
%define src            dword [esp+4*(1+npush)]
%define src_pels       dword [esp+4*(2+npush)]
%define src_lines      dword [esp+4*(3+npush)]
%define src_pitch      dword [esp+4*(4+npush)]
%define src_startx     dword [esp+4*(5+npush)]
%define src_starty     dword [esp+4*(6+npush)]

%define yuva           dword [esp+4*(7+npush)]
%define yuva_pels      dword [esp+4*(8+npush)]
%define yuva_lines     dword [esp+4*(9+npush)]
%define yuva_pitch     dword [esp+4*(10+npush)]
%define yuva_startx    dword [esp+4*(11+npush)]
%define yuva_starty    dword [esp+4*(12+npush)]

%define dst            dword [esp+4*(13+npush)]
%define dst_pels       dword [esp+4*(14+npush)]
%define dst_lines      dword [esp+4*(15+npush)]
%define dst_pitch      dword [esp+4*(16+npush)]
%define dst_startx     dword [esp+4*(17+npush)]
%define dst_starty     dword [esp+4*(18+npush)]

%define width          dword [esp+4*(19+npush)]
%define height         dword [esp+4*(20+npush)]

    push    ebp
    push    esi
    push    edi
    push    ecx
    push    ebx

;; tmp on stack
%assign ntmps 16
%assign npush (5+ntmps)
    sub esp, ntmps*4 

%define yuvay        dword [esp + 0*4]
%define yuvau        dword [esp + 1*4]
%define yuvaoffsetv  dword [esp + 2*4]
%define yuvaa        dword [esp + 3*4]
%define yuvaauv      dword [esp + 4*4]
%define yuvauvpitch  dword [esp + 5*4]

%define sy           dword [esp + 6*4]
%define su           dword [esp + 7*4]
%define soffsetv     dword [esp + 8*4]
%define suvpitch     dword [esp + 9*4]

%define dy           dword [esp + 10*4]
%define du           dword [esp + 11*4]
%define doffsetv     dword [esp + 12*4]
%define duvpitch     dword [esp + 13*4]

%define dtmp0        dword [esp + 14*4]
%define dtmp1        dword [esp + 15*4]

%define btmp0(x)     byte [esp + 14*4 + x]
%define btmp1(x)     byte [esp + 15*4 + x]
;;-----
    mov eax, dst_lines
    mov ecx, dst_pitch
    mov edx, eax
    imul    eax, ecx        ;; pitch*lines
    shr ecx, 1              ;; pitch/2
    mov duvpitch, ecx
    mov esi, dst
    mov dy, esi
    add esi, eax
    mov du, esi
    shr edx, 1              ;; lines/2
    imul    edx, ecx        ;; (pitch/2)*(lines/2)
    mov doffsetv, edx
;;-----
;;  shared entry point: _I420andYUVAtoYV12_MMX jumps here after it has
;;  built the same stack frame and filled in dy, du, doffsetv and duvpitch
;;
I420andYUVAtoI420_MMX_entry:
;;
    mov eax, src_lines
    mov ecx, src_pitch
    mov edx, eax
    imul    eax, ecx        ;; pitch*lines
    shr ecx, 1              ;; pitch/2
    mov suvpitch, ecx
    mov esi, src
    mov sy, esi
    add esi, eax
    mov su, esi
    shr edx, 1              ;; lines/2
    imul    edx, ecx        ;; (pitch/2)*(lines/2)
    mov soffsetv, edx
;;-----
    mov eax, yuva_lines
    mov ecx, yuva_pitch
    mov edx, eax
    imul    eax, ecx        ;; pitch*lines
    shr ecx, 1              ;; pitch/2
    mov yuvauvpitch, ecx
    mov esi, yuva
    mov yuvay, esi
    add esi, eax
    mov yuvau, esi
    
    shr edx, 1              ;; lines/2
    imul   edx, ecx         ;; (pitch/2)*(lines/2)
    mov yuvaoffsetv, edx
    lea esi, [esi+edx*2]
    mov yuvaa, esi
    mov yuvaauv, esi        ;; duplicate yuvaa for uv use
;;------------------
;;------------------
;; pointer adjustment to (x,y)
    mov ecx, src_pitch
    mov eax, src_starty
    mov edx, eax
    mov ebx, src_startx
    imul    eax, ecx        ;; y*pitch
    mov esi, sy             ;;
    add esi, eax            ;;  sy + y*pitch
    add esi, ebx            ;;  sy + y*pitch + x
    mov sy, esi

    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, su             ;;
    add esi, edx            ;;  su + (y/2)*(pitch/2)
    add esi, ebx            ;;  su + (y/2)*(pitch/2) + x/2
    mov su, esi
;;
;; pointer adjustment to (x,y)
    mov ecx, dst_pitch
    mov eax, dst_starty
    mov edx, eax
    mov ebx, dst_startx
    imul    eax, ecx        ;; y*pitch
    mov esi, dy             ;;
    add esi, eax            ;;  dy + y*pitch
    add esi, ebx            ;;  dy + y*pitch + x
    mov dy, esi

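;; (Code-viewer page footer, not part of the original source.
;;  Keyboard shortcuts: copy code Ctrl + C, search code Ctrl + F,
;;  full-screen mode F11, larger font Ctrl + =, smaller font Ctrl + -,
;;  show shortcuts ?)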