📄 yuvammx.asm
字号:
;
; ***** BEGIN LICENSE BLOCK *****
; Version: RCSL 1.0/RPSL 1.0
;
; Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
;
; The contents of this file, and the files included with this file, are
; subject to the current version of the RealNetworks Public Source License
; Version 1.0 (the "RPSL") available at
; http://www.helixcommunity.org/content/rpsl unless you have licensed
; the file under the RealNetworks Community Source License Version 1.0
; (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
; in which case the RCSL will apply. You may also obtain the license terms
; directly from RealNetworks. You may not use this file except in
; compliance with the RPSL or, if you have a valid RCSL with RealNetworks
; applicable to this file, the RCSL. Please see the applicable RPSL or
; RCSL for the rights, obligations and limitations governing use of the
; contents of the file.
;
; This file is part of the Helix DNA Technology. RealNetworks is the
; developer of the Original Code and owns the copyrights in the portions
; it created.
;
; This file, and the files included with this file, is distributed and made
; available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
; EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
; INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
; FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
;
; Technology Compatibility Kit Test Suite(s) Location:
; http://www.helixcommunity.org/content/tck
;
; Contributor(s):
;
; ***** END LICENSE BLOCK *****
;
;;--------------------------------------------
;; yuvammx.asm
;;
;; mmx alpha blender routines
;;
;; NASM 0.98
;; target machine = Pentium II
;----------------------------------------------------------
; Note: Output buffer can be the same as one of the input
; buffers for I420 and YV12 output only. Common
; input/output buffer must have same pitch, lines,
; width and height.
;=========================================================
;; data segment should be qword aligned for best performance
;; ALIGNMENT is appended to the `segment .data` directive further down.
;; It expands to nothing unless the selected output format overrides it.
%define ALIGNMENT
;; Select the exported symbol spelling according to which object-format
;; symbol (COFF, WIN32, ELF or AOUTB) is defined when assembling.
;; The routines are written with a leading underscore throughout this file;
;; each branch maps those names to the spelling used for that format.
%ifdef COFF
;; COFF: names keep the leading underscore (the defines map each name to itself).
%define _I420andYUVAtoI420_MMX _I420andYUVAtoI420_MMX
%define _I420andYUVAtoYV12_MMX _I420andYUVAtoYV12_MMX
%define _I420andYUVAtoYUY2_MMX _I420andYUVAtoYUY2_MMX
%define _I420andYUVAtoUYVY_MMX _I420andYUVAtoUYVY_MMX
%define _I420andI420toI420_MMX_sub _I420andI420toI420_MMX_sub
;; NOTE(review): %error is reported as an assembly error, so this branch
;; appears to be deliberately blocked until alignment is sorted out - confirm.
%error data segment possibly not qword aligned
%elifdef WIN32
;; WIN32: names keep the leading underscore; data segment is requested
;; with 8-byte alignment via ALIGNMENT.
%define _I420andYUVAtoI420_MMX _I420andYUVAtoI420_MMX
%define _I420andYUVAtoYV12_MMX _I420andYUVAtoYV12_MMX
%define _I420andYUVAtoYUY2_MMX _I420andYUVAtoYUY2_MMX
%define _I420andYUVAtoUYVY_MMX _I420andYUVAtoUYVY_MMX
%define _I420andI420toI420_MMX_sub _I420andI420toI420_MMX_sub
%define ALIGNMENT align=8
%elifdef ELF
;; ELF: the leading underscore is dropped from the exported names.
%define _I420andYUVAtoI420_MMX I420andYUVAtoI420_MMX
%define _I420andYUVAtoYV12_MMX I420andYUVAtoYV12_MMX
%define _I420andYUVAtoYUY2_MMX I420andYUVAtoYUY2_MMX
%define _I420andYUVAtoUYVY_MMX I420andYUVAtoUYVY_MMX
%define _I420andI420toI420_MMX_sub I420andI420toI420_MMX_sub
;; NOTE(review): same %error as the COFF branch - ALIGNMENT stays empty here.
%error data segment possibly not qword aligned
%elifdef AOUTB
;; AOUTB: the leading underscore is dropped from the exported names.
%define _I420andYUVAtoI420_MMX I420andYUVAtoI420_MMX
%define _I420andYUVAtoYV12_MMX I420andYUVAtoYV12_MMX
%define _I420andYUVAtoYUY2_MMX I420andYUVAtoYUY2_MMX
%define _I420andYUVAtoUYVY_MMX I420andYUVAtoUYVY_MMX
%define _I420andI420toI420_MMX_sub I420andI420toI420_MMX_sub
;; NOTE(review): same %error as the COFF branch - ALIGNMENT stays empty here.
%error data segment possibly not qword aligned
%else
;; None of the supported format symbols was defined.
%error linking format currently not supported
%endif
;; Exported routines (names are subject to the per-format mapping above).
global _I420andYUVAtoI420_MMX
global _I420andYUVAtoYV12_MMX
global _I420andYUVAtoYUY2_MMX
global _I420andYUVAtoUYVY_MMX
global _I420andI420toI420_MMX_sub
;; Colour-format identifiers. The value 2 is not assigned here.
%assign CID_I420 0 ;;/* planar YCrCb 4:2:0 format (CCIR) */
%assign CID_YV12 1 ;;/* planar YVU 4:2:0 (ATI) */
%assign CID_YUY2 3 ;;/* packed YVU 4:2:2 (ATI,MATROX,etc.) */
%assign CID_UYVY 4 ;;/* yet another packed 4:2:2 (ATI) */
;; make_labels <suffix>
;;
;; Redefines every generic label name used inside a routine so that it
;; expands to the same name with <suffix> appended (exit -> exit<suffix>,
;; a100 -> a100<suffix>, ...). Presumably this gives each routine its own
;; distinct set of labels; invoke it with a different suffix per routine.
%macro make_labels 1
%define a100             a100%1
%define a200             a200%1
%define a300             a300%1
%define a400             a400%1
%define y100             y100%1
%define y_by_fours       y_by_fours%1
%define y_one_two_three  y_one_two_three%1
%define two_pels         two_pels%1
%define line_done        line_done%1
%define exit             exit%1
%define fail_exit        fail_exit%1
%endmacro
;=========================================================
segment .data data ALIGNMENT
align 8
;; Three 8-byte (one MMX register wide) constants.
;; Each is written as four little-endian words.
mask3:                                  ;; bytes: FF 00 FF 00 FF 00 FF 00
        times 4 dw 0x00FF
mask3b:                                 ;; bytes: 00 FF 00 FF 00 FF 00 FF
        times 4 dw 0xFF00
con1:                                   ;; four words, each holding 1
        times 4 dw 1
;;_DATA ENDS
;=========================================================
;=========================================================
segment .text code
;;==========================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; I420andYUVAtoYV12
;;
;; This function alpha-blends two I420 buffers into a third
;; YV12 buffer using the alpha info tacked to the
;; end of the second I420 buffer
;;
;; yuva = top
;; inverted alpha
;; uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoYV12_MMX:
;;
;; int I420andYUVAtoYV12_MMX(
;;     unsigned char* src,  int src_pels,  int src_lines,  int src_pitch,
;;     int src_startx,  int src_starty,
;;     unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;;     int yuva_startx, int yuva_starty,
;;     unsigned char* dst,  int dst_pels,  int dst_lines,  int dst_pitch,
;;     int dst_startx,  int dst_starty,
;;     int width, int height);
;;
;; This routine only builds the stack frame and the destination plane
;; pointers for a YV12 target, then jumps to I420andYUVAtoI420_MMX_entry.
;; The frame (5 pushes + ntmps scratch dwords) is the same one that
;; _I420andYUVAtoI420_MMX builds before reaching that label.
;;
;; Stack arguments, addressed from esp once the frame is in place.
;; npush = dwords sitting between esp and the return address.
%define src             dword [esp+4*(1+npush)]
%define src_pels        dword [esp+4*(2+npush)]
%define src_lines       dword [esp+4*(3+npush)]
%define src_pitch       dword [esp+4*(4+npush)]
%define src_startx      dword [esp+4*(5+npush)]
%define src_starty      dword [esp+4*(6+npush)]
%define yuva            dword [esp+4*(7+npush)]
%define yuva_pels       dword [esp+4*(8+npush)]
%define yuva_lines      dword [esp+4*(9+npush)]
%define yuva_pitch      dword [esp+4*(10+npush)]
%define yuva_startx     dword [esp+4*(11+npush)]
%define yuva_starty     dword [esp+4*(12+npush)]
%define dst             dword [esp+4*(13+npush)]
%define dst_pels        dword [esp+4*(14+npush)]
%define dst_lines       dword [esp+4*(15+npush)]
%define dst_pitch       dword [esp+4*(16+npush)]
%define dst_startx      dword [esp+4*(17+npush)]
%define dst_starty      dword [esp+4*(18+npush)]
%define width           dword [esp+4*(19+npush)]
%define height          dword [esp+4*(20+npush)]
;; saved registers (5 dwords)
        push    ebp
        push    esi
        push    edi
        push    ecx
        push    ebx
;; scratch dwords kept on the stack
%assign ntmps 16
%assign npush (5+ntmps)
        sub     esp, ntmps*4
%define yuvay           dword [esp + 0*4]
%define yuvau           dword [esp + 1*4]
%define yuvaoffsetv     dword [esp + 2*4]
%define yuvaa           dword [esp + 3*4]
%define yuvaauv         dword [esp + 4*4]
%define yuvauvpitch     dword [esp + 5*4]
%define sy              dword [esp + 6*4]
%define su              dword [esp + 7*4]
%define soffsetv        dword [esp + 8*4]
%define suvpitch        dword [esp + 9*4]
%define dy              dword [esp + 10*4]
%define du              dword [esp + 11*4]
%define doffsetv        dword [esp + 12*4]
%define duvpitch        dword [esp + 13*4]
%define dtmp0           dword [esp + 14*4]
%define dtmp1           dword [esp + 15*4]
;; YV12 plane order:
;;   [ Y Y ]
;;   [V]
;;   [U]
;; U and V are swapped relative to I420, so du is placed one chroma plane
;; past the end of Y and doffsetv is the negative distance back to V.
;; After that the common I420 code can be used unchanged.
;;-----
        mov     esi, dst                ;; esi = start of destination buffer
        mov     dy, esi                 ;; Y plane begins at the buffer start
        mov     edx, dst_pitch
        mov     eax, dst_lines
        mov     ecx, eax
        imul    ecx, edx                ;; ecx = pitch*lines (Y plane size)
        add     esi, ecx                ;; esi = end of Y plane
        shr     edx, 1                  ;; edx = pitch/2
        mov     duvpitch, edx
        shr     eax, 1                  ;; eax = lines/2
        imul    eax, edx                ;; eax = (pitch/2)*(lines/2)
        add     esi, eax                ;; skip the first chroma plane
        mov     du, esi
        neg     eax                     ;; v = u - chroma plane size
        mov     doffsetv, eax
;;-----
;; eax, ecx, edx and esi are not live across the jump; the entry code
;; reloads all four before reading them.
        jmp     I420andYUVAtoI420_MMX_entry
;_I420andYUVAtoYV12_MMX endp
;=============================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; I420andYUVAtoI420
;;
;; This function alpha-blends two I420 buffers into a third
;; I420 buffer using the alpha info tacked to the
;; end of the second I420 buffer
;;
;; yuva = top
;; inverted alpha
;; uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoI420_MMX:
;;
;; int I420andYUVAtoI420_MMX(
;; unsigned char* src, int src_pels, int src_lines, int src_pitch,
;; int src_startx, int src_starty,
;; unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;; int yuva_startx, int yuva_starty,
;; unsigned char* dst, int dst_pels, int dst_lines, int dst_pitch,
;; int dst_startx, int dst_starty,
;; int width, int height);
;;
;%define exit _I420andYUVAtoI420_MMX_exit
;%define fail_exit _I420andYUVAtoI420_MMX_fail_exit
;%define a100 _I420andYUVAtoI420_MMX_a100
;%define a200 _I420andYUVAtoI420_MMX_a200
;%define a300 _I420andYUVAtoI420_MMX_a300
;%define a400 _I420andYUVAtoI420_MMX_a400
make_labels _I420andYUVAtoI420_MMX
;; arguments
%define src dword [esp+4*(1+npush)]
%define src_pels dword [esp+4*(2+npush)]
%define src_lines dword [esp+4*(3+npush)]
%define src_pitch dword [esp+4*(4+npush)]
%define src_startx dword [esp+4*(5+npush)]
%define src_starty dword [esp+4*(6+npush)]
%define yuva dword [esp+4*(7+npush)]
%define yuva_pels dword [esp+4*(8+npush)]
%define yuva_lines dword [esp+4*(9+npush)]
%define yuva_pitch dword [esp+4*(10+npush)]
%define yuva_startx dword [esp+4*(11+npush)]
%define yuva_starty dword [esp+4*(12+npush)]
%define dst dword [esp+4*(13+npush)]
%define dst_pels dword [esp+4*(14+npush)]
%define dst_lines dword [esp+4*(15+npush)]
%define dst_pitch dword [esp+4*(16+npush)]
%define dst_startx dword [esp+4*(17+npush)]
%define dst_starty dword [esp+4*(18+npush)]
%define width dword [esp+4*(19+npush)]
%define height dword [esp+4*(20+npush)]
push ebp
push esi
push edi
push ecx
push ebx
;; tmp on stack
%assign ntmps 16
%assign npush (5+ntmps)
sub esp, ntmps*4
%define yuvay dword [esp + 0*4]
%define yuvau dword [esp + 1*4]
%define yuvaoffsetv dword [esp + 2*4]
%define yuvaa dword [esp + 3*4]
%define yuvaauv dword [esp + 4*4]
%define yuvauvpitch dword [esp + 5*4]
%define sy dword [esp + 6*4]
%define su dword [esp + 7*4]
%define soffsetv dword [esp + 8*4]
%define suvpitch dword [esp + 9*4]
%define dy dword [esp + 10*4]
%define du dword [esp + 11*4]
%define doffsetv dword [esp + 12*4]
%define duvpitch dword [esp + 13*4]
%define dtmp0 dword [esp + 14*4]
%define dtmp1 dword [esp + 15*4]
%define btmp0(x) byte [esp + 14*4 + x]
%define btmp1(x) byte [esp + 15*4 + x]
;;-----
mov eax, dst_lines
mov ecx, dst_pitch
mov edx, eax
imul eax, ecx ;; pitch*lines
shr ecx, 1 ;; pitch/2
mov duvpitch, ecx
mov esi, dst
mov dy, esi
add esi, eax
mov du, esi
shr edx, 1 ;; lines/2
imul edx, ecx ;; (pitch/2)*(lines/2)
mov doffsetv, edx
;;-----
;; shared entry point: _I420andYUVAtoYV12_MMX jumps here after building the same stack frame
;;
I420andYUVAtoI420_MMX_entry:
;;
mov eax, src_lines
mov ecx, src_pitch
mov edx, eax
imul eax, ecx ;; pitch*lines
shr ecx, 1 ;; pitch/2
mov suvpitch, ecx
mov esi, src
mov sy, esi
add esi, eax
mov su, esi
shr edx, 1 ;; lines/2
imul edx, ecx ;; (pitch/2)*(lines/2)
mov soffsetv, edx
;;-----
mov eax, yuva_lines
mov ecx, yuva_pitch
mov edx, eax
imul eax, ecx ;; pitch*lines
shr ecx, 1 ;; pitch/2
mov yuvauvpitch, ecx
mov esi, yuva
mov yuvay, esi
add esi, eax
mov yuvau, esi
shr edx, 1 ;; lines/2
imul edx, ecx ;; (pitch/2)*(lines/2)
mov yuvaoffsetv, edx
lea esi, [esi+edx*2]
mov yuvaa, esi
mov yuvaauv, esi ;; duplicate yuvaa for uv use
;;------------------
;;------------------
;; pointer adjustment to (x,y)
mov ecx, src_pitch
mov eax, src_starty
mov edx, eax
mov ebx, src_startx
imul eax, ecx ;; y*pitch
mov esi, sy ;;
add esi, eax ;; sy + y*pitch
add esi, ebx ;; sy + y*pitch + x
mov sy, esi
shr ecx, 1 ;; pitch/2
shr edx, 1 ;; y/2
imul edx, ecx ;; (y/2)*(pitch/2)
shr ebx, 1 ;; x/2
mov esi, su ;;
add esi, edx ;; su + (y/2)*(pitch/2)
add esi, ebx ;; su + (y/2)*(pitch/2) + x/2
mov su, esi
;;
;; pointer adjustment to (x,y)
mov ecx, dst_pitch
mov eax, dst_starty
mov edx, eax
mov ebx, dst_startx
imul eax, ecx ;; y*pitch
mov esi, dy ;;
add esi, eax ;; dy + y*pitch
add esi, ebx ;; dy + y*pitch + x
mov dy, esi
shr ecx, 1 ;; pitch/2
shr edx, 1 ;; y/2
imul edx, ecx ;; (y/2)*(pitch/2)
shr ebx, 1 ;; x/2
mov esi, du ;;
add esi, edx ;; du + (y/2)*(pitch/2)
add esi, ebx ;; du + (y/2)*(pitch/2) + x/2
mov du, esi
;;
;; pointer adjustment to (x,y)
mov ecx, yuva_pitch
mov eax, yuva_starty
mov edx, eax
mov ebx, yuva_startx
imul eax, ecx ;; y*pitch
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -