📄 yuvammx.asm
字号:
;; ***** BEGIN LICENSE BLOCK *****; Source last modified: $Id: yuvammx.asm,v 1.1.1.1.50.1 2004/07/09 02:00:19 hubbe Exp $; ; Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.; ; The contents of this file, and the files included with this file,; are subject to the current version of the RealNetworks Public; Source License (the "RPSL") available at; http://www.helixcommunity.org/content/rpsl unless you have licensed; the file under the current version of the RealNetworks Community; Source License (the "RCSL") available at; http://www.helixcommunity.org/content/rcsl, in which case the RCSL; will apply. You may also obtain the license terms directly from; RealNetworks. You may not use this file except in compliance with; the RPSL or, if you have a valid RCSL with RealNetworks applicable; to this file, the RCSL. Please see the applicable RPSL or RCSL for; the rights, obligations and limitations governing use of the; contents of the file.; ; Alternatively, the contents of this file may be used under the; terms of the GNU General Public License Version 2 or later (the; "GPL") in which case the provisions of the GPL are applicable; instead of those above. If you wish to allow use of your version of; this file only under the terms of the GPL, and not to allow others; to use your version of this file under the terms of either the RPSL; or RCSL, indicate your decision by deleting the provisions above; and replace them with the notice and other provisions required by; the GPL. If you do not delete the provisions above, a recipient may; use your version of this file under the terms of any one of the; RPSL, the RCSL or the GPL.; ; This file is part of the Helix DNA Technology. 
; RealNetworks is the
; developer of the Original Code and owns the copyrights in the
; portions it created.
;
; This file, and the files included with this file, is distributed
; and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
; KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
; ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
; ENJOYMENT OR NON-INFRINGEMENT.
;
; Technology Compatibility Kit Test Suite(s) Location:
;    http://www.helixcommunity.org/content/tck
;
; Contributor(s):
;
; ***** END LICENSE BLOCK *****
;
;;
;;--------------------------------------------
;; yuvammx.asm
;;
;; mmx alpha blender routines
;;
;; NASM 0.98
;; target machine = Pentium II
;----------------------------------------------------------
; Note: Output buffer can be the same as one of the input
;       buffers for I420 and YV12 output only.  Common
;       input/output buffer must have same pitch, lines,
;       width and height.
;=========================================================

;; data segment should be qword aligned for best performance
;;
;; ALIGNMENT is appended to the ".data" segment directive below.  It is
;; empty by default and only the WIN32 branch replaces it with "align=8".
%define ALIGNMENT

;; Select the exported symbol spelling for the object format named on the
;; command line (COFF / WIN32 / ELF / AOUTB):
;;   COFF, WIN32 : each name maps to itself, i.e. the leading underscore
;;                 is kept.
;;   ELF, AOUTB  : the leading underscore is dropped.
;; NOTE(review): the "%error" lines in the COFF/ELF/AOUTB branches read like
;; informational notices about data alignment; confirm how the NASM version
;; in use treats %error (it may stop the build rather than just report).
%ifdef COFF
    %define _I420andYUVAtoI420_MMX _I420andYUVAtoI420_MMX
    %define _I420andYUVAtoYV12_MMX _I420andYUVAtoYV12_MMX
    %define _I420andYUVAtoYUY2_MMX _I420andYUVAtoYUY2_MMX
    %define _I420andYUVAtoUYVY_MMX _I420andYUVAtoUYVY_MMX
    %define _I420andI420toI420_MMX_sub _I420andI420toI420_MMX_sub
    %error data segment possibly not qword aligned
%elifdef WIN32
    %define _I420andYUVAtoI420_MMX _I420andYUVAtoI420_MMX
    %define _I420andYUVAtoYV12_MMX _I420andYUVAtoYV12_MMX
    %define _I420andYUVAtoYUY2_MMX _I420andYUVAtoYUY2_MMX
    %define _I420andYUVAtoUYVY_MMX _I420andYUVAtoUYVY_MMX
    %define _I420andI420toI420_MMX_sub _I420andI420toI420_MMX_sub
    %define ALIGNMENT align=8
%elifdef ELF
    %define _I420andYUVAtoI420_MMX I420andYUVAtoI420_MMX
    %define _I420andYUVAtoYV12_MMX I420andYUVAtoYV12_MMX
    %define _I420andYUVAtoYUY2_MMX I420andYUVAtoYUY2_MMX
    %define _I420andYUVAtoUYVY_MMX I420andYUVAtoUYVY_MMX
    %define _I420andI420toI420_MMX_sub I420andI420toI420_MMX_sub
    %error data segment possibly not qword aligned
%elifdef AOUTB
    %define _I420andYUVAtoI420_MMX I420andYUVAtoI420_MMX
    %define _I420andYUVAtoYV12_MMX I420andYUVAtoYV12_MMX
    %define _I420andYUVAtoYUY2_MMX I420andYUVAtoYUY2_MMX
    %define _I420andYUVAtoUYVY_MMX I420andYUVAtoUYVY_MMX
    %define _I420andI420toI420_MMX_sub I420andI420toI420_MMX_sub
    %error data segment possibly not qword aligned
%else
    %error linking format currently not supported
%endif

;; Exported entry points (spelled according to the ladder above).
global _I420andYUVAtoI420_MMX
global _I420andYUVAtoYV12_MMX
global _I420andYUVAtoYUY2_MMX
global _I420andYUVAtoUYVY_MMX
global _I420andI420toI420_MMX_sub

;; Color format ids.
%assign CID_I420 0 ;;/* planar YCrCb 4:2:0 format (CCIR) */
%assign CID_YV12 1 ;;/* planar YVU 4:2:0 (ATI) */
%assign CID_YUY2 3 ;;/* packed YVU 4:2:2 (ATI,MATROX,etc.) */
%assign CID_UYVY 4 ;;/* yet another packed 4:2:2 (ATI) */

;; make_labels <suffix>
;; Redefines the short label names used inside a routine (exit, a100, ...)
;; so that each expands to "<name><suffix>".  Invoking it with a different
;; suffix per routine gives every routine its own set of label names.
%macro make_labels 1
%define exit exit%1
%define fail_exit fail_exit%1
%define a100 a100%1
%define a200 a200%1
%define a300 a300%1
%define a400 a400%1
%define y100 y100%1
%define two_pels two_pels%1
%define line_done line_done%1
%define y_by_fours y_by_fours%1
%define y_one_two_three y_one_two_three%1
%endmacro

;=========================================================
segment .data data ALIGNMENT
align 8
;; 8-byte constants:
;;   mask3  : 0xFF in the even bytes, 0x00 in the odd bytes
;;   mask3b : 0x00 in the even bytes, 0xFF in the odd bytes
;;   con1   : four 16-bit words, each equal to 1
mask3 db -1, 0, -1, 0, -1, 0, -1, 0
mask3b db 0, -1, 0, -1, 0, -1, 0, -1
con1 dw 1, 1, 1, 1
;;_DATA ENDS
;=========================================================
;=========================================================
segment .text code
;;==========================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;  I420andYUVAtoYV12
;;
;;  This function alpha-blends two I420 buffers into a third
;;  YV12 buffer using the alpha info tacked to the
;;  end of the second I420 buffer
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;  Implementation: this routine only builds the stack frame and the
;;  destination plane pointers (with U and V swapped relative to I420) and
;;  then jumps to I420andYUVAtoI420_MMX_entry inside _I420andYUVAtoI420_MMX.
;;  The frame built here (5 pushed registers + ntmps scratch dwords) must
;;  therefore stay identical to the one built by _I420andYUVAtoI420_MMX.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoYV12_MMX:
;;
;; int I420andYUVAtoYV12_MMX(
;;    unsigned char* src,  int src_pels,  int src_lines,  int src_pitch,
;;    int src_startx,  int src_starty,
;;    unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;;    int yuva_startx, int yuva_starty,
;;    unsigned char* dst,  int dst_pels,  int dst_lines,  int dst_pitch,
;;    int dst_startx,  int dst_starty,
;;    int width,  int height);
;;
;; arguments
;; Argument n (1-based, in prototype order) is read from esp+4*(n+npush).
;; npush is only looked up when one of these macros is expanded, so the
;; %assign further down is what gives it its value.
%define src dword [esp+4*(1+npush)]
%define src_pels dword [esp+4*(2+npush)]
%define src_lines dword [esp+4*(3+npush)]
%define src_pitch dword [esp+4*(4+npush)]
%define src_startx dword [esp+4*(5+npush)]
%define src_starty dword [esp+4*(6+npush)]
%define yuva dword [esp+4*(7+npush)]
%define yuva_pels dword [esp+4*(8+npush)]
%define yuva_lines dword [esp+4*(9+npush)]
%define yuva_pitch dword [esp+4*(10+npush)]
%define yuva_startx dword [esp+4*(11+npush)]
%define yuva_starty dword [esp+4*(12+npush)]
%define dst dword [esp+4*(13+npush)]
%define dst_pels dword [esp+4*(14+npush)]
%define dst_lines dword [esp+4*(15+npush)]
%define dst_pitch dword [esp+4*(16+npush)]
%define dst_startx dword [esp+4*(17+npush)]
%define dst_starty dword [esp+4*(18+npush)]
%define width dword [esp+4*(19+npush)]
%define height dword [esp+4*(20+npush)]

    ;; save the five registers counted in npush
    push ebp
    push esi
    push edi
    push ecx
    push ebx
;; tmp on stack
;; ntmps scratch dwords are reserved below the saved registers;
;; npush = 5 saved registers + ntmps scratch dwords.
%assign ntmps 16
%assign npush (5+ntmps)
    sub esp, ntmps*4
;; scratch slots: yuva* = second (alpha-carrying) input, s* = src, d* = dst
%define yuvay dword [esp + 0*4]
%define yuvau dword [esp + 1*4]
%define yuvaoffsetv dword [esp + 2*4]
%define yuvaa dword [esp + 3*4]
%define yuvaauv dword [esp + 4*4]
%define yuvauvpitch dword [esp + 5*4]
%define sy dword [esp + 6*4]
%define su dword [esp + 7*4]
%define soffsetv dword [esp + 8*4]
%define suvpitch dword [esp + 9*4]
%define dy dword [esp + 10*4]
%define du dword [esp + 11*4]
%define doffsetv dword [esp + 12*4]
%define duvpitch dword [esp + 13*4]
%define dtmp0 dword [esp + 14*4]
%define dtmp1 dword [esp + 15*4]
;; YV12
;; [ Y Y ]
;; [V]
;; [U]
;; YV12 u v reversed from I420
;; compute reversed order pointers
;; then continue with I420 routine
;;
;; Result of this section:
;;   duvpitch = dst_pitch/2
;;   dy       = dst
;;   du       = dst + pitch*lines + (pitch/2)*(lines/2)
;;   doffsetv = -(pitch/2)*(lines/2)
;;-----
    mov eax, dst_lines
    mov ecx, dst_pitch
    mov edx, eax
    imul eax, ecx ;; pitch*lines
    shr ecx, 1 ;; pitch/2
    mov duvpitch, ecx
    mov esi, dst
    mov dy, esi
    add esi, eax
    shr edx, 1 ;; lines/2
    imul edx, ecx ;; (pitch/2)*(lines/2)
    add esi, edx
    mov du, esi
    neg edx ;; negative offset from u
    mov doffsetv, edx
;;-----
    ;; dst pointers are ready; skip the I420 dst setup and share the rest
    jmp I420andYUVAtoI420_MMX_entry
;_I420andYUVAtoYV12_MMX endp
;=============================================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;  I420andYUVAtoI420
;;
;;  This function alpha-blends two I420 buffers into a third
;;  I420 buffer using the alpha info tacked to the
;;  end of the second I420 buffer
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoI420_MMX:
;;
;; int I420andYUVAtoI420_MMX(
;;    unsigned char* src,  int src_pels,  int src_lines,  int src_pitch,
;;    int src_startx,  int src_starty,
;;    unsigned char* yuva, int yuva_pels, int yuva_lines, int yuva_pitch,
;;    int yuva_startx, int yuva_starty,
;;    unsigned char* dst,  int dst_pels,  int dst_lines,  int dst_pitch,
;;    int dst_startx,  int dst_starty,
;;    int width,  int height);
;;
;%define exit        _I420andYUVAtoI420_MMX_exit
;%define fail_exit   _I420andYUVAtoI420_MMX_fail_exit
;%define a100        _I420andYUVAtoI420_MMX_a100
;%define a200        _I420andYUVAtoI420_MMX_a200
;%define a300        _I420andYUVAtoI420_MMX_a300
;%define a400        _I420andYUVAtoI420_MMX_a400
;; label names for this routine get the suffix _I420andYUVAtoI420_MMX
make_labels _I420andYUVAtoI420_MMX
;; arguments
;; Argument n (1-based, in prototype order) is read from esp+4*(n+npush).
%define src dword [esp+4*(1+npush)]
%define src_pels dword [esp+4*(2+npush)]
%define src_lines dword [esp+4*(3+npush)]
%define src_pitch dword [esp+4*(4+npush)]
%define src_startx dword [esp+4*(5+npush)]
%define src_starty dword [esp+4*(6+npush)]
%define yuva dword [esp+4*(7+npush)]
%define yuva_pels dword [esp+4*(8+npush)]
%define yuva_lines dword [esp+4*(9+npush)]
%define yuva_pitch dword [esp+4*(10+npush)]
%define yuva_startx dword [esp+4*(11+npush)]
%define yuva_starty dword [esp+4*(12+npush)]
%define dst dword [esp+4*(13+npush)]
%define dst_pels dword [esp+4*(14+npush)]
%define dst_lines dword [esp+4*(15+npush)]
%define dst_pitch dword [esp+4*(16+npush)]
%define dst_startx dword [esp+4*(17+npush)]
%define dst_starty dword [esp+4*(18+npush)]
%define width dword [esp+4*(19+npush)]
%define height dword [esp+4*(20+npush)]

    ;; save the five registers counted in npush
    push ebp
    push esi
    push edi
    push ecx
    push ebx
;; tmp on stack
;; Same frame as _I420andYUVAtoYV12_MMX, which jumps into this routine.
%assign ntmps 16
%assign npush (5+ntmps)
    sub esp, ntmps*4
;; scratch slots: yuva* = second (alpha-carrying) input, s* = src, d* = dst
%define yuvay dword [esp + 0*4]
%define yuvau dword [esp + 1*4]
%define yuvaoffsetv dword [esp + 2*4]
%define yuvaa dword [esp + 3*4]
%define yuvaauv dword [esp + 4*4]
%define yuvauvpitch dword [esp + 5*4]
%define sy dword [esp + 6*4]
%define su dword [esp + 7*4]
%define soffsetv dword [esp + 8*4]
%define suvpitch dword [esp + 9*4]
%define dy dword [esp + 10*4]
%define du dword [esp + 11*4]
%define doffsetv dword [esp + 12*4]
%define duvpitch dword [esp + 13*4]
%define dtmp0 dword [esp + 14*4]
%define dtmp1 dword [esp + 15*4]
;; byte-granular views of the dtmp0 / dtmp1 slots (x = byte offset)
%define btmp0(x) byte [esp + 14*4 + x]
%define btmp1(x) byte [esp + 15*4 + x]
;;-----
;; I420 destination planes:
;;   duvpitch = dst_pitch/2
;;   dy       = dst
;;   du       = dst + pitch*lines
;;   doffsetv = +(pitch/2)*(lines/2)
    mov eax, dst_lines
    mov ecx, dst_pitch
    mov edx, eax
    imul eax, ecx ;; pitch*lines
    shr ecx, 1 ;; pitch/2
    mov duvpitch, ecx
    mov esi, dst
    mov dy, esi
    add esi, eax
    mov du, esi
    shr edx, 1 ;; lines/2
    imul edx, ecx ;; (pitch/2)*(lines/2)
    mov doffsetv, edx
;;-----
;; shared entry point: _I420andYUVAtoYV12_MMX jumps here after it has
;; filled in dy / du / doffsetv / duvpitch itself
;;
I420andYUVAtoI420_MMX_entry:
;;
;; source planes:
;;   suvpitch = src_pitch/2
;;   sy       = src
;;   su       = src + pitch*lines
;;   soffsetv = (pitch/2)*(lines/2)
    mov eax, src_lines
    mov ecx, src_pitch
    mov edx, eax
    imul eax, ecx ;; pitch*lines
    shr ecx, 1 ;; pitch/2
    mov suvpitch, ecx
    mov esi, src
    mov sy, esi
    add esi, eax
    mov su, esi
    shr edx, 1 ;; lines/2
    imul edx, ecx ;; (pitch/2)*(lines/2)
    mov soffsetv, edx
;;-----
;; yuva planes:
;;   yuvauvpitch = yuva_pitch/2
;;   yuvay       = yuva
;;   yuvau       = yuva + pitch*lines
;;   yuvaoffsetv = (pitch/2)*(lines/2)
;;   yuvaa       = yuvau + 2*(pitch/2)*(lines/2)
;;   yuvaauv     = same address as yuvaa
    mov eax, yuva_lines
    mov ecx, yuva_pitch
    mov edx, eax
    imul eax, ecx ;; pitch*lines
    shr ecx, 1 ;; pitch/2
    mov yuvauvpitch, ecx
    mov esi, yuva
    mov yuvay, esi
    add esi, eax
    mov yuvau, esi
    shr edx, 1 ;; lines/2
    imul edx, ecx ;; (pitch/2)*(lines/2)
    mov yuvaoffsetv, edx
    lea esi, [esi+edx*2] ;; step over both chroma planes
    mov yuvaa, esi
    mov yuvaauv, esi ;; duplicate yuvaa for uv use
;;------------------
;;------------------
;; pointer adjustment to (x,y)
;; src: sy += y*pitch + x ; su += (y/2)*(pitch/2) + x/2
    mov ecx, src_pitch
    mov eax, src_starty
    mov edx, eax
    mov ebx, src_startx
    imul eax, ecx ;; y*pitch
    mov esi, sy ;;
    add esi, eax ;; sy + y*pitch
    add esi, ebx ;; sy + y*pitch + x
    mov sy, esi
    shr ecx, 1 ;; pitch/2
    shr edx, 1 ;; y/2
    imul edx, ecx ;; (y/2)*(pitch/2)
    shr ebx, 1 ;; x/2
    mov esi, su ;;
    add esi, edx ;; su + (y/2)*(pitch/2)
    add esi, ebx ;; su + (y/2)*(pitch/2) + x/2
    mov su, esi
;;
;; pointer adjustment to (x,y)
;; dst: dy += y*pitch + x ; du += (y/2)*(pitch/2) + x/2
    mov ecx, dst_pitch
    mov eax, dst_starty
    mov edx, eax
    mov ebx, dst_startx
    imul eax, ecx ;; y*pitch
    mov esi, dy ;;
    add esi, eax ;; dy + y*pitch
    add esi, ebx ;; dy + y*pitch + x
    mov dy, esi
    shr ecx, 1 ;; pitch/2
    shr edx, 1 ;; y/2
    imul edx, ecx ;; (y/2)*(pitch/2)
    shr ebx, 1 ;; x/2
    mov esi, du ;;
    add esi, edx ;; du + (y/2)*(pitch/2)
    add esi, ebx ;; du + (y/2)*(pitch/2) + x/2
    mov du, esi
;;
;; pointer adjustment to (x,y)
;; yuva: eax = y*pitch + x ; ecx = pitch, edx = y and ebx = x are still
;; live after these instructions
    mov ecx, yuva_pitch
    mov eax, yuva_starty
    mov edx, eax
    mov ebx, yuva_startx
    imul eax, ecx ;; y*pitch
    add eax, ebx ;; y*pitch + x
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -