⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 yuvammx.asm

📁 著名的 helix realplayer 基于手机 symbian 系统的 播放器全套源代码
💻 ASM
📖 第 1 页 / 共 5 页
字号:
    movzx   ebp, byte [ebx+ecx]     ;; prevent possible access vio
    movd    mm0, ebp            ;; 0 0 0 pv1
    punpckldq    mm0, mm0       ;; word 0 pv1 0 pv1

    ;;movd    mm5, [edx+eax]            ;; qv1
    movzx   eax, byte [edx+eax]     ;; prevent possible access vio
    movd    mm5, eax            ;;  0 0 0 qv1
    punpckldq    mm5, mm5       ;; word 0 qv1 0 qv1

    psubw   mm5, mm0            ;; qv - pv

    ;;add ebx, 2                  ;; inc yuvau address
    ;;add edx, 2                  ;; inc su address

    pmullw  mm5, mm4            ;; alpha*(qv-pv)
    psrlw   mm5, 8
    paddb   mm5, mm0            ;; line 2 line 1 blended v2 v1 v2 v1 

    ;;   mm2 = line 1 blended for 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y2 0 y1
    ;;   mm1 = line 2 line 1 blended 0 u2 0 u1 
    ;;   mm5 = line 2 line 1 blended 0 v2 0 v1 
    ;;
    mov ecx, dy
    mov eax, dst_pitch

    packuswb    mm2, mm3        ;; x x y22 y21 x x y12 y11

    psllq   mm5, 8              ;;  v2 0 v1 0 v2 0 v1 0
    por mm1, mm5                ;;  v22 u22 v21 u21 v12 u12 v11 u11
    
    movq    mm3, mm2
    punpcklbw   mm2, mm1        ;;  v11 y12 u11 y11
    movd    [ecx], mm2          ;; line 1 result

    punpckhbw   mm3, mm1        ;;  v21 y22 u21 y21 

    movd    [ecx+eax], mm3      ;; line 2 result

;;-----------
;; line loop
line_done:

;;
;; End of one pass of the line loop: every plane pointer is advanced by its
;; precomputed increment, the remaining line count drops by two, and the
;; loop repeats while that count is still positive.
;; NOTE(review): the register roles below (esi/ebx = yuva Y/U pointers,
;; edi/edx = src Y/U pointers) are inferred from the names of the increments
;; added to them; the loads that set them up are outside this view.
;;
    mov eax, yuvayinc      ;; move down two lines
    add esi, eax
    
    mov eax, yuvauvinc      ;; chroma pointer increment for the yuva buffer
    add ebx, eax
    
    mov eax, syinc          ;; luma pointer increment for the src buffer
    add edi, eax

    mov eax, suvinc         ;; chroma pointer increment for the src buffer
    add edx, eax
    
    mov eax, dyinc          ;; the destination pointer lives in the dy stack temp,
    mov ecx, dy             ;; not in a register, so it is updated through ecx
    add ecx, eax
    mov dy,  ecx

    mov eax, height         ;; two lines were consumed by this pass
    sub eax, 2
    mov height, eax         ;; mov leaves the flags from sub intact for the jg
    jg  near y100           ;; loop while the remaining count is > 0
;;-----------
exit:

    xor eax, eax    ;; return success

    add esp, ntmps*4        ;; release the ntmps dword temporaries
    ;; restore saved registers
    ;; NOTE(review): presumably the reverse of this routine's prologue pushes,
    ;; which are outside this view -- confirm.
    pop ebx
    pop ecx
    pop edi
    pop esi
    pop ebp

    emms                    ;; leave MMX state so x87 code can run after return

    ret

;_I420andYUVAtoYUY2_MMX endp
;====================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;	I420andYUVAtoUYVY
;;
;;	This function alpha-blends two I420 buffers into a third
;;	UYVY buffer using the alpha info tacked to the 
;;	end of the second I420 buffer
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoUYVY_MMX:
;;
;; int I420andYUVAtoUYVY_MMX(
;;    unsigned char* src,  int src_pels,    int src_lines,  int src_pitch,
;;                         int src_startx,  int src_starty,
;;    unsigned char* yuva, int yuva_pels,   int yuva_lines, int yuva_pitch,
;;                         int yuva_startx, int yuva_starty,
;;    unsigned char* dst,  int dst_pels,    int dst_lines,  int dst_pitch,
;;                         int dst_startx,  int dst_starty,
;;    int width,  int height);
;;
;;
make_labels _I420andYUVAtoUYVY_MMX


;; arguments
;;
;; Each name expands to a stack operand addressed relative to esp as it stands
;; after the prologue below.  npush is the number of dwords between esp and
;; the return address (5 saved registers + ntmps temporaries), so the k-th
;; argument (1-based) is at [esp + 4*(k + npush)].
;; npush is assigned further down, after these %defines; that works because a
;; %define body is expanded where the macro is used, not where it is defined.
;; These operands are only valid while esp keeps its post-prologue value.
%define src            dword [esp+4*(1+npush)]
%define src_pels       dword [esp+4*(2+npush)]
%define src_lines      dword [esp+4*(3+npush)]
%define src_pitch      dword [esp+4*(4+npush)]
%define src_startx     dword [esp+4*(5+npush)]
%define src_starty     dword [esp+4*(6+npush)]

%define yuva           dword [esp+4*(7+npush)]
%define yuva_pels      dword [esp+4*(8+npush)]
%define yuva_lines     dword [esp+4*(9+npush)]
%define yuva_pitch     dword [esp+4*(10+npush)]
%define yuva_startx    dword [esp+4*(11+npush)]
%define yuva_starty    dword [esp+4*(12+npush)]

%define dst            dword [esp+4*(13+npush)]
%define dst_pels       dword [esp+4*(14+npush)]
%define dst_lines      dword [esp+4*(15+npush)]
%define dst_pitch      dword [esp+4*(16+npush)]
%define dst_startx     dword [esp+4*(17+npush)]
%define dst_starty     dword [esp+4*(18+npush)]

%define width          dword [esp+4*(19+npush)]
%define height         dword [esp+4*(20+npush)]

;; prologue: save the five registers this routine uses besides eax and edx
    push    ebp
    push    esi
    push    edi
    push    ecx
    push    ebx

;; tmp on stack
;; ntmps dword temporaries are reserved below the saved registers; npush must
;; stay equal to (number of pushes above + ntmps) for the argument offsets
;; to be right.
%assign ntmps 15
%assign npush (5+ntmps)
    sub esp, ntmps*4 

;; yuva (top) buffer temporaries:
;;   yuvay        pointer into the Y plane
;;   yuvau        pointer into the U plane
;;   yuvaoffsetv  byte offset from a U sample to the matching V sample
;;   yuvaoffseta  byte offset from a Y sample to the matching alpha sample
;;   negyuvapitch -yuva_pitch, used to step between the two lines of a pair
;;   yuvayinc     Y pointer increment stored for the line loop
;;   yuvauvinc    U/V pointer increment stored for the line loop
%define yuvay        dword [esp + 0*4]
%define yuvau        dword [esp + 1*4]
%define yuvaoffsetv  dword [esp + 2*4]
%define yuvaoffseta  dword [esp + 3*4]
%define negyuvapitch dword [esp + 4*4]
%define yuvayinc     dword [esp + 5*4]
%define yuvauvinc    dword [esp + 6*4]

;; src (bottom) I420 buffer temporaries, same roles as above:
;;   sy / su      pointers into the Y and U planes
;;   soffsetv     byte offset from a U sample to the matching V sample
;;   syinc/suvinc pointer increments stored for the line loop
%define sy           dword [esp + 7*4]
%define su           dword [esp + 8*4]
%define soffsetv     dword [esp + 9*4]
%define syinc        dword [esp + 10*4]
%define suvinc       dword [esp + 11*4]

;; destination temporaries:
;;   dy           current destination pointer
;;   width0       width - 3, computed at the top of each line pass
;;   dyinc        destination pointer increment stored for the line loop
%define dy           dword [esp + 12*4]
%define width0       dword [esp + 13*4]
%define dyinc        dword [esp + 14*4]


    ;; Derive plane base pointers, inter-plane offsets and pointer increments
    ;; for the three buffers and store them in the stack temporaries.
    ;; edi = width rounded down to a multiple of 4, ebp = half of that; both
    ;; stay live until the end of this setup section.
    mov edi, width
    and edi, -4         ;; pels truncated to multiple of 4 (width & ~ 3)
    mov ebp, edi
    shr ebp, 1              ;; (width & ~ 3)/2
;;----- destination
    mov eax, dst
    mov dy, eax

    mov ecx, dst_pitch
    add ecx, ecx
    sub ecx, edi
    sub ecx, edi
    mov dyinc, ecx          ;; inc = 2*pitch - 2*(width & ~ 3)
;;----- src (I420): Y plane, then U plane, then V plane
    mov eax, src_lines
    mov ecx, src_pitch
    mov edx, eax
    imul    eax, ecx        ;; pitch*lines = size of the Y plane

    mov esi, src
    mov sy, esi
    add esi, eax
    mov su, esi             ;; U plane starts right after the Y plane

    shr edx, 1              ;; lines/2
    shr ecx, 1              ;; pitch/2
    imul    edx, ecx        ;; (pitch/2)*(lines/2) = size of one chroma plane
    mov soffsetv, edx       ;; V sample = U sample + one chroma plane
    
    sub ecx, ebp            ;; pitch/2 - (width & ~ 3)/2
    mov suvinc, ecx

    mov ecx, src_pitch
    add ecx, ecx
    sub ecx, edi
    mov syinc, ecx          ;; inc = 2*pitch - (width & ~ 3)
;;----- yuva: Y plane, U plane, V plane, then the alpha plane
    mov eax, yuva_lines
    mov ecx, yuva_pitch
    mov edx, ecx
    neg edx
    mov negyuvapitch, edx   ;; -pitch

    mov edx, eax            ;; lines
    imul    eax, ecx        ;; pitch*lines = size of the Y plane
    mov esi, yuva
    mov yuvay, esi
    add esi, eax
    mov yuvau, esi          ;; U plane starts right after the Y plane
    shr ecx, 1              ;; pitch/2
    shr edx, 1              ;; lines/2
    imul    edx, ecx        ;; (lines/2)*(pitch/2) = size of one chroma plane
    mov yuvaoffsetv, edx    ;; V sample = U sample + one chroma plane
    lea edx, [eax+edx*2]    ;; Y plane size + both chroma planes
    mov yuvaoffseta, edx    ;; alpha sample = Y sample + this offset
    sub ecx, ebp            ;; pitch/2 - (width & ~ 3)/2
    mov yuvauvinc, ecx

    mov ecx, yuva_pitch
    add ecx, ecx
    sub ecx, edi
    mov yuvayinc, ecx          ;; inc = 2*pitch - (width & ~ 3)
;;-----
;;------------------
;; pointer adjustment to (x,y)
;; Move the stored plane pointers of each buffer from the buffer origin to
;; the requested start position.  Luma pointers move by y*pitch + x; chroma
;; pointers move by (y/2)*(pitch/2) + x/2; the destination moves by
;; y*pitch + 2*x.
;;
;; src (I420)
    mov ecx, src_pitch
    mov eax, src_starty
    mov edx, eax            ;; keep y for the chroma offset below
    mov ebx, src_startx
    imul    eax, ecx        ;; y*pitch
    mov esi, sy             ;;
    add esi, eax            ;;  sy + y*pitch
    add esi, ebx            ;;  sy + y*pitch + x
    mov sy, esi

    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, su             ;;
    add esi, edx            ;;  su + (y/2)*(pitch/2)
    add esi, ebx            ;;  su + (y/2)*(pitch/2) + x/2
    mov su, esi
;;
;; pointer adjustment to (x,y)
;; yuva; only yuvay and yuvau are moved, the V and alpha samples are reached
;; from them through yuvaoffsetv / yuvaoffseta.
    mov ecx, yuva_pitch
    mov eax, yuva_starty
    mov edx, eax            ;; keep y for the chroma offset below
    mov ebx, yuva_startx
    imul    eax, ecx        ;; y*pitch
    add eax, ebx            ;; y*pitch + x
    
    mov esi, yuvay          ;;
    add esi, eax            ;;  yuvay + y*pitch + x
    mov yuvay, esi
    
    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, yuvau          ;;
    add esi, edx            ;;  yuvau + (y/2)*(pitch/2)
    add esi, ebx            ;;  yuvau + (y/2)*(pitch/2) + x/2
    mov yuvau, esi

;; pointer adjustment to (x,y)
;; dst: x is doubled before it is added
    mov ecx, dst_pitch
    mov eax, dst_starty
    mov ebx, dst_startx
    imul    eax, ecx        ;; y*pitch
    add ebx, ebx            ;; 2*x

    mov esi, dy             ;;
    add esi, eax            ;;  dy + y*pitch
    add esi, ebx            ;;  dy + y*pitch + 2*x
    mov dy, esi
;;-----



;; yuva   P = top
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]
;; [A01A02][A03A04]
;; [A11A12][A13A14]


;; I420 src  Q = bottom
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]

;; UYVY dst 
;; [UYVY][UYVY]         ;; byte order [U][Y0][V][Y1]
;; [UYVY][UYVY]
;;-------

    ;; constants kept in MMX registers for the pixel loops
    pxor    mm7, mm7            ;; mm7 = 0, second operand for byte->word unpacks
    movq    mm6, [con1]         ;; multiplier for pmaddwd on the alpha words
                                ;; NOTE(review): con1 is defined outside this view;
                                ;; the pmaddwd comments (a4+a3, a2+a1) imply four
                                ;; words of 1 -- confirm.

    ;; set up line loop
    ;; height is stored as height-1; the routine leaves through exit when
    ;; that is <= 0, i.e. when fewer than two lines were requested.
    ;; NOTE(review): this routine's loop-bottom test is outside this view.
    mov eax, height  
    dec eax
    mov height, eax             ;; mov leaves the flags from dec intact for the jle
    jle near exit

    ;; working pointers stay in registers across the line loop
    mov esi, yuvay              ;; esi = yuva Y
    mov ebx, yuvau              ;; ebx = yuva U
    mov edi, sy                 ;; edi = src Y
    mov edx, su                 ;; edx = src U


y100:               ;; line loop

    ;; width0 = width - 3; if that is <= 0 (width < 4) there is no full
    ;; group of four pels, so go straight to the two_pels path
    mov eax, width
    sub eax, 3
    mov width0, eax
    jle near two_pels


a100:
    ;; do four pels per iteration
    ;;
    mov ecx, yuvaoffseta    
    mov eax, src_pitch
    ;; ------- first line --------
    ;; Blend four luma samples of line 1.  For each sample the low byte of
    ;; its word ends up as  p + ((alpha*(q-p)) >> 8)  modulo 256, with
    ;; p = yuva (top) sample, q = src (bottom) sample; the high byte stays 0.
    ;; On entry: esi = yuva Y, edi = src Y, ecx = yuvaoffseta, mm7 = 0.
    ;;
    ;; yuva 4 y's
    movd    mm0, [esi]          ;; p4 p3 p2 p1
    punpcklbw   mm0, mm7        ;; word p4 p3 p2 p1 

    ;; src I420 4 y's
    ;; movd rather than movq: punpcklbw only consumes the low four bytes, so
    ;; an 8-byte load would read four bytes that are never used.  This also
    ;; matches every other 4-sample load in this loop.
    movd    mm2, [edi]          ;; q4 q3 q2 q1
    punpcklbw mm2, mm7          ;; word q4 q3 q2 q1 
    psubw   mm2, mm0            ;; q-p

    ;; yuva 4 a's
    movd    mm4, [esi+ecx]      ;; a4 a3 a2 a1
    punpcklbw mm4, mm7          ;; word a4 a3 a2 a1

    pmullw  mm2, mm4            ;; word alpha*(q-p)  for y4 y3 y2 y1
    psrlw   mm2, 8              ;; byte alpha*(q-p)  for 0 y4 0 y3 0 y2 0 y1
    paddb   mm2, mm0            ;; blended for 0 y4 0 y3 0 y2 0 y1

    ;; horizontal pair sums of alpha, kept for the chroma blend
    ;; NOTE(review): assumes mm6 (con1) holds four words of 1 -- confirm.
    pmaddwd mm4, mm6           ;; line 1 dword a4+a3 a2+a1
    ;; 
    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm4 = line 1 dword sum(a4,a3) sum(a2,a1)   (not yet averaged)
    ;;
    ;; ------ second line ---------
    mov ebp, negyuvapitch
    sub esi, ebp            ;; point to line 2
    ;;add esi, yuva_pitch
    ;;;;add edi, src_pitch

    ;; yuva 4 y's
    movd    mm1, [esi]          ;; p4 p3 p2 p1
    punpcklbw   mm1, mm7        ;; word p4 p3 p2 p1 

    ;; src I420 4 y's
    movd    mm3, [edi+eax]          ;; q4 q3 q2 q1
    punpcklbw mm3, mm7          ;; word q4 q3 q2 q1 
    psubw   mm3, mm1            ;; q-p

    ;; yuva 4 a's
    movd    mm5, [esi+ecx]      ;; a4 a3 a2 a1
    punpcklbw mm5, mm7          ;; word a4 a3 a2 a1

    lea esi, [esi+ebp+4]        ;; back to line 1 and inc
    add edi, 4                  ;; inc sy

    pmullw  mm3, mm5            ;; word alpha*(q-p)  for y4 y3 y2 y1
    psrlw   mm3, 8              ;; byte alpha*(q-p)  for 0 y4 0 y3 0 y2 0 y1
    paddb   mm3, mm1            ;; blended for 0 y4 0 y3 0 y2 0 y1

    pmaddwd mm5, mm6           ;; line 2 dword a4+a3 a2+a1
    ;; 
    packssdw    mm4, mm5        ;; for u v sum a22 a21 a12 a11
    psrlw       mm4, 1          ;; for u v ave  a22 a21 a12 a11
    ;;
    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y4 0 y3 0 y2 0 y1 0
    ;;   mm4 = ave for line 2 line 1 =  a22 a21 a12 a11
    ;;
    ;; ----- U V --------
    ;;mov ebx, yuvau
    ;;mov edx, su
    mov ecx, yuvaoffsetv
    mov eax, soffsetv

    ;; --U--
    movd    mm0, [ebx]          ;; x x pu2 pu1
    punpcklwd    mm0, mm0       ;; byte pu2 pu1 pu2 pu1
    punpcklbw    mm0, mm7       ;; word pu2 pu1 pu2 pu1

    movd    mm1, [edx]          ;; x x qu2 qu1
    punpcklwd    mm1, mm1       ;; byte qu2 qu1 qu2 qu1
    punpcklbw    mm1, mm7       ;; word qu2 qu1 qu2 qu1

    psubw   mm1, mm0            ;; qu - pu
    pmullw  mm1, mm4            ;; alpha*(qu-pu)
    psrlw   mm1, 8
    paddb   mm1, mm0            ;; line 2 line 1 blended u2 u1 u2 u1 

    ;; --V--
    movd    mm0, [ebx+ecx]          ;; x x pv2 pv1
    punpcklwd    mm0, mm0       ;; byte pv2 pv1 pv2 pv1
    punpcklbw    mm0, mm7       ;; word pv2 pv1 pv2 pv1

    ;;movd    mm5, [edx+eax]            ;; x x qv2 qv1
    movzx   eax, word [edx+eax]     ;; prevent possible access vio
    movd    mm5, eax            ;;  0 0 qv2 qv1
    punpcklwd    mm5, mm5       ;; byte qv2 qv1 qv2 qv1
    punpcklbw    mm5, mm7       ;; word qv2 qv1 qv2 qv1

    psubw   mm5, mm0            ;; qv - pv

    add ebx, 2                  ;; inc yuvau address
    add edx, 2                  ;; inc su address

    pmullw  mm5, mm4            ;; alpha*(qv-pv)
    psrlw   mm5, 8
    paddb   mm5, mm0            ;; line 2 line 1 blended v2 v1 v2 v1 

    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm1 = line 2 line 1 blended 0 u2 0 u1 0 u2 0 u1 
    ;;   mm5 = line 2 line 1 blended 0 v2 0 v1 0 v2 0 v1 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -