⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 yuvammx.asm

📁 著名的 helix realplayer 基于手机 symbian 系统的 播放器全套源代码
💻 ASM
📖 第 1 页 / 共 5 页
字号:
%define yuva_lines     dword [esp+4*(9+npush)]
%define yuva_pitch     dword [esp+4*(10+npush)]
%define yuva_startx    dword [esp+4*(11+npush)]
%define yuva_starty    dword [esp+4*(12+npush)]

%define dst            dword [esp+4*(13+npush)]
%define dst_pels       dword [esp+4*(14+npush)]
%define dst_lines      dword [esp+4*(15+npush)]
%define dst_pitch      dword [esp+4*(16+npush)]
%define dst_startx     dword [esp+4*(17+npush)]
%define dst_starty     dword [esp+4*(18+npush)]

%define width          dword [esp+4*(19+npush)]
%define height         dword [esp+4*(20+npush)]

    ;; Prologue: save the five general registers this routine overwrites,
    ;; then reserve ntmps dwords of scratch space below them.
    ;; After this point esp has moved down by npush dwords in total, which is
    ;; why every stack-argument %define is written as [esp+4*(k+npush)].
    push    ebp
    push    esi
    push    edi
    push    ecx
    push    ebx

;; tmp on stack
;; ntmps = number of dword temporaries, npush = 5 saved registers + ntmps
%assign ntmps 15
%assign npush (5+ntmps)
    sub esp, ntmps*4 

;; Temporaries for the yuva (top) image: plane pointers, plane offsets,
;; negated pitch and per-line-pair pointer increments.
%define yuvay        dword [esp + 0*4]
%define yuvau        dword [esp + 1*4]
%define yuvaoffsetv  dword [esp + 2*4]
%define yuvaoffseta  dword [esp + 3*4]
%define negyuvapitch dword [esp + 4*4]
%define yuvayinc     dword [esp + 5*4]
%define yuvauvinc    dword [esp + 6*4]

;; Temporaries for the I420 source (bottom) image.
%define sy           dword [esp + 7*4]
%define su           dword [esp + 8*4]
%define soffsetv     dword [esp + 9*4]
%define syinc        dword [esp + 10*4]
%define suvinc       dword [esp + 11*4]

;; Temporaries for the destination: write pointer, pel-loop counter,
;; per-line-pair pointer increment.
%define dy           dword [esp + 12*4]
%define width0       dword [esp + 13*4]
%define dyinc        dword [esp + 14*4]


;; ---------------------------------------------------------------------
;; Plane pointer / increment setup.
;; Computes, for the dst, src (I420) and yuva images:
;;   - the plane base pointers and inter-plane offsets,
;;   - the amount to add to each pointer after a pair of lines,
;;   - the pointers adjusted to the (startx, starty) origin.
;; edi = width & ~3 and ebp = (width & ~3)/2 stay live through the
;; increment computations below.
;; ---------------------------------------------------------------------
    mov edi, width
    and edi, -4    ;; width truncated to multiple of 4 (width & ~ 3)
    mov ebp, edi   ;; truncated for address increment computation only
    shr ebp, 1              ;; (width & ~ 3)/2
;;----- dst: write pointer and two-line increment
    mov eax, dst
    mov dy, eax

    mov ecx, dst_pitch
    add ecx, ecx
    sub ecx, edi
    sub ecx, edi
    mov dyinc, ecx          ;; inc = 2*pitch - 2*(width & ~ 3)
;;----- src: Y pointer, U pointer, U->V offset, increments
    mov eax, src_lines
    mov ecx, src_pitch
    mov edx, eax
    imul    eax, ecx        ;; pitch*lines

    mov esi, src
    mov sy, esi
    add esi, eax
    mov su, esi             ;; su = src + pitch*lines

    shr edx, 1              ;; lines/2
    shr ecx, 1              ;; pitch/2
    imul    edx, ecx        ;; (pitch/2)*(lines/2)
    mov soffsetv, edx       ;; offset added to a U address to reach V
    
    sub ecx, ebp            ;; pitch/2 - (width & ~ 3)/2
    mov suvinc, ecx

    mov ecx, src_pitch
    add ecx, ecx
    sub ecx, edi
    mov syinc, ecx          ;; inc = 2*pitch - (width & ~ 3)
;;----- yuva: Y pointer, U pointer, U->V and Y->A offsets, increments
    mov eax, yuva_lines
    mov ecx, yuva_pitch
    mov edx, ecx
    neg edx
    mov negyuvapitch, edx   ;; -pitch, used to step back from line 2 to line 1

    mov edx, eax            ;; lines
    imul    eax, ecx        ;; pitch*lines
    mov esi, yuva
    mov yuvay, esi
    add esi, eax
    mov yuvau, esi          ;; yuvau = yuva + pitch*lines
    shr ecx, 1              ;; pitch/2
    shr edx, 1              ;; lines/2
    imul    edx, ecx        ;; (lines/2)*(pitch/2)
    mov yuvaoffsetv, edx    ;; offset added to a U address to reach V
    lea edx, [eax+edx*2]    ;; pitch*lines + 2*(lines/2)*(pitch/2)
    mov yuvaoffseta, edx    ;; offset added to a Y address to reach alpha
    sub ecx, ebp            ;; pitch/2 - (width & ~ 3)/2
    mov yuvauvinc, ecx

    mov ecx, yuva_pitch
    add ecx, ecx
    sub ecx, edi
    mov yuvayinc, ecx          ;; inc = 2*pitch - (width & ~ 3)
;;-----
;;------------------
;; src pointer adjustment to (x,y)
    mov ecx, src_pitch
    mov eax, src_starty
    mov edx, eax
    mov ebx, src_startx
    imul    eax, ecx        ;; y*pitch
    mov esi, sy             ;;
    add esi, eax            ;;  sy + y*pitch
    add esi, ebx            ;;  sy + y*pitch + x
    mov sy, esi

    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, su             ;;
    add esi, edx            ;;  su + (y/2)*(pitch/2)
    add esi, ebx            ;;  su + (y/2)*(pitch/2) + x/2
    mov su, esi
;;
;; yuva pointer adjustment to (x,y)
    mov ecx, yuva_pitch
    mov eax, yuva_starty
    mov edx, eax
    mov ebx, yuva_startx
    imul    eax, ecx        ;; y*pitch
    add eax, ebx            ;; y*pitch + x
    
    mov esi, yuvay          ;;
    add esi, eax            ;;  yuvay + y*pitch + x
    mov yuvay, esi
    
    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, yuvau          ;;
    add esi, edx            ;;  yuvau + (y/2)*(pitch/2)
    add esi, ebx            ;;  yuvau + (y/2)*(pitch/2) + x/2
    mov yuvau, esi

;; dst pointer adjustment to (x,y); dst uses 2 bytes per pel, hence 2*x
    mov ecx, dst_pitch
    mov eax, dst_starty
    mov ebx, dst_startx
    imul    eax, ecx        ;; y*pitch
    add ebx, ebx            ;; 2*x

    mov esi, dy             ;;
    add esi, eax            ;;  dy + y*pitch
    add esi, ebx            ;;  dy + y*pitch + 2*x
    mov dy, esi
;;-----



;; yuva   P = top
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]
;; [A01A02][A03A04]
;; [A11A12][A13A14]


;; I420 src  Q = bot
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]

;; YUY2 dst 
;; [YUYV][YUYV]         ;; byte order [Y0][U][Y1][V]
;; [YUYV][YUYV]
;;-------

    ;; Loop-invariant MMX constants:
    ;;   mm7 = 0, used as the zero source when unpacking bytes to words
    ;;   mm6 = [con1], the pmaddwd multiplier (con1 is defined outside this
    ;;         view; the pel-loop comments treat it as four words of 1)
    pxor    mm7, mm7
    movq    mm6, [con1]

    ;; set up line loop
    ;; height-1 is written back to the height slot; `mov` leaves the flags
    ;; alone, so jle tests the result of `dec` and exits when height <= 1.
    mov eax, height
    dec eax
    mov height, eax
    jle near exit

    ;; Working pointers kept in registers across the pel loops:
    ;;   esi = yuva Y, ebx = yuva U, edi = src Y, edx = src U
    mov esi, yuvay
    mov ebx, yuvau
    mov edi, sy
    mov edx, su


y100:               ;; line loop

    ;; width0 = width - 3 is the four-pel loop counter; when it is <= 0
    ;; (width <= 3) go straight to two_pels, which reads this value from eax.
    mov eax, width
    sub eax, 3
    mov width0, eax
    jle near two_pels


a100:
    ;; Four-pel inner loop.
    ;; Blends a 4x2 block of the yuva image (p, top) with the I420 source
    ;; (q, bottom) and stores it as two rows of four YUY2 pels (8 bytes each).
    ;; Every component is computed as  out = p + ((alpha*(q - p)) >> 8),
    ;; with the final add done modulo 256 (paddb).
    ;;
    ;; Live on entry and updated here:
    ;;   esi = yuva Y pointer (line 1)    ebx = yuva U pointer
    ;;   edi = src  Y pointer (line 1)    edx = src  U pointer
    ;;   dy  = dst write pointer          width0 = loop counter
    ;; Constants: mm7 = 0, mm6 = [con1] (pmaddwd multiplier).
    ;; Scratch: eax, ecx, ebp, mm0-mm5.
    ;;
    mov ecx, yuvaoffseta        ;; offset from a yuva Y sample to its alpha
    mov eax, src_pitch
    ;; ------- first line --------
    ;; yuva 4 y's
    movd    mm0, [esi]          ;; p4 p3 p2 p1
    punpcklbw   mm0, mm7        ;; word p4 p3 p2 p1 

    ;; src I420 4 y's
    ;; movd, not movq: punpcklbw keeps only the low 4 bytes, so a 4-byte
    ;; load gives the same result and matches the line-2 load below.
    movd    mm2, [edi]          ;; q4 q3 q2 q1
    punpcklbw mm2, mm7          ;; word q4 q3 q2 q1 
    psubw   mm2, mm0            ;; q-p

    ;; yuva 4 a's
    movd    mm4, [esi+ecx]      ;; a4 a3 a2 a1
    punpcklbw mm4, mm7          ;; word a4 a3 a2 a1

    pmullw  mm2, mm4            ;; word alpha*(q-p)  for y4 y3 y2 y1
    psrlw   mm2, 8              ;; byte alpha*(q-p)  for 0 y4 0 y3 0 y2 0 y1
    paddb   mm2, mm0            ;; blended for 0 y4 0 y3 0 y2 0 y1

    pmaddwd mm4, mm6           ;; line 1 dword a4+a3 a2+a1
    ;; 
    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm4 = line 1 dword sums a4+a3, a2+a1 (halved after the pack below)
    ;;
    ;; ------ second line ---------
    mov ebp, negyuvapitch
    sub esi, ebp            ;; esi - (-pitch): point to line 2
    ;;add esi, yuva_pitch
    ;;;;add edi, src_pitch

    ;; yuva 4 y's
    movd    mm1, [esi]          ;; p4 p3 p2 p1
    punpcklbw   mm1, mm7        ;; word p4 p3 p2 p1 

    ;; src I420 4 y's (edi still points at line 1; eax = src_pitch)
    movd    mm3, [edi+eax]          ;; q4 q3 q2 q1
    punpcklbw mm3, mm7          ;; word q4 q3 q2 q1 
    psubw   mm3, mm1            ;; q-p

    ;; yuva 4 a's
    movd    mm5, [esi+ecx]      ;; a4 a3 a2 a1
    punpcklbw mm5, mm7          ;; word a4 a3 a2 a1

    lea esi, [esi+ebp+4]        ;; back to line 1 and inc
    add edi, 4                  ;; inc sy

    pmullw  mm3, mm5            ;; word alpha*(q-p)  for y4 y3 y2 y1
    psrlw   mm3, 8              ;; byte alpha*(q-p)  for 0 y4 0 y3 0 y2 0 y1
    paddb   mm3, mm1            ;; blended for 0 y4 0 y3 0 y2 0 y1

    pmaddwd mm5, mm6           ;; line 2 dword a4+a3 a2+a1
    ;; 
    packssdw    mm4, mm5        ;; for u v sum a22 a21 a12 a11
    psrlw       mm4, 1          ;; for u v ave  a22 a21 a12 a11
    ;;
    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm4 = ave for line 2 line 1 =  a22 a21 a12 a11
    ;;
    ;; ----- U V --------
    ;; Each chroma sample is duplicated so that it is blended once with the
    ;; line-1 alpha average and once with the line-2 alpha average.
    ;;mov ebx, yuvau
    ;;mov edx, su
    mov ecx, yuvaoffsetv
    mov eax, soffsetv

    ;; --U--
    movd    mm0, [ebx]          ;; x x pu2 pu1
    punpcklwd    mm0, mm0       ;; byte pu2 pu1 pu2 pu1
    punpcklbw    mm0, mm7       ;; word pu2 pu1 pu2 pu1

    movd    mm1, [edx]          ;; x x qu2 qu1
    punpcklwd    mm1, mm1       ;; byte qu2 qu1 qu2 qu1
    punpcklbw    mm1, mm7       ;; word qu2 qu1 qu2 qu1

    psubw   mm1, mm0            ;; qu - pu
    pmullw  mm1, mm4            ;; alpha*(qu-pu)
    psrlw   mm1, 8
    paddb   mm1, mm0            ;; line 2 line 1 blended u2 u1 u2 u1 

    ;; --V--
    movd    mm0, [ebx+ecx]          ;; x x pv2 pv1
    punpcklwd    mm0, mm0       ;; byte pv2 pv1 pv2 pv1
    punpcklbw    mm0, mm7       ;; word pv2 pv1 pv2 pv1

    ;;movd    mm5, [edx+eax]            ;; x x qv2 qv1
    movzx   eax, word [edx+eax]     ;; prevent possible access vio
    movd    mm5, eax            ;;  0 0 qv2 qv1
    punpcklwd    mm5, mm5       ;; byte qv2 qv1 qv2 qv1
    punpcklbw    mm5, mm7       ;; word qv2 qv1 qv2 qv1

    psubw   mm5, mm0            ;; qv - pv

    add ebx, 2                  ;; inc yuvau address
    add edx, 2                  ;; inc su address

    pmullw  mm5, mm4            ;; alpha*(qv-pv)
    psrlw   mm5, 8
    paddb   mm5, mm0            ;; line 2 line 1 blended v2 v1 v2 v1 

    ;;   mm2 = line 1 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y4 0 y3 0 y2 0 y1
    ;;   mm1 = line 2 line 1 blended 0 u2 0 u1 0 u2 0 u1 
    ;;   mm5 = line 2 line 1 blended 0 v2 0 v1 0 v2 0 v1 
    ;;
    ;; ----- pack to YUY2 and store both lines --------
    mov ecx, dy
    mov eax, dst_pitch

    packuswb    mm2, mm3        ;; blended y24 y23 y22 y21 y14 y13 y12 y11 

    psllq   mm5, 8              ;;  v2 0 v1 0 v2 0 v1 0
    por mm1, mm5                ;;  v22 u22 v21 u21 v12 u12 v11 u11
    
    movq    mm3, mm2
    punpcklbw   mm3, mm1        ;;  v12 y14 u12 y13 v11 y12 u11 y11

    movq    [ecx], mm3          ;; line 1 result

    punpckhbw   mm2, mm1        ;;  v22 y24 u22 y23 v21 y22 u21 y21

    movq    [ecx+eax], mm2      ;; line 2 result

    add ecx, 8                  ;; inc dy address
    mov dy, ecx

    ;; pel loop: `mov` does not touch the flags, so jg tests the `sub` result.
    ;; On fall-through eax holds the final width0 (<= 0), which the code
    ;; after the loop uses as "remaining pels - 3".
    mov eax, width0             ;; pel loop
    sub eax, 4
    mov width0, eax
    jg  near a100


;;------------------------------
two_pels:
    ;; do two pels if any
    ;; remaining pels = eax+3
    ;; compute 2 pels if remaining pels = 2 or 3, 1 not computed
    add eax, 2
    jle near line_done
    ;;
    ;;
    mov ecx, yuvaoffseta    
    mov eax, src_pitch
    ;; ------- first line --------
    ;; yuva 2 y's
    movd    mm0, [esi]          ;; p2 p1
    punpcklbw   mm0, mm7        ;; word p2 p1 

    ;; src I420 2 y's
    movq    mm2, [edi]          ;; q2 q1
    punpcklbw mm2, mm7          ;; word q2 q1 
    psubw   mm2, mm0            ;; q-p

    ;; yuva 2 a's
    movd    mm4, [esi+ecx]      ;; a2 a1
    punpcklbw mm4, mm7          ;; word a2 a1

    pmullw  mm2, mm4            ;; word alpha*(q-p)  for y2 y1
    psrlw   mm2, 8              ;; byte alpha*(q-p)  for 0 y2 0 y1
    paddb   mm2, mm0            ;; blended for 0 y2 0 y1

    pmaddwd mm4, mm6           ;; line 1 dword a2+a1
    ;; 
    ;;   mm2 = line 1 blended for 0 y2 0 y1
    ;;   mm4 = line 1 ave(a2,a1)
    ;;
    ;; ------ second line ---------
    mov ebp, negyuvapitch
    sub esi, ebp            ;; point to line 2

    ;; yuva 2 y's
    movd    mm1, [esi]          ;; p2 p1
    punpcklbw   mm1, mm7        ;; word p2 p1 

    ;; src I420 2 y's
    movd    mm3, [edi+eax]          ;; q2 q1
    punpcklbw mm3, mm7          ;; word q2 q1 
    psubw   mm3, mm1            ;; q-p

    ;; yuva 2 a's
    ;;movd    mm5, [esi+ecx]      ;; a2 a1
    movzx   eax, word [esi+ecx]     ;; prevent possible access vio
    movd    mm5, eax            ;; a2 a1
    punpcklbw mm5, mm7          ;; word a2 a1

    lea esi, [esi+ebp]        ;; back to line 1,  no inc
    ;;add edi, 4                  ;; inc sy

    pmullw  mm3, mm5            ;; word alpha*(q-p)  for y2 y1
    psrlw   mm3, 8              ;; byte alpha*(q-p)  for 0 y2 0 y1
    paddb   mm3, mm1            ;; blended for 0 y2 0 y1

    pmaddwd mm5, mm6           ;; line 2 dword a2+a1
    ;; 
    packssdw    mm4, mm5        ;; for uv sum xx a21 xx a11
    psrlw   mm4, 1              ;; for uv ave xx a21 xx a11
    ;;
    ;;   mm2 = line 1 blended for 0 y2 0 y1
    ;;   mm3 = line 2 blended for 0 y2 0 y1
    ;;   mm4 = ave =  a21 a11
    ;;
    ;; ----- U V --------
    mov ecx, yuvaoffsetv
    mov eax, soffsetv

    ;; --U--
    ;;movd    mm0, [ebx]          ;; x x x pu1
    ;;punpcklwd    mm0, mm0       ;; byte x pu1 x pu1
    ;;punpcklbw    mm0, mm7       ;; word x pu1 x pu1
    movzx ebp, byte [ebx]
    movd    mm0, ebp                ;; 0 0 0 pu1
    punpckldq   mm0, mm0            ;; word 0 pu1 0 pu1

    ;;movd    mm1, [edx]          ;; x x qu2 qu1
    ;;punpcklwd    mm1, mm1       ;; byte qu2 qu1 qu2 qu1
    ;;punpcklbw    mm1, mm7       ;; word qu2 qu1 qu2 qu1
    movzx   ebp, byte [edx]
    movd    mm1, ebp                ;; 0 0 0 qu1          
    punpckldq   mm1, mm1            ;; word 0 qu1 0 qu1   
    
    psubw   mm1, mm0            ;; qu - pu
    pmullw  mm1, mm4            ;; alpha*(qu-pu)
    psrlw   mm1, 8
    paddb   mm1, mm0            ;; line 2 line 1 blended u2 u1 u2 u1 

    ;; --V--
    ;;movd    mm0, [ebx+ecx]          ;; x x pv2 pv1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -