⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 yuvammx.asm

📁 著名的 helix realplayer 基于手机 symbian 系统的 播放器全套源代码
💻 ASM
📖 第 1 页 / 共 5 页
字号:
    add eax, ebx            ;; y*pitch + x
    
    mov esi, yuvay          ;;
    add esi, eax            ;;  yuvay + y*pitch + x
    mov yuvay, esi
    
    mov edi, yuvaa          ;;
    add edi, eax            ;;  yuvaa + y*pitch + x
    mov yuvaa, edi
    mov yuvaauv, edi        ;; dup for uv use

    shr ecx, 1          ;; pitch/2
    shr edx, 1          ;; y/2
    imul    edx, ecx    ;; (y/2)*(pitch/2)
    shr ebx, 1          ;; x/2
    mov esi, yuvau             ;;
    add esi, edx            ;;  yuvau + (y/2)*(pitch/2)
    add esi, ebx            ;;  yuvau + (y/2)*(pitch/2) + x/2
    mov yuvau, esi
    
;;-----



;; yuva   P = top
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]
;; [A01A02][A03A04]
;; [A11A12][A13A14]


;; I420 src  Q = bot
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]

;; I420 dst
;; [Y01Y02][Y03Y04]
;; [Y11Y12][Y13Y14]
;;   [U1]    [U3]
;;   [V1]    [V3]



;;-------
    mov esi, yuvay
    mov ecx, yuvaa
    mov edi, sy
    mov ebx, dy
    mov ebp, height      ;; y lines loop counter
    or  ebp, ebp
    jle near exit
y100:
    mov eax, width
    sub eax, 7
    jle  y_by_fours

    movq    mm7, [mask3]
    movq    mm6, [mask3b]
a100:
    ;; yuva 8 y's
    movq    mm0, [esi]          ;; p8 p7 p6 p5 p4 p3 p2 p1
    movq    mm1, mm0            ;; p8 p7 p6 p5 p4 p3 p2 p1
    psrlw   mm0, 8              ;; word p8 p6 p4 p2 

    ;; src I420 8 y's
    movq    mm2, [edi]          ;; q8 q7 q6 q5 q4 q3 q2 q1
    movq    mm3, mm2            ;; q8 q7 q6 q5 q4 q3 q2 q1
    psrlw   mm2, 8              ;; word q8 q6 q4 q2 
    psubw   mm2, mm0            ;; q-p

    ;; yuva 8 a's
    movq    mm4, [ecx]          ;; a8 a7 a6 a5 a4 a3 a2 a1
    movq    mm5, mm4            ;; a8 a7 a6 a5 a4 a3 a2 a1
    psrlw   mm4, 8              ;; word a8 a6 a4 a2

    pmullw  mm2, mm4            ;; word alpha*(q-p)  for y8 y6 y4 y2
    pand    mm2, mm6            ;; word hi-byte alpha*(q-p) for y8 y6 y4 y2
    ;;
    ;;
    movq    mm0, mm1            ;; p8 p7 p6 p5 p4 p3 p2 p1
    pand    mm1, mm7            ;; word p7 p5 p3 p1
    pand    mm3, mm7            ;; word q7 q5 q3 q1
    
    psubw   mm3, mm1            ;; q-p
    pand    mm5, mm7            ;; word a7 a5 a3 a1
    pmullw  mm3, mm5
    psrlw   mm3, 8              ;; alpha*(q-p) >> 8 for y7 y5 y3 y1
    
    por     mm2, mm3            ;; alpha*(q-p) >> 8 for y8 y7 y6 y5 y4 y3 y2 y1
    paddb   mm2, mm0            ;; blended y8 y7 y6 y5 y4 y3 y2 y1
    
    movq    [ebx], mm2
    
    add esi, 8
    add edi, 8
    add ecx, 8
    add ebx, 8
    sub eax, 8
    jg  a100
;;----------------------
y_by_fours:
    pxor    mm7, mm7        ;; set to zero for unpack
    add eax, 4
    jle  y_one_two_three
a200:
    ;; yuva 4 y's
    movd    mm0, [esi]          ;; byte p4 p3 p2 p1
    punpcklbw mm0, mm7          ;; p4 p3 p2 p1

    ;; src I420 4 y's
    movd    mm2, [edi]          ;; byte  q4 q3 q2 q1
    punpcklbw mm2, mm7          ;; q4 q3 q2 q1
    psubw   mm2, mm0            ;; q-p

    ;; yuva 4 a's
    movd    mm4, [ecx]          ;; byte a4 a3 a2 a1
    punpcklbw mm4, mm7          ;; a4 a3 a2 a1

    pmullw  mm2, mm4
    psrlw   mm2, 8              ;; alpha*(q-p) >> 8 for y4 y3 y2 y1
    paddb   mm2, mm0            ;; blended word y4 y3 y2 y1
    
    packuswb    mm2, mm7        ;; blended byte y4 y3 y2 y1 
    ;;
    movd    [ebx], mm2

    add esi, 4
    add edi, 4
    add ecx, 4
    add ebx, 4
    sub eax, 4      ;; not needed if doing by_eights
    jg  a200        ;; not needed if doing by_eights
;;
;;
;;----------------------
;; do one, two, or three odd bytes
y_one_two_three:
    add eax, 3      ;; number bytes to do
    jle  line_done
    mov edx, 4
    sub edx, eax    ;; 4-eax = numbers bytes offset
    ;;
    movd mm6, edx   ;; number bytes to shift
    psllq   mm6, 3              ;; number bits to shift 
    ;;
    ;; pointer adjustment, negative offset
    neg edx     
;;
;;
    ;; yuva 1-3 y's
    movd    mm0, [esi]          ;; byte xx p3 p2 p1
    punpcklbw mm0, mm7          ;; xx p3 p2 p1

    ;; src I420 1-3 y's
    movd    mm2, [edi]          ;; byte  xx q3 q2 q1
    punpcklbw mm2, mm7          ;; xx q3 q2 q1
    psubw   mm2, mm0            ;; q-p

    ;; yuva 1-3 a's
    movd    mm4, [ecx+edx]      ;; byte a3 a2 a1 xx
    psrlq   mm4, mm6            ;; byte xx a3 a2 a1 
    punpcklbw mm4, mm7          ;; xx a3 a2 a1

    pmullw  mm2, mm4
    psrlw   mm2, 8              ;; alpha*(q-p) >> 8 for xx y3 y2 y1
    paddb   mm2, mm0            ;; blended word xx y3 y2 y1
    
    packuswb    mm2, mm7        ;; blended byte xx y3 y2 y1 
    ;;
    movd    dtmp0, mm2

    ;; store result, byte by byte
    ;; eax = bytes to do
a300:
    mov dl, btmp0(eax-1)
    mov [ebx+eax-1], dl
    dec eax
    jg  a300
;;------
line_done:

    mov eax, yuva_pitch
    mov esi, yuvay
    add esi, eax
    mov yuvay, esi

    mov ecx, yuvaa
    add ecx, eax
    mov yuvaa, ecx
        
    mov eax, src_pitch
    mov edi, sy
    add edi, eax
    mov sy, edi
    
    mov eax, dst_pitch
    mov ebx, dy
    add ebx, eax
    mov dy, ebx

    dec ebp
    jg  near y100        ;; line loop
;;-----------------------------------------------
;;-----------------------------------------------
;; do u and v
;;
    mov esi, yuvau
    mov ecx, yuvaauv
    add ecx, yuva_pitch    ;; use a's from second line
    mov yuvaauv, ecx       ;; save for uv line loop
    mov edi, su
    mov ebx, du

    mov ebp, height      ;; uv lines loop counter
    shr ebp, 1
    jle near exit
uv100:
    mov eax, width
    shr eax, 1      ;; u v width = width/2
    sub eax, 3
    jle  near uv_one_two_three

b200:
    ;; yuva 4 u's
    movd    mm0, [esi]          ;; byte pu4 pu3 pu2 pu1
    punpcklbw mm0, mm7          ;; pu4 pu3 pu2 pu1

    ;; src I420 4 u's
    movd    mm2, [edi]          ;; byte  qu4 qu3 qu2 qu1
    punpcklbw mm2, mm7          ;; qu4 qu3 qu2 qu1
    psubw   mm2, mm0            ;; q-p

    ;; yuva 8 a's
    ;; We need to average the alpha values by 2's; here only horizontal
    ;; pairs from one alpha line are averaged.
    ;; In the C version we do the average by the whole 2x2 block.
    ;;movq    mm4, [ecx]          ;; byte a8 a7 a6 a5 a4 a3 a2 a1
    ;;psrlw   mm4, 8              ;; word a8 a6 a4 a2
    movq    mm4, [ecx]
    movq    mm7, mm4          ;; mm7=mm4= a8 a7 a6 a5 a4 a3 a2 a1
    psllw   mm7, 8            ;; mm7    = a7 00 a5 00 a3 00 a1 00
    psrlw   mm4, 8            ;; mm4    = 00 a8 00 a6 00 a4 00 a2 
    psrlw   mm7, 8            ;; mm7    = 00 a7 00 a5 00 a3 00 a1
    paddw   mm4, mm7          ;; mm4    = (a8+a7) (a6+a5) (a4+a3) (a2+a1)
    pxor    mm7, mm7          ;; mm7    = 0...0
    psrlw   mm4, 1            ;; mm4    = (a8+a7)/2 (a6+a5)/2 (a4+a3)/2 (a2+a1)/2

    pmullw  mm2, mm4
    psrlw   mm2, 8             ;; alpha*(q-p) >> 8 for u4 u3 u2 u1
    paddb   mm2, mm0           ;; byte add -> blended word u4 u3 u2 u1
    
    packuswb    mm2, mm7       ;; blended byte u4 u3 u2 u1 

    mov edx, yuvaoffsetv
    ;; yuva 4 v's
    movd    mm0, [esi+edx]      ;; byte pv4 pv3 pv2 pv1
    punpcklbw mm0, mm7          ;; pv4 pv3 pv2 pv1

    mov edx, soffsetv
    ;; src I420 4 v's
    movd    mm3, [edi+edx]      ;; byte  qv4 qv3 qv2 qv1
    punpcklbw mm3, mm7          ;; qv4 qv3 qv2 qv1
    psubw   mm3, mm0            ;; q-p

    pmullw  mm3, mm4
    psrlw   mm3, 8              ;; alpha*(q-p) >> 8 for v4 v3 v2 v1
    paddb   mm3, mm0            ;; blended word v4 v3 v2 v1
    
    packuswb    mm3, mm7        ;; blended byte v4 v3 v2 v1 

    movd    [ebx], mm2          ;; postpone store for src buf = dest buf
    mov edx, doffsetv
    movd    [ebx+edx], mm3

    add esi, 4
    add edi, 4
    add ecx, 8
    add ebx, 4
    sub eax, 4
    jg  near b200
;;----------------------
;; do one, two, or three odd bytes
;; reads are unaligned
uv_one_two_three:
    add eax, 3
    jle  near uv_line_done
    mov edx, 4
    sub edx, eax    ;; 4-eax = numbers bytes offset
    ;;
    movd mm6, edx   ;; number bytes to shift
    psllq   mm6, 3              ;; number bits to shift 
    ;;
    ;; pointer adjustment, negative offset
    neg edx
    add esi, edx
    add edi, edx
    lea ecx, [ecx+edx*2]
;;
    ;; yuva 1-3 u's
    movd    mm0, [esi]      ;; byte pu3 pu2 pu1 xxx
    punpcklbw mm0, mm7          ;; pu3 pu2 pu1 xxx

    ;; src I420 1-3 u's
    movd    mm2, [edi]      ;; byte qu3 qu2 qu1 xxx
    punpcklbw mm2, mm7          ;; qu3 qu2 qu1 xxx
    psubw   mm2, mm0            ;; q-p

    ;; yuva 1-3 a's
    movq    mm4, [ecx]    ;; byte  a6 a5 a4 a3 a2 a1 xx xx
    psrlw   mm4, 8              ;; word  a6 a4 a2 xx 

    pmullw  mm2, mm4
    psrlw   mm2, 8             ;; alpha*(q-p) >> 8 for u3 u2 u1 xx
    paddb   mm2, mm0           ;; blended word u3 u2 u1 xx
    
    packuswb    mm2, mm7       ;; blended byte u3 u2 u1 xx
    movd    dtmp0, mm2

    mov edx, yuvaoffsetv
    ;; yuva 1-3 v's
    movd    mm0, [esi+edx]      ;; byte  pv3 pv2 pv1 xxx
    punpcklbw mm0, mm7          ;; pv3 pv2 pv1 xxx

    mov edx, soffsetv
    ;; src I420 1-3 v's
    movd    mm2, [edi+edx]      ;; byte  qv3 qv2 qv1 xx
    punpcklbw mm2, mm7          ;; qv3 qv2 qv1 xx
    psubw   mm2, mm0            ;; q-p

    pmullw  mm2, mm4
    psrlw   mm2, 8              ;; alpha*(q-p) >> 8 for v3 v2 v1 xx
    paddb   mm2, mm0            ;; blended word v3 v2 v1 xx
    
    packuswb    mm2, mm7        ;; blended byte v3 v2 v1 xx
    movd    dtmp1, mm2

    
    mov edx, doffsetv
    ;; store result, byte by byte
    ;; eax = bytes to do
    neg eax
b300:
    mov cl, btmp0(4+eax)   ;; u     ;; ecx trashed
    mov ch, btmp1(4+eax)   ;; v
    mov [ebx], cl                   
    mov [ebx+edx], ch
    inc ebx
    inc eax
    jl  b300
;;------
uv_line_done:

    mov eax, yuvauvpitch
    mov esi, yuvau
    add esi, eax
    mov yuvau, esi

    mov eax, yuva_pitch
    mov ecx, yuvaauv
    lea ecx, [ecx + eax*2]  ;; down two lines
    mov yuvaauv, ecx        ;; save for uv line loop

    mov eax, suvpitch
    mov edi, su
    add edi, eax
    mov su, edi
    
    mov eax, duvpitch
    mov ebx, du
    add ebx, eax
    mov du, ebx

    dec ebp
    jg  near uv100           ;; uv line loop


;;------
exit:
    xor eax, eax    ;; return success
exit2:
    add esp, ntmps*4
    pop ebx
    pop ecx
    pop edi
    pop esi
    pop ebp
    emms
    ret

fail_exit:
    mov eax, -1     ;; signal fail
    jmp exit2


;_I420andYUVAtoI420_MMX endp
;====================================
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;	I420andYUVAtoYUY2
;;
;;	This function alpha-blends two I420 buffers into a third
;;	YUY2 buffer using the alpha info tacked to the 
;;	end of the second I420 buffer
;;
;;  yuva = top
;;  inverted alpha
;;  uv size computed as: uvpitch*uvlines = (pitch/2)*(lines/2)
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
_I420andYUVAtoYUY2_MMX:
;;
;; int I420andYUVAtoYUY2_MMX(
;;    unsigned char* src,  int src_pels,    int src_lines,  int src_pitch,
;;                         int src_startx,  int src_starty,
;;    unsigned char* yuva, int yuva_pels,   int yuva_lines, int yuva_pitch,
;;                         int yuva_startx, int yuva_starty,
;;    unsigned char* dst,  int dst_pels,    int dst_lines,  int dst_pitch,
;;                         int dst_startx,  int dst_starty,
;;    int width,  int height);
;;


make_labels  _I420andYUVAtoYUY2_MMX

;;
;; arguments
%define src            dword [esp+4*(1+npush)]
%define src_pels       dword [esp+4*(2+npush)]
%define src_lines      dword [esp+4*(3+npush)]
%define src_pitch      dword [esp+4*(4+npush)]
%define src_startx     dword [esp+4*(5+npush)]
%define src_starty     dword [esp+4*(6+npush)]

%define yuva           dword [esp+4*(7+npush)]
%define yuva_pels      dword [esp+4*(8+npush)]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -