⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 frame_sse2.asm

📁 由bmp生成mpeg2 的I_frame 数据
💻 ASM
📖 第 1 页 / 共 3 页
字号:
		push       eax
;-------------------------------------------------------------------
; Save the stack address in the base pointer
		mov        ebp, esp
;-------------------------------------------------------------------
; Reserve the local-variable area (16-byte aligned)
		sub        esp, 128
		and        esp, 0fffffff0h
;-------------------------------------------------------------------
; Build the conversion coefficients
;
; Parameters to prepare
;
; y_offset   - mm0
; c_offset   - mm1
; y_gain     - [esp+32]
; half_13    - [esp+48]
; bu         - [esp+64]
; guv        - [esp+80]
; rv         - [esp+96]
; y_src      - esi & mm2
; u_src      - eax & mm3
; v_src      - ebx & mm4
; y_src_next - mm5
; u_src_next - mm6
; v_src_next - mm7
; width/8    - ecx & [esp+112]
; width%8*3  - [esp+116]
; height     - edx
; in_step*2  - [esp+120]
; out_step   - [esp+124]
;
		mov        edx, [ebp+28+16]
		pcmpeqw    mm0, mm0
		pxor       xmm0, xmm0
		pxor       mm1, mm1
		pcmpeqw    xmm1, xmm1
		movd       mm2, [edx+20]  ; y_offset
		movd       xmm2, [edx+24] ; y_gain
		movd       xmm3, [edx+28] ; bu
		movd       xmm4, [edx+32] ; gu
		movd       xmm5, [edx+36] ; gv
		movd       xmm6, [edx+40] ; rv
		psubw      mm1, mm0       ; all 1
		psubd      xmm0, xmm1     ; all 1
		pshufw     mm0, mm2, 0    ; yo_yo_yo_yo
		psllw      mm1, 7         ; c_offset
		movdqa     xmm1, xmm0     ; all 1
		pshufd     xmm2, xmm2, 0  ; yg_yg_yg_yg
		pshufd     xmm3, xmm3, 0  ; bu_bu_bu_bu
		punpckldq  xmm4, xmm5     ; xx_xx_gv_gu
		pshufd     xmm6, xmm6, 0  ; rv_rv_rv_rv
		pslld      xmm0, 2        ; half_3
		pslld      xmm1, 12       ; half_13
		pshufd     xmm4, xmm4, 01000100b
		paddd      xmm2, xmm0
		paddd      xmm3, xmm0
		paddd      xmm4, xmm0
		paddd      xmm6, xmm0
		psrad      xmm2, 3
		psrad      xmm3, 3
		psrad      xmm4, 3
		psrad      xmm6, 3
		packssdw   xmm4, xmm4
		movdqa     [esp+32], xmm2
		movdqa     [esp+48], xmm1
		movdqa     [esp+64], xmm3
		movdqa     [esp+80], xmm4
		movdqa     [esp+96], xmm6
		mov        eax, [edx] ; width
		mov        ecx, [edx] ; width
		movd       mm7, [edx+8] ; in_step
		movd       mm4, [edx+16] ; c_offset
		mov        esi, [ebp+28+4] ; top
		mov        edi, [ebp+28+8] ; bottom
		punpckldq  mm7, mm7
		punpckldq  mm4, mm4
		movd       mm2, [esi+16] ; top y
		movq       mm3, [esi+20] ; top uv
		movd       mm5, [edi+16] ; bottom y
		movq       mm6, [edi+20] ; bottom uv
		shr        ecx, 3
		and        eax, 7
		paddd      mm5, mm7
		mov        ebx, eax
		paddd      mm6, mm7
		shl        eax, 1
		paddd      mm3, mm4
		add        eax, ebx
		paddd      mm6, mm4
		mov        ebx, [edx+12] ; out_step
		mov        edx, [edx+4] ; height
		pslld      mm7, 1
		mov        [esp+112], ecx
		mov        [esp+116], eax
		movd       [esp+120], mm7
		mov        [esp+124], ebx
		movq       mm4, mm3
		movq       mm7, mm6
		psrlq      mm4, 32
		psrlq      mm7, 32
		movd       esi, mm2
		movd       eax, mm3
		movd       ebx, mm4
		mov        edi, [ebp+28+12]
;-------------------------------------------------------------------
; Vertical loop (one iteration per line)
yuv422_to_bgr_next_line:
;-------------------------------------------------------------------
; Horizontal loop (8 pixels per iteration)
yuv422_to_bgr_next_8_pixel:
;-------------------------------------------------------------------
; Conversion core
;
		movd       xmm0, [esi]
		movd       xmm1, [esi+4]
		movd       xmm2, [eax]
		movd       xmm4, [ebx]
		movq2dq    xmm5, mm0 ; y_offset
		movq2dq    xmm6, mm1 ; c_offset
		pxor       xmm7, xmm7
		lea        esi, [esi+8]
		lea        eax, [eax+4]
		lea        ebx, [ebx+4]
		punpcklbw  xmm0, xmm7
		punpcklbw  xmm1, xmm7
		punpcklbw  xmm2, xmm7
		punpcklbw  xmm4, xmm7
		psubw      xmm0, xmm5
		psubw      xmm1, xmm5
		psubw      xmm2, xmm6
		psubw      xmm4, xmm6
		movdqa     xmm5, [esp+32] ; y_gain
		movdqa     xmm6, [esp+48] ; half_13
		movdqa     xmm3, xmm2
		punpcklwd  xmm0, xmm7
		punpcklwd  xmm1, xmm7
		punpcklwd  xmm2, xmm7
		punpcklwd  xmm3, xmm4
		punpcklwd  xmm4, xmm7
		pmaddwd    xmm0, xmm5
		pmaddwd    xmm1, xmm5
		pmaddwd    xmm2, [esp+64] ; bu
		pmaddwd    xmm3, [esp+80] ; guv
		pmaddwd    xmm4, [esp+96] ; rv
		paddd      xmm0, xmm6
		paddd      xmm1, xmm6		
		movdqa     xmm5, xmm2
		movdqa     xmm6, xmm3
		movdqa     xmm7, xmm4
		punpckldq  xmm2, xmm2
		punpckldq  xmm3, xmm3
		punpckldq  xmm4, xmm4
		punpckhdq  xmm5, xmm5
		punpckhdq  xmm6, xmm6
		punpckhdq  xmm7, xmm7
		paddd      xmm2, xmm0
		paddd      xmm3, xmm0
		paddd      xmm4, xmm0
		paddd      xmm5, xmm1
		paddd      xmm6, xmm1
		paddd      xmm7, xmm1
		psrad      xmm2, 13
		psrad      xmm3, 13
		psrad      xmm4, 13
		psrad      xmm5, 13
		psrad      xmm6, 13
		psrad      xmm7, 13
		packuswb   xmm2, xmm5 ; b
		packuswb   xmm3, xmm6 ; g
		packuswb   xmm4, xmm7 ; r
		movdqa     xmm5, xmm2 ; b'
		movdqa     xmm6, xmm3 ; g'
		movdqa     xmm7, xmm4 ; r'
		psrldq     xmm2, 1
		pslldq     xmm3, 1
		pslldq     xmm4, 1
		por        xmm7, xmm2 ; xAx7x4x1
		por        xmm5, xmm3 ; x9x6x3x0
		por        xmm6, xmm4 ; Bx8x5x2x
		pshufd     xmm7, xmm7, 11011000b ; xAx4x7x1
		pshufd     xmm5, xmm5, 11011000b ; x9x3x6x0
		pshufd     xmm6, xmm6, 11011000b ; Bx5x8x2x
		movdqa     xmm4, xmm5
		psrldq     xmm6, 2 ; xBx5x8x2
		punpcklwd  xmm5, xmm7 ; xx76xx10
		psrldq     xmm4, 8 ; xxxxx9x3
		punpckhwd  xmm7, xmm6 ; xxBAxx54
		punpcklwd  xmm6, xmm4 ; xx98xx32
		movd       [edi], xmm5
		movd       [edi+4], xmm6
		movd       [edi+8], xmm7
		psrldq     xmm5, 8
		psrldq     xmm6, 8
		psrldq     xmm7, 8
		movd       [edi+12], xmm5
		movd       [edi+16], xmm6
		movd       [edi+20], xmm7
		lea        edi, [edi+24]
;-------------------------------------------------------------------
; Horizontal loop end check
		dec        ecx
		jnz        yuv422_to_bgr_next_8_pixel
;-------------------------------------------------------------------
; Remainder handling (leftover width % 8 pixels)
		mov        ecx, [esp+116] ; width%8*3
		test       ecx, ecx
		jz         yuv422_to_bgr_line_end
		movd       xmm0, [esi]
		movd       xmm1, [esi+4]
		movd       xmm2, [eax]
		movd       xmm4, [ebx]
		movq2dq    xmm5, mm0 ; y_offset
		movq2dq    xmm6, mm1 ; c_offset
		pxor       xmm7, xmm7
		punpcklbw  xmm0, xmm7
		punpcklbw  xmm1, xmm7
		punpcklbw  xmm2, xmm7
		punpcklbw  xmm4, xmm7
		psubw      xmm0, xmm5
		psubw      xmm1, xmm5
		psubw      xmm2, xmm6
		psubw      xmm4, xmm6
		movdqa     xmm5, [esp+32] ; y_gain
		movdqa     xmm6, [esp+48] ; half_13
		movdqa     xmm3, xmm2
		punpcklwd  xmm0, xmm7
		punpcklwd  xmm1, xmm7
		punpcklwd  xmm2, xmm7
		punpcklwd  xmm3, xmm4
		punpcklwd  xmm4, xmm7
		pmaddwd    xmm0, xmm5
		pmaddwd    xmm1, xmm5
		pmaddwd    xmm2, [esp+64] ; bu
		pmaddwd    xmm3, [esp+80] ; guv
		pmaddwd    xmm4, [esp+96] ; rv
		paddd      xmm0, xmm6
		paddd      xmm1, xmm6		
		movdqa     xmm5, xmm2
		movdqa     xmm6, xmm3
		movdqa     xmm7, xmm4
		punpckldq  xmm2, xmm2
		punpckldq  xmm3, xmm3
		punpckldq  xmm4, xmm4
		punpckhdq  xmm5, xmm5
		punpckhdq  xmm6, xmm6
		punpckhdq  xmm7, xmm7
		paddd      xmm2, xmm0
		paddd      xmm3, xmm0
		paddd      xmm4, xmm0
		paddd      xmm5, xmm1
		paddd      xmm6, xmm1
		paddd      xmm7, xmm1
		psrad      xmm2, 13
		psrad      xmm3, 13
		psrad      xmm4, 13
		psrad      xmm5, 13
		psrad      xmm6, 13
		psrad      xmm7, 13
		packuswb   xmm2, xmm5 ; b
		packuswb   xmm3, xmm6 ; g
		packuswb   xmm4, xmm7 ; r
		movdqa     xmm5, xmm2 ; b'
		movdqa     xmm6, xmm3 ; g'
		movdqa     xmm7, xmm4 ; r'
		pcmpeqw    xmm0, xmm0
		pcmpeqw    xmm1, xmm1
		psrldq     xmm2, 1
		pslldq     xmm3, 1
		pslldq     xmm4, 1
		psrld      xmm0, 16
		pslld      xmm1, 16
		por        xmm7, xmm2 ; xAx7x4x1
		por        xmm5, xmm3 ; x9x6x3x0
		por        xmm6, xmm4 ; Bx8x5x2x
		pshufd     xmm7, xmm7, 11011000b ; xAx4x7x1
		pshufd     xmm5, xmm5, 11011000b ; x9x3x6x0
		pshufd     xmm6, xmm6, 11011000b ; Bx5x8x2x
		movdqa     xmm4, xmm5
		psrldq     xmm6, 2 ; xBx5x8x2
		punpcklwd  xmm5, xmm7 ; xx76xx10
		psrldq     xmm4, 8 ; xxxxx9x3
		punpckhwd  xmm7, xmm6 ; xxBAxx54
		punpcklwd  xmm6, xmm4 ; xx98xx32
		movd       [esp+8], xmm5
		movd       [esp+12], xmm6
		movd       [esp+16], xmm7
		psrldq     xmm5, 8
		psrldq     xmm6, 8
		psrldq     xmm7, 8
		movd       [esp+20], xmm5
		movd       [esp+24], xmm6
		movd       [esp+28], xmm7
		lea        esi, [esp+8]
		rep movsb
;-------------------------------------------------------------------
; Vertical loop end check
yuv422_to_bgr_line_end:
		mov        ecx, [esp+120]
		movd       esi, mm2
		movd       eax, mm3
		movd       ebx, mm4
		movq       mm2, mm5
		movq       mm3, mm6
		movq       mm4, mm7
		add        esi, ecx
		add        eax, ecx
		add        ebx, ecx
		mov        ecx, [esp+124]
		mov        edi, [ebp+28+12]
		movd       mm5, esi
		movd       mm6, eax
		movd       mm7, ebx
		movd       esi, mm2
		movd       eax, mm3
		movd       ebx, mm4
		add        edi, ecx
		mov        [ebp+28+12], edi
		mov        ecx,[esp+112]
		dec        edx
		jnz        yuv422_to_bgr_next_line
;-------------------------------------------------------------------
; Cleanup
		mov        esp, ebp

		pop        eax
		pop        ebx
		pop        ecx
		pop        edx
		pop        esi
		pop        edi
		pop        ebp

		ret        16
;
_yuv422_to_bgr_sse2@16 ENDP
;-------------------------------------------------------------------
; End

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuv422_to_yuy2_sse2 - YUV -> YUY2 conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC              C _yuv422_to_yuy2_sse2@16

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -