⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 frame_sse2.asm

📁 由bmp生成mpeg2 的I_frame 数据
💻 ASM
📖 第 1 页 / 共 3 页
字号:
;      void __stdcall  yuv422_to_yuy2_sse2(
; [esp + 4] = FRAME                    *top,
; [esp + 8] = FRAME                    *bottom,
; [esp +12] = unsigned char            *out,
; [esp +16] = CONVERSION_PARAMETER     *prm
; )
;
; Packs planar 4:2:2 samples into YUY2 (Y0 C0 Y1 C1 ..., 2 bytes per
; pixel), alternating source lines between `top` and `bottom`.
;
; Fields read (byte offsets, as used by the code below):
;   FRAME                 +16 luma pointer, +20 / +24 the two chroma
;                         plane pointers (loaded as one 8-byte value)
;   CONVERSION_PARAMETER  +0 width, +4 height, +8 in_step,
;                         +12 out_step, +16 c_offset
;
; All general purpose registers are preserved.  mm0-mm7 and
; xmm0/1/4/5/6/7 are overwritten; EMMS is executed before returning.
; `rep movsb` relies on the direction flag being clear.
;
; NOTE: the partial group at the end of a line still loads a full
; 16 luma bytes and 8+8 chroma bytes from the source; only the store
; is limited to width%16 pixels.
_yuv422_to_yuy2_sse2@16 PROC
;
;-------------------------------------------------------------------
; Local variables (offsets from the realigned esp)
;
; [esp+ 4] work        32-byte, 16-byte aligned staging buffer
; [esp+36] out         start of the current output line
; [esp+40] width/16    number of full 16-pixel groups per line
; [esp+44] width%16*2  bytes in the partial group (YUY2 = 2 bytes/pixel)
; [esp+48] in_step*2
; [esp+52] out_step
;
; total 88 + alignment padding
;-------------------------------------------------------------------
; Save registers
		push       ebp
		push       edi
		push       esi
		push       edx
		push       ecx
		push       ebx
		push       eax
;-------------------------------------------------------------------
; Remember the stack address in the base pointer
		mov        ebp, esp
;-------------------------------------------------------------------
; Reserve the local variable area
		sub        esp, 88
		and        esp, 0fffffff0h
		sub        esp, 4        ; esp = 16n+12, so [esp+4] is 16-byte aligned
;-------------------------------------------------------------------
; Build the loop parameters
; (28 = seven saved registers, +4 = return address)
		mov        eax, [ebp+28+4]   ; top
		mov        ebx, [ebp+28+8]   ; bottom
		mov        edi, [ebp+28+12]  ; out
		mov        edx, [ebp+28+16]  ; prm
		movd       mm0, [eax+16]     ; top y
		movq       mm1, [eax+20]     ; top uv (two packed pointers)
		movd       mm3, [ebx+16]     ; bottom y
		movq       mm4, [ebx+20]     ; bottom uv (two packed pointers)
		mov        eax, [edx]    ; width
		movd       mm6, [edx+8]  ; in_step
		mov        ebx, [edx+12] ; out_step
		movd       mm7, [edx+16] ; c_offset
		mov        edx, [edx+4]  ; height
		mov        ecx, eax
		mov        [esp+52], ebx ; out_step
		and        eax, 0fh      ; width%16
		shr        ecx, 4        ; width/16
		punpckldq  mm6, mm6      ; in_step in both halves
		punpckldq  mm7, mm7      ; c_offset in both halves
		paddd      mm3, mm6      ; bottom y + in_step
		paddd      mm4, mm6      ; bottom uv + in_step
		paddd      mm1, mm7      ; top uv + c_offset
		shl        eax, 1        ; width%16*2 = bytes in the partial group
		paddd      mm4, mm7      ; bottom uv + in_step + c_offset
		pslld      mm6, 1        ; in_step * 2
		movq       mm2, mm1
		movq       mm5, mm4
		psrlq      mm2, 32       ; mm2 = second chroma pointer (top)
		psrlq      mm5, 32       ; mm5 = second chroma pointer (bottom)
		mov        [esp+36], edi
		mov        [esp+40], ecx
		mov        [esp+44], eax
		movd       [esp+48], mm6
		movd       esi, mm0      ; esi = luma source
		movd       eax, mm1      ; eax = first chroma source
		movd       ebx, mm2      ; ebx = second chroma source
;-------------------------------------------------------------------
; Nothing to do for height <= 0 (dec/jnz below would otherwise wrap)
		test       edx, edx
		jle        yuv422_to_yuy2_done
;-------------------------------------------------------------------
; Vertical loop: edx = lines remaining
yuv422_to_yuy2_next_line:
; Skip the 16-pixel loop when the line has no full group
; (dec/jnz with ecx = 0 would otherwise wrap around)
		test       ecx, ecx
		jz         yuv422_to_yuy2_remainder
;-------------------------------------------------------------------
; Horizontal loop: ecx = 16-pixel groups remaining
yuv422_to_yuy2_next_16_pixel:
;-------------------------------------------------------------------
; Conversion core: 16 luma + 8 + 8 chroma bytes -> 32 output bytes
		movq       xmm0, [esi]
		movq       xmm1, [esi+8]
		movd       xmm4, [eax]
		movd       xmm5, [eax+4]
		movd       xmm6, [ebx]
		movd       xmm7, [ebx+4]
		lea        esi, [esi+16]
		lea        eax, [eax+8]
		lea        ebx, [ebx+8]
		punpcklbw  xmm4, xmm6    ; interleave the two chroma planes
		punpcklbw  xmm5, xmm7
		punpcklbw  xmm0, xmm4    ; interleave luma with chroma
		punpcklbw  xmm1, xmm5
		movdqu     [edi], xmm0
		movdqu     [edi+16], xmm1
		lea        edi, [edi+32]
;-------------------------------------------------------------------
; Horizontal loop end check
		dec        ecx
		jnz        yuv422_to_yuy2_next_16_pixel
;-------------------------------------------------------------------
; Remainder (width%16 pixels)
yuv422_to_yuy2_remainder:
		mov        ecx, [esp+44]
		test       ecx, ecx
		jz         yuv422_to_yuy2_line_end
; Remainder core: convert a whole group into the staging buffer,
; then copy only the bytes that belong to the line
		movq       xmm0, [esi]
		movq       xmm1, [esi+8]
		movd       xmm4, [eax]
		movd       xmm5, [eax+4]
		movd       xmm6, [ebx]
		movd       xmm7, [ebx+4]
		punpcklbw  xmm4, xmm6
		punpcklbw  xmm5, xmm7
		punpcklbw  xmm0, xmm4
		punpcklbw  xmm1, xmm5
		movdqa     [esp+4], xmm0
		movdqa     [esp+20], xmm1
		lea        esi, [esp+4]
		rep movsb                ; ecx <= 30, stays inside the 32-byte buffer
;-------------------------------------------------------------------
; Vertical loop end check
; mm0/mm1/mm2 hold the start pointers of the line just converted,
; mm3/mm4/mm5 those of the next line.  Advance the former by
; in_step*2 and swap the two sets.
yuv422_to_yuy2_line_end:
		mov        ecx, [esp+48] ; in_step*2
		movd       esi, mm0
		movd       eax, mm1
		movd       ebx, mm2
		add        esi, ecx
		add        eax, ecx
		add        ebx, ecx
		mov        ecx, [esp+52] ; out_step
		mov        edi, [esp+36]
		movq       mm0, mm3
		movq       mm1, mm4
		movq       mm2, mm5
		movd       mm3, esi
		movd       mm4, eax
		movd       mm5, ebx
		movd       esi, mm0
		movd       eax, mm1
		movd       ebx, mm2
		add        edi, ecx      ; next output line
		mov        ecx, [esp+40] ; reload width/16
		mov        [esp+36], edi
		dec        edx
		jnz        yuv422_to_yuy2_next_line
;-------------------------------------------------------------------
; Cleanup
yuv422_to_yuy2_done:
		emms                     ; leave MMX state so the caller can use x87
		mov        esp, ebp

		pop        eax
		pop        ebx
		pop        ecx
		pop        edx
		pop        esi
		pop        edi
		pop        ebp

		ret        16
;
_yuv422_to_yuy2_sse2@16 ENDP
;-------------------------------------------------------------------
; End

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuy2_convert_sse2 - change the conversion matrix of YUY2 data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC              C _yuy2_convert_sse2@16
;      void __stdcall  yuy2_convert_sse2(
; [esp + 4] = unsigned char             *yuy2
; [esp + 8] = int                        step
; [esp +12] = int                        height
; [esp +16] = YUY2_CONVERSION_PARAMETER *prm
; )
;
; Rewrites a packed YUY2 image in place, `height` rows of abs(step)
; bytes each, moving from row to row by `step` (which may be negative).
;
; For every 4-byte group (bytes 0 and 2 = luma, byte 1 = first chroma,
; byte 3 = second chroma) with c1 = byte1-128 and c3 = byte3-128:
;   t(k)   = (k0*c1 + k1*c3 + 4096) >> 14   for a coefficient pair k
;   luma   = sat8(luma  + t(pair at prm+0))   (same term for both luma)
;   byte 1 = sat8(128   + t(pair at prm+8))
;   byte 3 = sat8(128   + t(pair at prm+16))
; Each coefficient is a dword at prm, arithmetically shifted right by 2
; and saturated to 16 bits before use.
; NOTE(review): 4096 is added before a shift by 14; round-to-nearest
; would be 8192 - confirm against the scale of the caller's table.
;
; eax, ebx, ecx, esi, edi and ebp are preserved; xmm0-xmm7 are
; overwritten.  The `yuy2` argument slot on the stack is used as the
; running row pointer.  `rep movsb` relies on the direction flag being
; clear.
_yuy2_convert_sse2@16 PROC
;
;-------------------------------------------------------------------
; Local variables (offsets from the 16-byte aligned esp)
;
; [esp+16] coefficient pair for byte 3 (eight packed words)
; [esp+32] work[16]      staging buffer for a partial group
; [esp+48] abs(step)/16  full 16-byte groups per row
; [esp+52] p             destination of the partial group
;-------------------------------------------------------------------
; Save registers
;
		push       ebp
		push       eax
		push       ebx
		push       ecx
		push       esi
		push       edi
;-------------------------------------------------------------------
; Remember the stack address in the base pointer
		mov        ebp, esp
;-------------------------------------------------------------------
; Reserve the local variable area
		sub        esp, 56
		and        esp, 0fffffff0h
;-------------------------------------------------------------------
; Variable setup
; (24 = six saved registers, +4 = return address)
		mov        esi, [ebp+24+4]  ; yuy2
		mov        ecx, [ebp+24+8]  ; step
		mov        ebx, [ebp+24+12] ; height
		mov        eax, [ebp+24+16] ; prm
		movq       xmm5, [eax]      ; coefficient pair for luma
		movq       xmm4, [eax+8]    ; coefficient pair for byte 1
		movq       xmm3, [eax+16]   ; coefficient pair for byte 3
		mov        eax, ecx ; abs(step) phase-1
		pcmpeqw    xmm2, xmm2       ; all ones (-1 in every lane)
		pxor       xmm6, xmm6
		pxor       xmm7, xmm7
		sar        ecx, 31  ; abs(step) phase-2: sign mask
		pshufd     xmm3, xmm3, 01000100b ; repeat the pair in both halves
		pshufd     xmm4, xmm4, 01000100b
		pshufd     xmm5, xmm5, 01000100b
		psrad      xmm3, 2
		psrad      xmm4, 2
		psrad      xmm5, 2
		xor        eax, ecx ; abs(step) phase-3
		psubw      xmm6, xmm2       ; 1 in every word
		psubd      xmm7, xmm2       ; 1 in every dword
		sub        eax, ecx ; abs(step) phase-4
		packssdw   xmm3, xmm3       ; dword pairs -> word pairs
		packssdw   xmm4, xmm4
		packssdw   xmm5, xmm5
		mov        ecx, eax ; copy abs(step)
		psllw      xmm6, 7        ; 128 x 8 words
		pslld      xmm7, 12       ; 4096 x 4 dwords
		movdqa     [esp+16], xmm3 ; keep the byte-3 pair in memory
		shr        ecx, 4   ; abs(step)/16
		and        eax, 15  ; abs(step)%16
		mov        [esp+48], ecx
;-------------------------------------------------------------------
; Nothing to do for height <= 0 (dec/jnz below would otherwise wrap)
		test       ebx, ebx
		jle        yuy2_convert_sse2_done
;-------------------------------------------------------------------
; Vertical loop: ebx = rows remaining
yuy2_convert_sse2_v_head:
		test       ecx,ecx
		jz         yuy2_convert_sse2_h_tail
; Horizontal loop: ecx = 16-byte groups remaining in this row
yuy2_convert_sse2_h_head:
		movdqu     xmm0, [esi]
		movdqu     xmm3, [esi]
		psrlw      xmm0, 8        ; odd bytes (chroma) as words
		psllw      xmm3, 8
		psubw      xmm0, xmm6     ; chroma - 128
		psrlw      xmm3, 8        ; even bytes (luma) as words
		movdqa     xmm1, xmm0
		movdqa     xmm2, xmm0
		pmaddwd    xmm0, xmm5     ; luma term
		pmaddwd    xmm1, xmm4     ; byte-1 term
		pmaddwd    xmm2, [esp+16] ; byte-3 term
		paddd      xmm0, xmm7
		paddd      xmm1, xmm7
		paddd      xmm2, xmm7
		psrad      xmm0, 14
		psrad      xmm1, 14
		psrad      xmm2, 14
		packssdw   xmm0, xmm0 ; four luma terms as words
		packssdw   xmm1, xmm1 ; four byte-1 values as words
		packssdw   xmm2, xmm2 ; four byte-3 values as words
		punpcklwd  xmm0, xmm0 ; each luma term twice (one per luma sample)
		punpcklwd  xmm1, xmm2 ; byte-1 / byte-3 interleaved
		paddw      xmm0, xmm3 ; luma + term
		paddw      xmm1, xmm6 ; chroma + 128
		packuswb   xmm0, xmm0 ; saturate to bytes
		packuswb   xmm1, xmm1
		punpcklbw  xmm0, xmm1 ; back to packed YUY2 order
		movdqu     [esi], xmm0
		add        esi, 16
		dec        ecx
		jnz        yuy2_convert_sse2_h_head
; Partial group: eax = abs(step)%16 trailing bytes of the row
yuy2_convert_sse2_h_tail:
		test       eax,eax
		jz         yuy2_convert_sse2_h_last
		mov        [esp+52], esi  ; remember where the result goes
		lea        edi, [esp+32]
		mov        ecx, eax
		rep movsb                 ; copy the trailing bytes into work[]
		movdqu     xmm0, [esp+32]
		movdqu     xmm3, [esp+32] ; luma comes from work[] too; esi is past the row here
		psrlw      xmm0, 8
		psllw      xmm3, 8
		psubw      xmm0, xmm6
		psrlw      xmm3, 8
		movdqa     xmm1, xmm0
		movdqa     xmm2, xmm0
		pmaddwd    xmm0, xmm5
		pmaddwd    xmm1, xmm4
		pmaddwd    xmm2, [esp+16]
		paddd      xmm0, xmm7
		paddd      xmm1, xmm7
		paddd      xmm2, xmm7
		psrad      xmm0, 14
		psrad      xmm1, 14
		psrad      xmm2, 14
		packssdw   xmm0, xmm0 ; four luma terms as words
		packssdw   xmm1, xmm1 ; four byte-1 values as words
		packssdw   xmm2, xmm2 ; four byte-3 values as words
		punpcklwd  xmm0, xmm0 ; each luma term twice
		punpcklwd  xmm1, xmm2 ; byte-1 / byte-3 interleaved
		paddw      xmm0, xmm3 ; luma + term
		paddw      xmm1, xmm6 ; chroma + 128
		packuswb   xmm1, xmm1 ; saturate to bytes
		packuswb   xmm0, xmm0
		punpcklbw  xmm0, xmm1 ; back to packed YUY2 order
		movdqu     [esp+32], xmm0
		lea        esi, [esp+32]
		mov        edi, [esp+52]
		mov        ecx, eax
		rep        movsb          ; write back only the bytes that belong to the row
; Advance to the next row using the argument slot as the row pointer
yuy2_convert_sse2_h_last:
		mov        ecx, [esp+48]    ; reload abs(step)/16
		mov        esi, [ebp+24+4]
		add        esi, [ebp+24+8]  ; row += step
		mov        [ebp+24+4], esi
		dec        ebx
		jnz        yuy2_convert_sse2_v_head
;-------------------------------------------------------------------
; Cleanup
yuy2_convert_sse2_done:
		mov        esp, ebp

		pop        edi
		pop        esi
		pop        ecx
		pop        ebx
		pop        eax
		pop        ebp

		ret        16
_yuy2_convert_sse2@16 ENDP
;-------------------------------------------------------------------
; End

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; End of the _TEXT64 segment
END
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -