📄 frame_mmx.asm
字号:
mov eax, [esp+104]
mov ebx, [esp+108]
mov ecx, [esp+124]
dec edx
jnz yuv422_to_bgr_next_line
;-------------------------------------------------------------------
; 屻巒枛
mov esp, ebp
pop eax
pop ebx
pop ecx
pop edx
pop esi
pop edi
pop ebp
ret 16
;
_yuv422_to_bgr_mmx@16 ENDP
;-------------------------------------------------------------------
; end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuv422_to_yuy2_mmx - YUV -> YUY2 conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC C _yuv422_to_yuy2_mmx@16
;-------------------------------------------------------------------
; void __stdcall yuv422_to_yuy2_mmx(
;     [esp + 4] = FRAME *top,
;     [esp + 8] = FRAME *bottom,
;     [esp +12] = unsigned char *out,
;     [esp +16] = CONVERSION_PARAMETER *prm
; )
;
; Packs planar Y/U/V samples into YUY2 byte order (Y0 U0 Y1 V0 ...).
;
; Fields read by this routine:
;   FRAME  +16 = Y plane pointer, +20 = U plane pointer,
;          +24 = V plane pointer
;   prm    +0  = width, +4 = height, +8 = in_step,
;          +12 = out_step, +16 = c_offset
;
; Output lines alternate between the two frames: the first line is
; read from `top`, the second from `bottom` + in_step, and each source
; then advances by in_step*2.
;
; All seven general registers that are touched are saved and restored.
; The four arguments are popped by `ret 16`.
;
; Preconditions not checked here:
;   - height >= 1 (the row counter is decremented before it is tested).
;   - The direction flag is clear (rep movsb copies forward).
;   - When width is not a multiple of 16 the last group still LOADS a
;     full 16 Y / 8 U / 8 V bytes; only the store is trimmed.
;     NOTE(review): assumes the source rows are readable that far -
;     confirm against the FRAME allocator.
;   - NOTE(review): no `emms` is executed before returning; presumably
;     the caller takes care of it - confirm.
;-------------------------------------------------------------------
_yuv422_to_yuy2_mmx@16 PROC
;
;-------------------------------------------------------------------
; Local variables (offsets from the 8-byte aligned esp)
;
; [esp+ 8] work          32-byte staging buffer for the partial group
; [esp+40] y_src         Y pointer of the current line
; [esp+44] y_src_next    Y pointer of the following line
; [esp+48] u_src         U pointer of the current line
; [esp+52] v_src         V pointer of the current line
; [esp+56] u_src_next    U pointer of the following line
; [esp+60] v_src_next    V pointer of the following line
; [esp+64] out           start of the current output line
; [esp+68] width/16      number of full 16-pixel groups per line
; [esp+72] width%16*2    output bytes in the partial group
; [esp+76] in_step*2
; [esp+80] out_step
;
; total 84 + alignment slack
;-------------------------------------------------------------------
; save registers
    push ebp
    push edi
    push esi
    push edx
    push ecx
    push ebx
    push eax
;-------------------------------------------------------------------
; remember the stack address in the base pointer
; (7 pushes = 28 bytes, so the return address is at [ebp+28])
    mov ebp, esp
;-------------------------------------------------------------------
; reserve the local variable area, aligned for movq
    sub esp, 84
    and esp, 0fffffff8h
;-------------------------------------------------------------------
; build the loop parameters
    mov eax, [ebp+28+4]         ; top
    mov ebx, [ebp+28+8]         ; bottom
    mov edi, [ebp+28+12]        ; out
    mov edx, [ebp+28+16]        ; prm
    movd mm0, [eax+16]          ; top y
    movd mm1, [eax+20]          ; top u
    movd mm2, [eax+24]          ; top v
    movd mm3, [ebx+16]          ; bottom y
    movd mm4, [ebx+20]          ; bottom u
    movd mm5, [ebx+24]          ; bottom v
    mov eax, [edx]              ; width
    movd mm6, [edx+8]           ; in_step
    mov ebx, [edx+12]           ; out_step
    movd mm7, [edx+16]          ; c_offset
    mov edx, [edx+4]            ; height (row counter from here on)
    mov ecx, eax
    mov [esp+80], ebx           ; out_step
    and eax, 0fh                ; width%16
    shr ecx, 4                  ; width/16
    punpckldq mm1, mm2          ; top    v:u
    punpckldq mm4, mm5          ; bottom v:u
    punpckldq mm6, mm6          ; in_step in both dwords
    punpckldq mm7, mm7          ; c_offset in both dwords
    paddd mm3, mm6              ; bottom y + in_step
    paddd mm4, mm6              ; bottom uv + in_step
    paddd mm1, mm7              ; top uv + c_offset
    shl eax, 1                  ; width%16*2 : YUY2 is 2 bytes per pixel
    paddd mm4, mm7              ; bottom uv + in_step + c_offset
    punpckldq mm0, mm3          ; y_src_next:y_src
    pslld mm6, 1                ; in_step * 2
    movq [esp+40], mm0
    movq [esp+48], mm1
    movq [esp+56], mm4
    mov [esp+64], edi
    mov [esp+68], ecx
    mov [esp+72], eax
    movd [esp+76], mm6
    mov esi, [esp+40]           ; esi = y, eax = u, ebx = v of line 0
    mov eax, [esp+48]
    mov ebx, [esp+52]
;-------------------------------------------------------------------
; vertical loop
; On entry: ecx = width/16, esi/eax/ebx = y/u/v source, edi = output.
yuv422_to_yuy2_next_line:
; A line narrower than 16 pixels has no full group; without this check
; `dec ecx` would wrap from 0 and the loop would run ~2^32 times.
    test ecx, ecx
    jz yuv422_to_yuy2_line_tail
;-------------------------------------------------------------------
; horizontal loop
yuv422_to_yuy2_next_16_pixel:
;-------------------------------------------------------------------
; conversion core: 16 Y + 8 U + 8 V bytes -> 32 output bytes
    movd mm0, [esi]             ; y3..y0
    movd mm1, [esi+4]           ; y7..y4
    movd mm2, [esi+8]           ; y11..y8
    movd mm3, [esi+12]          ; y15..y12
    movd mm4, [eax]             ; u3..u0
    movd mm5, [eax+4]           ; u7..u4
    movd mm6, [ebx]             ; v3..v0
    movd mm7, [ebx+4]           ; v7..v4
    lea esi, [esi+16]
    lea eax, [eax+8]
    lea ebx, [ebx+8]
    psllq mm1, 32               ; move y7..y4 to the high dword
    psllq mm3, 32               ; move y15..y12 to the high dword
    punpcklbw mm4, mm6          ; v3_u3_v2_u2_v1_u1_v0_u0
    punpcklbw mm5, mm7          ; v7_u7_v6_u6_v5_u5_v4_u4
    punpcklbw mm0, mm4          ; pixels 0-3
    punpckhbw mm1, mm4          ; pixels 4-7
    punpcklbw mm2, mm5          ; pixels 8-11
    punpckhbw mm3, mm5          ; pixels 12-15
    movq [edi], mm0
    movq [edi+8], mm1
    movq [edi+16], mm2
    movq [edi+24], mm3
    lea edi, [edi+32]
;-------------------------------------------------------------------
; horizontal loop end check
    dec ecx
    jnz yuv422_to_yuy2_next_16_pixel
;-------------------------------------------------------------------
; remainder handling (width%16 pixels)
yuv422_to_yuy2_line_tail:
    mov ecx, [esp+72]           ; bytes to emit for the partial group
    test ecx, ecx
    jz yuv422_to_yuy2_line_end
; remainder core: convert one full group into `work`, then copy only
; the bytes that belong to this line. The source pointers are not
; advanced here because they are reloaded at line end.
    movd mm0, [esi]
    movd mm1, [esi+4]
    movd mm2, [esi+8]
    movd mm3, [esi+12]
    movd mm4, [eax]
    movd mm5, [eax+4]
    movd mm6, [ebx]
    movd mm7, [ebx+4]
    psllq mm1, 32
    psllq mm3, 32
    punpcklbw mm4, mm6
    punpcklbw mm5, mm7
    punpcklbw mm0, mm4
    punpckhbw mm1, mm4
    punpcklbw mm2, mm5
    punpckhbw mm3, mm5
    movq [esp+8], mm0
    movq [esp+16], mm1
    movq [esp+24], mm2
    movq [esp+32], mm3
    lea esi, [esp+8]
    rep movsb                   ; ecx <= 30, always inside `work`
;-------------------------------------------------------------------
; vertical loop end check
; Swap the "current" and "next" source pointers and advance the one
; that was just used by in_step*2.
yuv422_to_yuy2_line_end:
    movq mm0, [esp+40]          ; y_src_next:y_src
    movq mm1, [esp+48]          ; current v:u
    movq mm2, [esp+56]          ; next v:u
    mov edi, [esp+64]
    mov ecx, [esp+68]           ; width/16
    movd mm3, [esp+76]          ; in_step*2 (low dword only)
    add edi, [esp+80]           ; out += out_step
    paddd mm0, mm3              ; y_src += in_step*2
    punpckldq mm3, mm3
    mov [esp+64], edi           ; out
    paddd mm1, mm3              ; current uv += in_step*2
    movd [esp+44], mm0          ; next y
    psrlq mm0, 32
    movq [esp+48], mm2          ; current uv
    movq [esp+56], mm1          ; next uv
    movd esi, mm0               ; current y
    mov eax, [esp+48]
    mov ebx, [esp+52]
    mov [esp+40], esi           ; current y
    dec edx
    jnz yuv422_to_yuy2_next_line
;-------------------------------------------------------------------
; cleanup
    mov esp, ebp
    pop eax
    pop ebx
    pop ecx
    pop edx
    pop esi
    pop edi
    pop ebp
    ret 16
;
_yuv422_to_yuy2_mmx@16 ENDP
;-------------------------------------------------------------------
; end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuy2_convert_mmx - change the conversion matrix of YUY2 data
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC C _yuy2_convert_mmx@16
; void __stdcall yuy2_convert_mmx(
; [esp + 4] = unsigned char *yuy2
; [esp + 8] = int step
; [esp +12] = int height
; [esp +16] = YUY2_CONVERSION_PARAMETER *prm
; )
;
; Rewrites a YUY2 buffer in place, one row at a time.
; For every U/V pair three multiply-add sums are formed from
; (U-128, V-128) and the coefficient pairs read from prm:
;   prm+0  pair -> correction added to both Y samples of that pair
;   prm+8  pair -> new U
;   prm+16 pair -> new V
; Each pair is two signed dwords; they are shifted right by 2 and
; saturated to 16 bits before use. Each sum gets 4096 added and is
; shifted right arithmetically by 14.
; NOTE(review): half of 1<<14 would be 8192, not 4096 - confirm that
; the rounding constant is intended.
;
; abs(step) bytes are processed per row; the row pointer then advances
; by the signed step. The advanced pointer is written back into the
; yuy2 argument slot on the stack.
;
; Preconditions not checked here:
;   - height >= 1 (the counter is decremented before it is tested).
;   - The direction flag is clear (rep movsb copies forward).
;   - NOTE(review): no emms is executed before returning; presumably
;     the caller takes care of it - confirm.
_yuy2_convert_mmx@16 PROC
;
;-------------------------------------------------------------------
; Local variables (offsets from the 8-byte aligned esp)
;
; [esp+ 8] coefficient pair for the V output (spilled from mm3)
; [esp+16] work[8]      staging buffer for a partial 8-byte group
; [esp+24] abs(step)/8  full 8-byte groups per row
; [esp+28] p            saved write-back address for the partial group
;-------------------------------------------------------------------
; save registers
; (6 pushes = 24 bytes, so the return address is at [ebp+24])
push ebp
push eax
push ebx
push ecx
push esi
push edi
;-------------------------------------------------------------------
; remember the stack address in the base pointer
mov ebp, esp
;-------------------------------------------------------------------
; reserve the local variable area, aligned for movq
sub esp, 32
and esp, 0fffffff8h
;-------------------------------------------------------------------
; set up variables
mov esi, [ebp+24+4] ; yuy2
mov ecx, [ebp+24+8] ; step
mov ebx, [ebp+24+12] ; height (row counter)
mov eax, [ebp+24+16] ; prm
movq mm5, [eax] ; coefficient pair for the Y correction
movq mm4, [eax+8] ; coefficient pair for U
movq mm3, [eax+16] ; coefficient pair for V
mov eax, ecx ; abs(step) phase-1: copy step
pcmpeqw mm2, mm2 ; all ones (-1 in every lane)
pxor mm6, mm6
pxor mm7, mm7
sar ecx, 31 ; abs(step) phase-2: ecx = sign mask (0 or -1)
psrad mm3, 2 ; scale coefficients down so they fit 16 bits
psrad mm4, 2
psrad mm5, 2
xor eax, ecx ; abs(step) phase-3
psubw mm6, mm2 ; 1 in each word
psubd mm7, mm2 ; 1 in each dword
sub eax, ecx ; abs(step) phase-4: eax = abs(step)
packssdw mm3, mm3 ; pair duplicated into both dwords as words
packssdw mm4, mm4
packssdw mm5, mm5
mov ecx, eax ; copy abs(step)
psllw mm6, 7 ; 128x4 (chroma bias)
pslld mm7, 12 ; 4096x2 (added before the >>14)
movq [esp+8], mm3 ; V coefficient pair kept in memory; mm3 is reused below
shr ecx, 3 ; abs(step)/8
and eax, 7 ; abs(step)%8
mov [esp+24], ecx
;-------------------------------------------------------------------
; vertical loop
; On entry: esi = row start, ecx = abs(step)/8, eax = abs(step)%8,
; ebx = rows remaining.
yuy2_convert_mmx_v_head:
; skip the full-group loop when the row is shorter than 8 bytes
test ecx,ecx
jz yuy2_convert_mmx_h_tail
; full groups: 8 bytes (4 Y samples, 2 U/V pairs) per iteration
yuy2_convert_mmx_h_head:
movq mm0, [esi]
movq mm3, [esi]
psrlw mm0, 8 ; 00VB_00UB_00VA_00UA
psllw mm3, 8 ; YD00_YC00_YB00_YA00
psubw mm0, mm6 ; UV-128
psrlw mm3, 8 ; 00YD_00YC_00YB_00YA
movq mm1, mm0
movq mm2, mm0
pmaddwd mm0, mm5 ; Y correction sums
pmaddwd mm1, mm4 ; U sums
pmaddwd mm2, [esp+8] ; V sums
paddd mm0, mm7
paddd mm1, mm7
paddd mm2, mm7
psrad mm0, 14
psrad mm1, 14
psrad mm2, 14
packssdw mm0, mm0 ; YB_YA_YB_YA
packssdw mm1, mm1 ; UB_UA_UB_UA
packssdw mm2, mm2 ; VB_VA_VB_VA
punpcklwd mm0, mm0 ; YB_YB_YA_YA
punpcklwd mm1, mm2 ; VB_UB_VA_UA
paddw mm0, mm3 ; YD_YC_YB_YA
paddw mm1, mm6 ; UV+128
packuswb mm0, mm0 ; YD_YC_YB_YA_YD_YC_YB_YA
packuswb mm1, mm1 ; VB_UB_VA_UA_VB_UB_VA_UA
punpcklbw mm0, mm1 ; VB_YD_UB_YC_VA_YB_UA_YA
movq [esi], mm0
add esi, 8
dec ecx
jnz yuy2_convert_mmx_h_head
; partial group: copy the abs(step)%8 leftover bytes into work[],
; convert there, and copy the same number of bytes back
yuy2_convert_mmx_h_tail:
test eax,eax
jz yuy2_convert_mmx_h_last
mov [esp+28], esi ; remember where the leftover bytes live
lea edi, [esp+16]
mov ecx, eax
rep movsb
movq mm0, [esp+16]
movq mm3, [esp+16]
psrlw mm0, 8 ; 00VB_00UB_00VA_00UA
psllw mm3, 8 ; YD00_YC00_YB00_YA00
psubw mm0, mm6 ; UV-128
psrlw mm3, 8 ; 00YD_00YC_00YB_00YA
movq mm1, mm0
movq mm2, mm0
pmaddwd mm0, mm5
pmaddwd mm1, mm4
pmaddwd mm2, [esp+8]
paddd mm0, mm7
paddd mm1, mm7
paddd mm2, mm7
psrad mm0, 14
psrad mm1, 14
psrad mm2, 14
packssdw mm0, mm0 ; YB_YA_YB_YA
packssdw mm1, mm1 ; UB_UA_UB_UA
packssdw mm2, mm2 ; VB_VA_VB_VA
punpcklwd mm0, mm0 ; YB_YB_YA_YA
punpcklwd mm1, mm2 ; VB_UB_VA_UA
paddw mm0, mm3 ; YD_YC_YB_YA
paddw mm1, mm6 ; UV+128
packuswb mm0, mm0 ; YD_YC_YB_YA_YD_YC_YB_YA
packuswb mm1, mm1 ; VB_UB_VA_UA_VB_UB_VA_UA
punpcklbw mm0, mm1 ; VB_YD_UB_YC_VA_YB_UA_YA
movq [esp+16], mm0
lea esi, [esp+16]
mov edi, [esp+28]
mov ecx, eax
rep movsb
; end of row: reload the group count and step the row pointer
yuy2_convert_mmx_h_last:
mov ecx, [esp+24] ; abs(step)/8 for the next row
mov esi, [ebp+24+4] ; start of the row just processed
add esi, [ebp+24+8] ; + signed step
mov [ebp+24+4], esi ; keep the new row start in the argument slot
dec ebx
jnz yuy2_convert_mmx_v_head
;-------------------------------------------------------------------
; cleanup
mov esp, ebp
pop edi
pop esi
pop ecx
pop ebx
pop eax
pop ebp
ret 16
_yuy2_convert_mmx@16 ENDP
;-------------------------------------------------------------------
; end
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; end of _TEXT64 segment
END
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -