📄 frame_sse2.asm
字号:
push eax
;-------------------------------------------------------------------
; Save the stack address in the base pointer
mov ebp, esp
;-------------------------------------------------------------------
; Reserve the local variable area
sub esp, 128
and esp, 0fffffff0h
;-------------------------------------------------------------------
; Build the conversion coefficients
;
; Parameters to set up
;
; y_offset - mm0
; c_offset - mm1
; y_gain - [esp+32]
; half_13 - [esp+48]
; bu - [esp+64]
; guv - [esp+80]
; rv - [esp+96]
; y_src - esi & mm2
; u_src - eax & mm3
; v_src - ebx & mm4
; y_src_next - mm5
; u_src_next - mm6
; v_src_next - mm7
; width/8 - ecx & [esp+112]
; width%8*3 - [esp+116]
; height - edx
; in_step*2 - [esp+120]
; out_step - [esp+124]
;
mov edx, [ebp+28+16]
pcmpeqw mm0, mm0
pxor xmm0, xmm0
pxor mm1, mm1
pcmpeqw xmm1, xmm1
movd mm2, [edx+20] ; y_offset
movd xmm2, [edx+24] ; y_gain
movd xmm3, [edx+28] ; bu
movd xmm4, [edx+32] ; gu
movd xmm5, [edx+36] ; gv
movd xmm6, [edx+40] ; rv
psubw mm1, mm0 ; all 1
psubd xmm0, xmm1 ; all 1
pshufw mm0, mm2, 0 ; yo_yo_yo_yo
psllw mm1, 7 ; c_offset
movdqa xmm1, xmm0 ; all 1
pshufd xmm2, xmm2, 0 ; yg_yg_yg_yg
pshufd xmm3, xmm3, 0 ; bu_bu_bu_bu
punpckldq xmm4, xmm5 ; xx_xx_gv_gu
pshufd xmm6, xmm6, 0 ; rv_rv_rv_rv
pslld xmm0, 2 ; half_3
pslld xmm1, 12 ; half_13
pshufd xmm4, xmm4, 01000100b
paddd xmm2, xmm0
paddd xmm3, xmm0
paddd xmm4, xmm0
paddd xmm6, xmm0
psrad xmm2, 3
psrad xmm3, 3
psrad xmm4, 3
psrad xmm6, 3
packssdw xmm4, xmm4
movdqa [esp+32], xmm2
movdqa [esp+48], xmm1
movdqa [esp+64], xmm3
movdqa [esp+80], xmm4
movdqa [esp+96], xmm6
mov eax, [edx] ; width
mov ecx, [edx] ; width
movd mm7, [edx+8] ; in_step
movd mm4, [edx+16] ; c_offset
mov esi, [ebp+28+4] ; top
mov edi, [ebp+28+8] ; bottom
punpckldq mm7, mm7
punpckldq mm4, mm4
movd mm2, [esi+16] ; top y
movq mm3, [esi+20] ; top uv
movd mm5, [edi+16] ; bottom y
movq mm6, [edi+20] ; bottom uv
shr ecx, 3
and eax, 7
paddd mm5, mm7
mov ebx, eax
paddd mm6, mm7
shl eax, 1
paddd mm3, mm4
add eax, ebx
paddd mm6, mm4
mov ebx, [edx+12] ; out_step
mov edx, [edx+4] ; height
pslld mm7, 1
mov [esp+112], ecx
mov [esp+116], eax
movd [esp+120], mm7
mov [esp+124], ebx
movq mm4, mm3
movq mm7, mm6
psrlq mm4, 32
psrlq mm7, 32
movd esi, mm2
movd eax, mm3
movd ebx, mm4
mov edi, [ebp+28+12]
;-------------------------------------------------------------------
; Vertical loop
yuv422_to_bgr_next_line:
;-------------------------------------------------------------------
; Horizontal loop
yuv422_to_bgr_next_8_pixel:
;-------------------------------------------------------------------
; Conversion core
;
movd xmm0, [esi]
movd xmm1, [esi+4]
movd xmm2, [eax]
movd xmm4, [ebx]
movq2dq xmm5, mm0 ; y_offset
movq2dq xmm6, mm1 ; c_offset
pxor xmm7, xmm7
lea esi, [esi+8]
lea eax, [eax+4]
lea ebx, [ebx+4]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
punpcklbw xmm2, xmm7
punpcklbw xmm4, xmm7
psubw xmm0, xmm5
psubw xmm1, xmm5
psubw xmm2, xmm6
psubw xmm4, xmm6
movdqa xmm5, [esp+32] ; y_gain
movdqa xmm6, [esp+48] ; half_13
movdqa xmm3, xmm2
punpcklwd xmm0, xmm7
punpcklwd xmm1, xmm7
punpcklwd xmm2, xmm7
punpcklwd xmm3, xmm4
punpcklwd xmm4, xmm7
pmaddwd xmm0, xmm5
pmaddwd xmm1, xmm5
pmaddwd xmm2, [esp+64] ; bu
pmaddwd xmm3, [esp+80] ; guv
pmaddwd xmm4, [esp+96] ; rv
paddd xmm0, xmm6
paddd xmm1, xmm6
movdqa xmm5, xmm2
movdqa xmm6, xmm3
movdqa xmm7, xmm4
punpckldq xmm2, xmm2
punpckldq xmm3, xmm3
punpckldq xmm4, xmm4
punpckhdq xmm5, xmm5
punpckhdq xmm6, xmm6
punpckhdq xmm7, xmm7
paddd xmm2, xmm0
paddd xmm3, xmm0
paddd xmm4, xmm0
paddd xmm5, xmm1
paddd xmm6, xmm1
paddd xmm7, xmm1
psrad xmm2, 13
psrad xmm3, 13
psrad xmm4, 13
psrad xmm5, 13
psrad xmm6, 13
psrad xmm7, 13
packuswb xmm2, xmm5 ; b
packuswb xmm3, xmm6 ; g
packuswb xmm4, xmm7 ; r
movdqa xmm5, xmm2 ; b'
movdqa xmm6, xmm3 ; g'
movdqa xmm7, xmm4 ; r'
psrldq xmm2, 1
pslldq xmm3, 1
pslldq xmm4, 1
por xmm7, xmm2 ; xAx7x4x1
por xmm5, xmm3 ; x9x6x3x0
por xmm6, xmm4 ; Bx8x5x2x
pshufd xmm7, xmm7, 11011000b ; xAx4x7x1
pshufd xmm5, xmm5, 11011000b ; x9x3x6x0
pshufd xmm6, xmm6, 11011000b ; Bx5x8x2x
movdqa xmm4, xmm5
psrldq xmm6, 2 ; xBx5x8x2
punpcklwd xmm5, xmm7 ; xx76xx10
psrldq xmm4, 8 ; xxxxx9x3
punpckhwd xmm7, xmm6 ; xxBAxx54
punpcklwd xmm6, xmm4 ; xx98xx32
movd [edi], xmm5
movd [edi+4], xmm6
movd [edi+8], xmm7
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
movd [edi+12], xmm5
movd [edi+16], xmm6
movd [edi+20], xmm7
lea edi, [edi+24]
;-------------------------------------------------------------------
; Horizontal loop end check
dec ecx
jnz yuv422_to_bgr_next_8_pixel
;-------------------------------------------------------------------
; Remainder handling (the width % 8 leftover pixels)
mov ecx, [esp+116] ; width%8*3
test ecx, ecx
jz yuv422_to_bgr_line_end
movd xmm0, [esi]
movd xmm1, [esi+4]
movd xmm2, [eax]
movd xmm4, [ebx]
movq2dq xmm5, mm0 ; y_offset
movq2dq xmm6, mm1 ; c_offset
pxor xmm7, xmm7
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
punpcklbw xmm2, xmm7
punpcklbw xmm4, xmm7
psubw xmm0, xmm5
psubw xmm1, xmm5
psubw xmm2, xmm6
psubw xmm4, xmm6
movdqa xmm5, [esp+32] ; y_gain
movdqa xmm6, [esp+48] ; half_13
movdqa xmm3, xmm2
punpcklwd xmm0, xmm7
punpcklwd xmm1, xmm7
punpcklwd xmm2, xmm7
punpcklwd xmm3, xmm4
punpcklwd xmm4, xmm7
pmaddwd xmm0, xmm5
pmaddwd xmm1, xmm5
pmaddwd xmm2, [esp+64] ; bu
pmaddwd xmm3, [esp+80] ; guv
pmaddwd xmm4, [esp+96] ; rv
paddd xmm0, xmm6
paddd xmm1, xmm6
movdqa xmm5, xmm2
movdqa xmm6, xmm3
movdqa xmm7, xmm4
punpckldq xmm2, xmm2
punpckldq xmm3, xmm3
punpckldq xmm4, xmm4
punpckhdq xmm5, xmm5
punpckhdq xmm6, xmm6
punpckhdq xmm7, xmm7
paddd xmm2, xmm0
paddd xmm3, xmm0
paddd xmm4, xmm0
paddd xmm5, xmm1
paddd xmm6, xmm1
paddd xmm7, xmm1
psrad xmm2, 13
psrad xmm3, 13
psrad xmm4, 13
psrad xmm5, 13
psrad xmm6, 13
psrad xmm7, 13
packuswb xmm2, xmm5 ; b
packuswb xmm3, xmm6 ; g
packuswb xmm4, xmm7 ; r
movdqa xmm5, xmm2 ; b'
movdqa xmm6, xmm3 ; g'
movdqa xmm7, xmm4 ; r'
pcmpeqw xmm0, xmm0
pcmpeqw xmm1, xmm1
psrldq xmm2, 1
pslldq xmm3, 1
pslldq xmm4, 1
psrld xmm0, 16
pslld xmm1, 16
por xmm7, xmm2 ; xAx7x4x1
por xmm5, xmm3 ; x9x6x3x0
por xmm6, xmm4 ; Bx8x5x2x
pshufd xmm7, xmm7, 11011000b ; xAx4x7x1
pshufd xmm5, xmm5, 11011000b ; x9x3x6x0
pshufd xmm6, xmm6, 11011000b ; Bx5x8x2x
movdqa xmm4, xmm5
psrldq xmm6, 2 ; xBx5x8x2
punpcklwd xmm5, xmm7 ; xx76xx10
psrldq xmm4, 8 ; xxxxx9x3
punpckhwd xmm7, xmm6 ; xxBAxx54
punpcklwd xmm6, xmm4 ; xx98xx32
movd [esp+8], xmm5
movd [esp+12], xmm6
movd [esp+16], xmm7
psrldq xmm5, 8
psrldq xmm6, 8
psrldq xmm7, 8
movd [esp+20], xmm5
movd [esp+24], xmm6
movd [esp+28], xmm7
lea esi, [esp+8]
rep movsb
;-------------------------------------------------------------------
; Vertical loop end check
yuv422_to_bgr_line_end:
mov ecx, [esp+120]
movd esi, mm2
movd eax, mm3
movd ebx, mm4
movq mm2, mm5
movq mm3, mm6
movq mm4, mm7
add esi, ecx
add eax, ecx
add ebx, ecx
mov ecx, [esp+124]
mov edi, [ebp+28+12]
movd mm5, esi
movd mm6, eax
movd mm7, ebx
movd esi, mm2
movd eax, mm3
movd ebx, mm4
add edi, ecx
mov [ebp+28+12], edi
mov ecx,[esp+112]
dec edx
jnz yuv422_to_bgr_next_line
;-------------------------------------------------------------------
; Cleanup
mov esp, ebp
pop eax
pop ebx
pop ecx
pop edx
pop esi
pop edi
pop ebp
ret 16
;
_yuv422_to_bgr_sse2@16 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuv422_to_yuy2_sse2 - YUV -> YUY2 conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC C _yuv422_to_yuy2_sse2@16
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -