📄 frame_sse2.asm
字号:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Boilerplate (assembler / memory-model setup)
.586
.mmx
.xmm
.model flat
_TEXT64 segment page public use32 'CODE'
align 16
;-------------------------------------------------------------------
; Constants
;-------------------------------------------------------------------
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; chroma420i_to_422_sse2 - interlaced 4:2:0 -> 4:2:2 chroma interpolation
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Row formulas (left: output row index, right: input row indices):
; (y*4+2) = ((y*2 ) * 5 + (y*2+2) * 3 )>> 3
; (y*4+3) = ((y*2+1) * 7 + (y*2+3) * 1 )>> 3
; (y*4+4) = ((y*2 ) * 1 + (y*2+2) * 7 )>> 3
; (y*4+5) = ((y*2+1) * 3 + (y*2+3) * 5 )>> 3
;
; Memory layout as used by the code below:
;   input  row k is read    at data + k*width            (width/2 bytes)
;   output row j is written at data + width/2 + j*width  (width/2 bytes)
; Output rows 0,1 and the last two output rows are plain copies of the
; first two / last two input rows processed; rows in between are
; interpolated in groups of four.
;
; The loops handle 8 bytes per step and run width/16 steps per row, so
; width is expected to be a multiple of 16.  Calls with width < 16 or
; height < 4 return without touching the buffer.
;
; Stack frame after "sub esp, 12":
;   [esp+ 0]        width/2*7   (output pointer advance per 4-row group)
;   [esp+ 4]        width/2*3   (input pointer advance per 4-row group)
;   [esp+ 8]        width/16    (8-byte steps per row)
;   [esp+12..35]    saved eax, ebx, ecx, edx, esi, edi
;   [esp+36]        return address
;   [esp+40..51]    data, width, height
;
; Preserves all general-purpose registers; overwrites xmm0-xmm7 and
; flags, and leaves the direction flag cleared.
;-------------------------------------------------------------------
PUBLIC C _chroma420i_to_422_sse2@12
; void __stdcall chroma420i_to_422_sse2(
; [esp + 4] = unsigned char *data,
; [esp + 8] = int width,
; [esp +12] = int height
; )
;
_chroma420i_to_422_sse2@12 PROC
;-------------------------------------------------------------------
; Save registers
        push    edi
        push    esi
        push    edx
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch parameters
        mov     esi, [esp+24+4]         ; data
        mov     edx, [esp+24+8]         ; width
        mov     eax, [esp+24+12]        ; height
;-------------------------------------------------------------------
; Reserve local variable area (three dwords, see frame layout above)
        sub     esp, 12
;-------------------------------------------------------------------
; Reject sizes the do-while loops below cannot handle: a zero step or
; group count would wrap the dec/jnz counters around to 0FFFFFFFFh.
        cmp     edx, 16
        jl      chroma420i_to_422_exit
        cmp     eax, 4
        jl      chroma420i_to_422_exit
;-------------------------------------------------------------------
; Build loop parameters
;
; Parameters to create
;   (height/4)-1    - eax
;   width           - edx
;   width/16        - ecx & [esp+8]
;   width*2-8       - ebx
;   width/2*7       - [esp+0]
;   width/2*3       - [esp+4]
;   data+width/2    - edi
;   all 5           - xmm7
        mov     ecx, edx
        mov     ebx, edx
        pcmpeqw xmm7, xmm7              ; all words 0FFFFh
        pcmpeqw xmm6, xmm6              ; all words 0FFFFh
        shr     eax, 2                  ; height/4
        shr     ecx, 1                  ; width/2
        shl     ebx, 1                  ; width*2
        psrlw   xmm7, 14                ; all words 3
        psllw   xmm6, 1                 ; all words -2
        lea     edi, [esi+ecx]          ; data + width/2 = output row 0
        add     ecx, edx                ; width/2*3
        dec     eax                     ; number of interpolated 4-row groups
        add     ebx, ecx                ; width/2*7
        mov     [esp], ebx
        mov     [esp+4], ecx
        mov     ecx, edx
        mov     ebx, edx
        shr     ecx, 4                  ; width/16
        shl     ebx, 1
        mov     [esp+8], ecx
        psubw   xmm7, xmm6              ; 3 - (-2) = all words 5
        sub     ebx, 8                  ; width*2 - 8
        shl     ecx, 1                  ; dwords per row = width/16*2
;-------------------------------------------------------------------
; Interpolation exception - copy the top 2 rows unchanged
        cld
        rep movsd                       ; input row 0 -> output row 0
        mov     ecx, esi                ; ecx = data + width/2
        mov     esi, edi                ; esi = data + width (input row 1)
        lea     edi, [ecx+edx]          ; edi = output row 1
        mov     ecx, [esp+8]
        shl     ecx, 1
        rep movsd                       ; input row 1 -> output row 1
        lea     edi, [esi+edx]          ; edi = output row 2
        mov     esi, [esp+12+24+4]      ; esi = data (input row 0)
        mov     ecx, [esp+8]            ; steps per row
        test    eax, eax                ; height 4..7: nothing to interpolate
        jz      chroma420i_to_422_copy_bottom
;-------------------------------------------------------------------
; Vertical loop
chroma420i_to_422_next_4_line:
;-------------------------------------------------------------------
; Horizontal loop
chroma420i_to_422_next_4_pixel:
;-------------------------------------------------------------------
; Interpolation core
;
; Register usage here
;   esi  - input address
;   edi  - output address
;   edx  - width
;   ebx  - width*2-8 (rewinds two rows and steps 8 bytes right)
;   xmm7 - all 5
        movq    xmm0, [esi]             ; c[0][x]
        lea     esi, [esi+edx]
        movq    xmm1, [esi]             ; c[1][x]
        lea     esi, [esi+edx]
        movq    xmm2, [esi]             ; c[2][x]
        movq    xmm3, [esi+edx]         ; c[3][x]
        pxor    xmm6, xmm6              ; all 0
        pcmpeqw xmm4, xmm4
        pcmpeqw xmm5, xmm5
        psrlw   xmm4, 14                ; all 3
        psrlw   xmm5, 13                ; all 7
        punpcklbw xmm0, xmm6            ; widen bytes to words
        punpcklbw xmm1, xmm6
        punpcklbw xmm2, xmm6
        punpcklbw xmm3, xmm6
        movdqa  xmm6, xmm4              ; keep a copy of all 3
        pmullw  xmm4, xmm2              ; 3*c[2][x]
        pmullw  xmm2, xmm5              ; c[2][x]*7
        pmullw  xmm5, xmm1              ; 7*c[1][x]
        pmullw  xmm1, xmm6              ; c[1][x]*3
        paddw   xmm2, xmm0              ; c[2][x]*7 + c[0][x]
        paddw   xmm5, xmm3              ; 7*c[1][x] + c[3][x]
        pmullw  xmm0, xmm7              ; c[0][x]*5
        pmullw  xmm3, xmm7              ; c[3][x]*5
        paddw   xmm0, xmm4              ; c[0][x]*5 + 3*c[2][x]
        paddw   xmm3, xmm1              ; c[3][x]*5 + c[1][x]*3
        psraw   xmm2, 3
        psraw   xmm5, 3
        psraw   xmm0, 3
        psraw   xmm3, 3
        packuswb xmm0, xmm2             ; low 8 bytes: row +0, high 8 bytes: row +2
        packuswb xmm5, xmm3             ; low 8 bytes: row +1, high 8 bytes: row +3
        movq    [edi], xmm0             ; output row +0
        lea     edi, [edi+edx]
        psrldq  xmm0, 8
        movq    [edi], xmm5             ; output row +1
        lea     edi, [edi+edx]
        psrldq  xmm5, 8
        movq    [edi], xmm0             ; output row +2
        movq    [edi+edx], xmm5         ; output row +3
;-------------------------------------------------------------------
; Horizontal loop end check
        sub     esi, ebx                ; back up two rows, forward 8 bytes
        sub     edi, ebx
        dec     ecx
        jnz     chroma420i_to_422_next_4_pixel
;-------------------------------------------------------------------
; Vertical loop end check
        add     edi, [esp]              ; output: start of row 4 rows down
        add     esi, [esp+4]            ; input: start of row 2 rows down
        mov     ecx, [esp+8]
        dec     eax
        jnz     chroma420i_to_422_next_4_line
;-------------------------------------------------------------------
; Interpolation exception - copy the bottom 2 rows unchanged
; On entry: esi = input row start, edi = output row start, ecx = width/16
chroma420i_to_422_copy_bottom:
        shr     edx, 1                  ; width/2
        shl     ecx, 1                  ; dwords per row
        rep movsd
        lea     esi, [esi+edx]          ; next input row
        lea     edi, [edi+edx]          ; next output row
        mov     ecx, [esp+8]
        shl     ecx, 1
        rep movsd
;-------------------------------------------------------------------
; Cleanup
chroma420i_to_422_exit:
        add     esp, 12                 ; release local variables
        pop     eax                     ; restore registers
        pop     ebx
        pop     ecx
        pop     edx
        pop     esi
        pop     edi
        ret     12
;-------------------------------------------------------------------
_chroma420i_to_422_sse2@12 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; chroma420p_to_422_sse2 - progressive 4:2:0 -> 4:2:2 chroma interpolation
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; [o1] = ([i0] * 3 + [i1]) / 4
; [o2] = ([i0] + [i1] * 3) / 4
;
; Memory layout as used by the code below:
;   input  row k is read    at data + k*width            (width/2 bytes)
;   output row j is written at data + width/2 + j*width  (width/2 bytes)
; Output row 0 is a copy of input row 0 and the final output row is a
; copy of the last input row reached; each adjacent input row pair in
; between yields two interpolated output rows.
;
; The loop handles 8 bytes per step and runs width/16 steps per row, so
; width is expected to be a multiple of 16.  Calls with width < 16 or
; height < 2 return without touching the buffer.
;
; Preserves all general-purpose registers; overwrites xmm0-xmm3, xmm6,
; xmm7 and flags, and leaves the direction flag cleared.
;-------------------------------------------------------------------
PUBLIC C _chroma420p_to_422_sse2@12
; void __stdcall chroma420p_to_422_sse2(
; [esp + 4] = unsigned char *data,
; [esp + 8] = int width,
; [esp +12] = int height
; )
;
_chroma420p_to_422_sse2@12 PROC
;-------------------------------------------------------------------
; Save registers
        push    edi
        push    esi
        push    edx
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch data from the arguments
        mov     esi, [esp+24+4]         ; data
        mov     edx, [esp+24+8]         ; width
        mov     ebx, [esp+24+12]        ; height
;-------------------------------------------------------------------
; Reject sizes the do-while loops below cannot handle: a zero step or
; row-pair count would wrap the dec/jnz counters around to 0FFFFFFFFh.
        cmp     edx, 16
        jl      chroma420p_to_422_sse2_exit
        cmp     ebx, 2
        jl      chroma420p_to_422_sse2_exit
;-------------------------------------------------------------------
; Build loop parameters
;
; Parameters to create
;
;   data            - esi
;   data+width/2    - edi
;   width/2         - eax
;   height/2-1      - ebx
;   width           - edx
;   all 3           - xmm6
;   all 0           - xmm7
;
        mov     eax, edx
        mov     ecx, edx
        shr     ebx, 1                  ; height/2
        shr     eax, 1                  ; width/2
        shr     ecx, 3                  ; dwords per row = width/8
        pcmpeqw xmm6, xmm6              ; all words 0FFFFh
        pxor    xmm7, xmm7              ; all 0
        dec     ebx                     ; number of interpolated row pairs
        lea     edi, [esi+eax]          ; output row 0
        psrlw   xmm6, 14                ; all words 3
;-------------------------------------------------------------------
; Interpolation exception - copy the first row
        cld                             ; forward copy, as in the interlaced routine
        rep movsd
        mov     ecx, edx
        sub     esi, eax                ; back to input row 0
        add     edi, eax                ; output row 1
        shr     ecx, 4                  ; steps per row = width/16
        test    ebx, ebx                ; height 2..3: nothing to interpolate
        jz      chroma420p_to_422_sse2_copy_bottom
;-------------------------------------------------------------------
; Vertical loop
chroma420p_to_422_sse2_next_2_line:
;-------------------------------------------------------------------
; Horizontal loop
chroma420p_to_422_sse2_next_8_pixel:
;-------------------------------------------------------------------
; Interpolation core
;
; Parameters here
;   esi  - input address
;   edi  - output address
;   edx  - width
;   xmm6 - all 3
;   xmm7 - all 0
        movq    xmm1, [esi]             ; i[0][0:7]
        movq    xmm3, [esi+edx]         ; i[1][0:7]
        lea     esi, [esi+8]
        movdqa  xmm0, xmm6
        movdqa  xmm2, xmm6
        punpcklbw xmm1, xmm7            ; widen bytes to words
        punpcklbw xmm3, xmm7
        pmullw  xmm0, xmm1              ; 3*i[0]
        pmullw  xmm2, xmm3              ; 3*i[1]
        paddw   xmm0, xmm3              ; 3*i[0] + i[1]
        paddw   xmm1, xmm2              ; i[0] + 3*i[1]
        psrlw   xmm0, 2
        psrlw   xmm1, 2
        packuswb xmm0, xmm0             ; o[0][0:7]
        packuswb xmm1, xmm1             ; o[1][0:7]
        movq    [edi], xmm0
        movq    [edi+edx], xmm1
        lea     edi, [edi+8]
;-------------------------------------------------------------------
; Horizontal loop end check
        dec     ecx
        jnz     chroma420p_to_422_sse2_next_8_pixel
;-------------------------------------------------------------------
; Vertical loop end check
        mov     ecx, edx
        add     edi, edx
        lea     esi, [esi+eax]          ; start of next input row
        lea     edi, [edi+eax]          ; start of output row 2 rows down
        shr     ecx, 4                  ; steps per row = width/16
        dec     ebx
        jnz     chroma420p_to_422_sse2_next_2_line
;-------------------------------------------------------------------
; Interpolation exception - copy the bottom row
; On entry: esi = input row start, edi = output row start, ecx = width/16
chroma420p_to_422_sse2_copy_bottom:
        shl     ecx, 1                  ; dwords per row
        rep movsd
;-------------------------------------------------------------------
; Cleanup
chroma420p_to_422_sse2_exit:
        pop     eax                     ; restore registers
        pop     ebx
        pop     ecx
        pop     edx
        pop     esi
        pop     edi
        ret     12
;-------------------------------------------------------------------
_chroma420p_to_422_sse2@12 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuv422_to_bgr_sse2 - YUV -> RGB conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC C _yuv422_to_bgr_sse2@16
; void __stdcall yuv422_to_bgr_sse2(
; [esp + 4] = FRAME *top,
; [esp + 8] = FRAME *bottom,
; [esp +12] = unsigned char *out,
; [esp +16] = BGR_CONVERSION_PARAMETER *prm
; )
_yuv422_to_bgr_sse2@16 PROC
;-------------------------------------------------------------------
; 巊梡偡傞儘乕僇儖曄悢
;
; [esp+ 8] - work
; [esp+ 32] - y_gain
; [esp+ 48] - half_13
; [esp+ 64] - bu
; [esp+ 80] - guv
; [esp+ 96] - rv
; [esp+112] - width/8
; [esp+116] - width%8*3
; [esp+120] - in_step*2
; [esp+124] - out_step
;
; total 128 + 兛
;-------------------------------------------------------------------
; 儗僕僗僞偺戅旔
push ebp
push edi
push esi
push edx
push ecx
push ebx
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -