; frame_mmx.asm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Boilerplate: target CPU, enabled instruction sets, memory model and
; the code segment that the procedures in this file are assembled into.
.586                                    ; accept Pentium-class instructions
.mmx                                    ; accept MMX instructions
.xmm                                    ; accept SSE instructions
.model flat                             ; 32-bit flat memory model
; Page-aligned, public, 32-bit segment of class 'CODE'.
_TEXT64 segment page public use32 'CODE'
align 16                                ; start the following code on a 16-byte boundary
;-------------------------------------------------------------------
; Constants
;-------------------------------------------------------------------
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; chroma420i_to_422_mmx - interpolation from interlaced 420 to 422
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; (y*4+2) = ((y*2 ) * 5 + (y*2+2) * 3 )>> 3
; (y*4+3) = ((y*2+1) * 7 + (y*2+3) * 1 )>> 3
; (y*4+4) = ((y*2 ) * 1 + (y*2+2) * 7 )>> 3
; (y*4+5) = ((y*2+1) * 3 + (y*2+3) * 5 )>> 3
;-------------------------------------------------------------------
PUBLIC C _chroma420i_to_422_mmx@12
;-------------------------------------------------------------------
; void __stdcall chroma420i_to_422_mmx(
; [esp + 4] = unsigned char *data,
; [esp + 8] = int width,
; [esp +12] = int height
; )
;
; Rows are addressed with a stride of `width` bytes.  Source samples
; are read from the first width/2 bytes of a row (data + row*width)
; and results are written to the second width/2 bytes of a row
; (data + width/2 + row*width), so reads and writes share one buffer
; but never the same bytes.
;
; With s[k] = source row k and d[j] = destination row j:
;   d[0] = s[0], d[1] = s[1]                       (plain copy)
;   for y = 0 .. height/4-2:
;     d[4y+2] = (5*s[2y]   + 3*s[2y+2]) >> 3
;     d[4y+3] = (7*s[2y+1] + 1*s[2y+3]) >> 3
;     d[4y+4] = (1*s[2y]   + 7*s[2y+2]) >> 3
;     d[4y+5] = (3*s[2y+1] + 5*s[2y+3]) >> 3
;   the next two source rows are then copied unchanged to the next
;   two destination rows.
;
; Preconditions (the loops are bottom-tested and there are no guards):
;   width/8 >= 1 and height/4-1 >= 1.  Widths are truncated by the
;   shifts, so only whole groups of 4 bytes per half-row are handled.
;
; Register/stack contract:
;   edi, esi, edx, ecx, ebx, eax are saved and restored.
;   mm0-mm7 are overwritten; EMMS is executed before returning.
;   Callee pops its 12 bytes of arguments (ret 12).
;
; Local frame (12 bytes, [esp+0] .. [esp+11]):
;   [esp+0] = width/8    dwords per half-row / horizontal loop count
;   [esp+4] = width/2*7  destination advance after one row group
;   [esp+8] = width/2*3  source advance after one row group
;   ([esp+12] is the saved EAX slot and must not be used as a local.)
;
_chroma420i_to_422_mmx@12 PROC
;-------------------------------------------------------------------
; Save registers (6 pushes = 24 bytes)
        push    edi
        push    esi
        push    edx
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch arguments (24 bytes of saved registers + return address)
        mov     esi, [esp+24+4]         ; data
        mov     edx, [esp+24+8]         ; width
        mov     eax, [esp+24+12]        ; height
;-------------------------------------------------------------------
; Reserve the local frame
        sub     esp, 12
;-------------------------------------------------------------------
; Build loop parameters
;
; Values produced here
;   (height/4)-1  - eax
;   width         - edx
;   width/8       - ecx & [esp+0]
;   width*2-4     - ebx
;   width/2*7     - [esp+4]
;   width/2*3     - [esp+8]
;   data+width/2  - edi
;   all 5         - mm7
        mov     ecx, edx
        mov     ebx, edx
        pcmpeqw mm7, mm7                ; every word = FFFFh
        pcmpeqw mm6, mm6                ; every word = FFFFh
        shr     eax, 2                  ; height/4
        shr     ecx, 1                  ; width/2
        shl     ebx, 1                  ; width*2
        psrlw   mm7, 14                 ; every word = 3
        psllw   mm6, 1                  ; every word = FFFEh (-2)
        lea     edi, [esi+ecx]          ; data + width/2
        add     ecx, edx                ; width/2*3
        dec     eax                     ; height/4 - 1
        add     ebx, ecx                ; width*2 + width/2*3 = width/2*7
        mov     [esp+4], ebx
        mov     [esp+8], ecx
        mov     ecx, edx
        mov     ebx, edx
        shr     ecx, 3                  ; width/8
        shl     ebx, 1                  ; width*2
        mov     [esp+0], ecx
        psubw   mm7, mm6                ; 3 - (-2) = 5 in every word
        sub     ebx, 4                  ; width*2 - 4
;-------------------------------------------------------------------
; Not interpolated: copy the top two rows unchanged
        cld
        rep movsd                       ; s[0] -> d[0]
        mov     ecx, esi                ; ecx = data + width/2
        mov     esi, edi                ; esi = data + width (s[1])
        lea     edi, [ecx+edx]          ; edi = data + width/2*3 (d[1])
        mov     ecx, [esp+0]
        rep movsd                       ; s[1] -> d[1]
        lea     edi, [esi+edx]          ; edi = data + width/2*5 (d[2])
        mov     esi, [esp+12+24+4]      ; esi = data (12 = local frame size)
        mov     ecx, [esp+0]
;-------------------------------------------------------------------
; Vertical loop
chroma420i_to_422_next_4_line:
;-------------------------------------------------------------------
; Horizontal loop
chroma420i_to_422_next_4_pixel:
;-------------------------------------------------------------------
; Interpolation core
;
; Register usage here
;   esi - input address
;   edi - output address
;   edx - width
;   mm7 - all 5
        movd    mm0, [esi]              ; c[0][x]
        lea     esi, [esi+edx]
        movd    mm1, [esi]              ; c[1][x]
        lea     esi, [esi+edx]
        movd    mm2, [esi]              ; c[2][x]
        movd    mm3, [esi+edx]          ; c[3][x]
        pxor    mm6, mm6                ; all 0
        pcmpeqw mm4, mm4
        pcmpeqw mm5, mm5
        psrlw   mm4, 14                 ; all 3
        psrlw   mm5, 13                 ; all 7
        punpcklbw mm0, mm6              ; widen the four bytes to words
        punpcklbw mm1, mm6
        punpcklbw mm2, mm6
        punpcklbw mm3, mm6
        movq    mm6, mm4                ; keep a second copy of "all 3"
        pmullw  mm4, mm2                ; 3*c[2][x]
        pmullw  mm2, mm5                ; c[2][x]*7
        pmullw  mm5, mm1                ; 7*c[1][x]
        pmullw  mm1, mm6                ; c[1][x]*3
        paddw   mm2, mm0                ; c[2][x]*7 + c[0][x]
        paddw   mm5, mm3                ; 7*c[1][x] + c[3][x]
        pmullw  mm0, mm7                ; c[0][x]*5
        pmullw  mm3, mm7                ; c[3][x]*5
        paddw   mm0, mm4                ; c[0][x]*5 + 3*c[2][x]
        paddw   mm3, mm1                ; c[3][x]*5 + c[1][x]*3
        psraw   mm2, 3
        psraw   mm5, 3
        psraw   mm0, 3
        psraw   mm3, 3
        packuswb mm0, mm2               ; low dword: 1st output row, high dword: 3rd
        packuswb mm5, mm3               ; low dword: 2nd output row, high dword: 4th
        movd    [edi], mm0              ; 1st output row of the group
        lea     edi, [edi+edx]
        psrlq   mm0, 32
        movd    [edi], mm5              ; 2nd output row
        lea     edi, [edi+edx]
        psrlq   mm5, 32
        movd    [edi], mm0              ; 3rd output row
        movd    [edi+edx], mm5          ; 4th output row
;-------------------------------------------------------------------
; End-of-horizontal-loop check
        sub     esi, ebx                ; back up two rows, forward 4 bytes
        sub     edi, ebx
        dec     ecx
        jnz     chroma420i_to_422_next_4_pixel
;-------------------------------------------------------------------
; End-of-vertical-loop check
        add     edi, [esp+4]            ; output: advance to 4 rows further down
        add     esi, [esp+8]            ; input: advance to 2 rows further down
        mov     ecx, [esp+0]
        dec     eax
        jnz     chroma420i_to_422_next_4_line
;-------------------------------------------------------------------
; Not interpolated: copy the bottom two rows unchanged
        shr     edx, 1                  ; width/2
        rep movsd
        lea     esi, [esi+edx]          ; skip the second half of the row
        lea     edi, [edi+edx]
        mov     ecx, [esp+0]
        rep movsd
;-------------------------------------------------------------------
; Cleanup
        add     esp, 12                 ; release the local frame
        emms                            ; leave MMX state so x87 code can run afterwards
        pop     eax                     ; restore registers
        pop     ebx
        pop     ecx
        pop     edx
        pop     esi
        pop     edi
        ret     12
;-------------------------------------------------------------------
_chroma420i_to_422_mmx@12 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; chroma420p_to_422_mmx - interpolation from progressive 420 to 422
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; [o1] = ([i0] * 3 + [i1]) / 4
; [o2] = ([i0] + [i1] * 3) / 4
;-------------------------------------------------------------------
PUBLIC C _chroma420p_to_422_mmx@12
;-------------------------------------------------------------------
; void __stdcall chroma420p_to_422_mmx(
; [esp + 4] = unsigned char *data,
; [esp + 8] = int width,
; [esp +12] = int height
; )
;
; Rows are addressed with a stride of `width` bytes.  Source samples
; are read from the first width/2 bytes of a row (data + row*width)
; and results are written to the second width/2 bytes of a row
; (data + width/2 + row*width).
;
; With s[k] = source row k and d[j] = destination row j:
;   d[0] = s[0]                                    (plain copy)
;   for k = 0 .. height/2-2:
;     d[2k+1] = (3*s[k] + s[k+1]) >> 2
;     d[2k+2] = (s[k] + 3*s[k+1]) >> 2
;   the source row the loop stopped on is then copied unchanged to
;   the next destination row.
;
; Preconditions (the loops are bottom-tested and there are no guards):
;   width/16 >= 1 and height/2-1 >= 1.  Each horizontal step handles
;   8 bytes, so only whole groups of 8 bytes per half-row are handled.
;
; Register/stack contract:
;   edi, esi, edx, ecx, ebx, eax are saved and restored.
;   mm0-mm7 are overwritten; EMMS is executed before returning.
;   Callee pops its 12 bytes of arguments (ret 12).
;
_chroma420p_to_422_mmx@12 PROC
;-------------------------------------------------------------------
; Save registers (6 pushes = 24 bytes)
        push    edi
        push    esi
        push    edx
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch arguments (24 bytes of saved registers + return address)
        mov     esi, [esp+24+4]         ; data
        mov     edx, [esp+24+8]         ; width
        mov     ebx, [esp+24+12]        ; height
;-------------------------------------------------------------------
; Build loop parameters
;
; Values produced here
;
;   data          - esi
;   data+width/2  - edi
;   width/2       - eax
;   height/2-1    - ebx
;   width         - edx
;   all 3         - mm6
;   all 0         - mm7
;
        mov     eax, edx
        mov     ecx, edx
        shr     ebx, 1                  ; height/2
        shr     eax, 1                  ; width/2
        shr     ecx, 3                  ; width/8 dwords = one half-row
        pcmpeqw mm6, mm6                ; every word = FFFFh
        pxor    mm7, mm7
        dec     ebx                     ; height/2 - 1
        lea     edi, [esi+eax]
        psrlw   mm6, 14                 ; every word = 3
;-------------------------------------------------------------------
; Not interpolated: copy the first row unchanged
        cld                             ; make the copy direction explicit, as the sibling routine does
        rep movsd
        mov     ecx, edx
        sub     esi, eax                ; back to the start of the first input row
        add     edi, eax                ; second half of the second row
        shr     ecx, 4                  ; width/16 = 8-byte steps per half-row
;-------------------------------------------------------------------
; Vertical loop
chroma420p_to_422_mmx_next_2_line:
;-------------------------------------------------------------------
; Horizontal loop
chroma420p_to_422_mmx_next_8_pixel:
;-------------------------------------------------------------------
; Interpolation core
;
; Parameters here
;   esi - input address
;   edi - output address
;   edx - width
;   mm6 - all 3
;   mm7 - all 0
        movd    mm0, [esi]              ; i[0][0:3]
        movd    mm1, [esi+4]            ; i[0][4:7]
        movd    mm4, [esi+edx]          ; i[1][0:3]
        movd    mm5, [esi+edx+4]        ; i[1][4:7]
        movq    mm2, mm6
        movq    mm3, mm6
        punpcklbw mm0, mm7              ; widen bytes to words
        punpcklbw mm1, mm7
        punpcklbw mm4, mm7
        punpcklbw mm5, mm7
        pmullw  mm2, mm4                ; 3*i[1][0:3]
        pmullw  mm3, mm5                ; 3*i[1][4:7]
        lea     esi, [esi+8]
        paddw   mm2, mm0                ; i[0] + 3*i[1]
        paddw   mm3, mm1
        pmullw  mm0, mm6                ; 3*i[0][0:3]
        pmullw  mm1, mm6                ; 3*i[0][4:7]
        paddw   mm0, mm4                ; 3*i[0] + i[1]
        paddw   mm1, mm5
        psrlw   mm0, 2
        psrlw   mm1, 2
        psrlw   mm2, 2
        psrlw   mm3, 2
        packuswb mm2, mm3               ; o[1][0:7]
        packuswb mm0, mm1               ; o[0][0:7]
        movq    [edi], mm0
        movq    [edi+edx], mm2
        lea     edi, [edi+8]
;-------------------------------------------------------------------
; End-of-horizontal-loop check
        dec     ecx
        jnz     chroma420p_to_422_mmx_next_8_pixel
;-------------------------------------------------------------------
; End-of-vertical-loop check
        mov     ecx, edx
        add     edi, edx                ; output: width/2 + width + width/2 = two rows down
        lea     esi, [esi+eax]          ; input: width/2 + width/2 = one row down
        lea     edi, [edi+eax]
        shr     ecx, 4                  ; reload the horizontal count (width/16)
        dec     ebx
        jnz     chroma420p_to_422_mmx_next_2_line
;-------------------------------------------------------------------
; Not interpolated: copy the bottom row unchanged
        shl     ecx, 1                  ; (width/16)*2 dwords
        rep movsd
;-------------------------------------------------------------------
; Cleanup
        emms                            ; leave MMX state so x87 code can run afterwards
        pop     eax                     ; restore registers
        pop     ebx
        pop     ecx
        pop     edx
        pop     esi
        pop     edi
        ret     12
;-------------------------------------------------------------------
_chroma420p_to_422_mmx@12 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; yuv422_to_bgr_mmx - YUV -> RGB conversion
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;-------------------------------------------------------------------
PUBLIC C _yuv422_to_bgr_mmx@16
; void __stdcall yuv422_to_bgr_mmx(
; [esp + 4] = FRAME *top,
; [esp + 8] = FRAME *bottom,
; [esp +12] = unsigned char *out,
; [esp +16] = BGR_CONVERSION_PARAMETER *prm
; )
_yuv422_to_bgr_mmx@16 PROC
;-------------------------------------------------------------------
; 巊梡偡傞儘乕僇儖曄悢
;
; [esp+ 8] y_offset
; [esp+ 16] y_gain
; [esp+ 24] half_13
; [esp+ 32] c_offset
; [esp+ 40] bu
; [esp+ 48] guv
; [esp+ 56] rv
; [esp+ 64] work
; - 偙偙傑偱 8 byte alignment 曐徹
; [esp+ 96] y_src
; [esp+100] y_src_next
; [esp+104] u_src
; [esp+108] v_src
; [esp+112] u_src_next
; [esp+116] v_src_next
; [esp+120] out
; [esp+124] width/8
; [esp+128] width%8*3
; [esp+132] in_step*2
; [esp+136] out_step
;
; total 140 + 兛
;-------------------------------------------------------------------
; 儗僕僗僞偺戅旔
push ebp
push edi
push esi
push edx
push ecx
push ebx
push eax
;-------------------------------------------------------------------
; 儀乕僗億僀儞僞偵僗僞僢僋傾僪儗僗傪婰壇
mov ebp, esp
;-------------------------------------------------------------------
; 儘乕僇儖曄悢椞堟偺妋曐
sub esp, 140
and esp, 0fffffff8h
;-------------------------------------------------------------------
; 曄姺學悢偺嶌惉
;
; 嶌惉偡傋偒僷儔儊乕僞
;
; y_offset - [esp+8]
; y_gain - [esp+16]
; half_13 - [esp+24]
; c_offset - [esp+32]
; bu - [esp+40]
; guv - [esp+48]
; rv - [esp+56]
;
; y_src - esi & [esp+96]
; y_src_next - [esp+100]
; u_src - eax & [esp+104]
; v_src - ebx & [esp+108]
; u_src_next - [esp+112]
; v_src_next - [esp+116]
; out - edi & [esp+120]
; width/8 - ecx & [esp+124]
; width%8*3 - [esp+128]
; height - edx
; in_step*2 - [esp+132]
; out_step - [esp+136]
;
mov edx, [ebp+28+16]
; (web-viewer keyboard-shortcut help text removed here; it was not part of the assembly source)