; mc_sse.asm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Boilerplate: assembler configuration shared by every routine in this file.
.586                                    ; enable the Pentium instruction set
.mmx                                    ; enable MMX instructions (mm0-mm7)
.xmm                                    ; enable SSE instructions (pavgb is used below)
.model flat                             ; 32-bit flat memory model
; Code segment: page aligned, public combine type, 32-bit (use32), class 'CODE'.
_TEXT64 segment page public use32 'CODE'
align 16                                ; start the first routine on a 16-byte boundary
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Motion compensation (width 16, Half/Half, 2nd)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
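; Basic approach
; Compute 4 pixels in parallel at 16-bit precision.
; The data should hit the cache, so reading it from memory twice is probably fast enough?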
;
;-------------------------------------------------------------------
PUBLIC C _prediction_w16_hh_2nd_sse@20
;-------------------------------------------------------------------
; void __stdcall prediction_w16_hh_2nd_sse(
;     [esp+ 4] unsigned char *in,
;     [esp+ 8] unsigned char *out,
;     [esp+12] int in_step,
;     [esp+16] int out_step,
;     [esp+20] int height
; )
;
; 16-pixel-wide half/half (horizontal and vertical half-pel) prediction,
; second pass.  For every row y < height and column x in 0..15:
;
;     p         = (in[y][x] + in[y][x+1] + in[y+1][x] + in[y+1][x+1] + 2) >> 2
;     out[y][x] = (out[y][x] + p + 1) >> 1          ; pavgb with the existing output
;
; Reads 17 bytes from each of height+1 input rows; reads and writes
; 16 bytes in each of height output rows.  Returns immediately when
; height <= 0.
;
; ABI:      stdcall, callee removes 20 bytes of arguments (ret 20).
; Clobbers: mm0-mm7 and flags.  No emms is executed here, so the MMX
;           state is left active on return.
;-------------------------------------------------------------------
_prediction_w16_hh_2nd_sse@20 PROC
;-------------------------------------------------------------------
; Register usage
;   esi - input pointer (current row)
;   edi - output pointer (current row)
;   ecx - remaining row count
;   eax - input step (bytes per input row)
;   ebx - output step (bytes per output row)
;-------------------------------------------------------------------
; Local variables
;   none
;-------------------------------------------------------------------
; Fixed-purpose MMX registers
;   mm6 - 0FFFEh in every word (-2); subtracting it adds the rounding term 2
;   mm7 - 0, used to zero-extend bytes to words
;-------------------------------------------------------------------
; Save registers (5 pushes = 20 bytes, hence the +20 on argument offsets)
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch the arguments
        mov     esi, [esp+20+ 4]        ; in
        mov     edi, [esp+20+ 8]        ; out
        mov     eax, [esp+20+12]        ; in_step
        mov     ebx, [esp+20+16]        ; out_step
        mov     ecx, [esp+20+20]        ; height
        test    ecx, ecx
        jle     prediction_w16_hh_2nd_done      ; nothing to do for height <= 0
;-------------------------------------------------------------------
; Build the constants
        pcmpeqw mm6, mm6                ; mm6 = 0FFFFh per word
        pxor    mm7, mm7                ; mm7 = 0
        psllw   mm6, 1                  ; mm6 = 0FFFEh per word (-2)
;-------------------------------------------------------------------
; Row loop: one output row per iteration
prediction_w16_hh_2nd_loop:
;-------------------------------------------------------------------
; Pixels 0-7: mm0 accumulates pixels 0-3, mm4 accumulates pixels 4-7
        movd    mm0, [esi]              ; row y,   x+0
        movd    mm1, [esi+1]            ; row y,   x+1
        movd    mm2, [esi+eax]          ; row y+1, x+0
        movd    mm3, [esi+eax+1]        ; row y+1, x+1
        movd    mm4, [esi+4]
        movd    mm5, [esi+5]
        punpcklbw mm0, mm7              ; widen bytes to words
        punpcklbw mm1, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw   mm0, mm2
        paddw   mm1, mm3
        movd    mm2, [esi+eax+4]
        movd    mm3, [esi+eax+5]
        punpcklbw mm4, mm7
        punpcklbw mm5, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw   mm4, mm2
        paddw   mm5, mm3
        paddw   mm0, mm1                ; mm0 = a+b+c+d for pixels 0-3
        paddw   mm4, mm5                ; mm4 = a+b+c+d for pixels 4-7
        psubw   mm0, mm6                ; +2 (rounding)
        psubw   mm4, mm6
        psrlw   mm0, 2                  ; /4; result <= 255
        psrlw   mm4, 2
        packuswb mm0, mm4               ; back to 8 bytes: pixels 0-7
        pavgb   mm0, [edi]              ; average with the existing output
        movq    [edi], mm0
;-------------------------------------------------------------------
; Pixels 8-15: same computation at byte offset 8
        movd    mm0, [esi+8]
        movd    mm1, [esi+9]
        movd    mm2, [esi+eax+8]
        movd    mm3, [esi+eax+9]
        movd    mm4, [esi+12]
        movd    mm5, [esi+13]
        punpcklbw mm0, mm7
        punpcklbw mm1, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw   mm0, mm2
        paddw   mm1, mm3
        movd    mm2, [esi+eax+12]
        movd    mm3, [esi+eax+13]
        punpcklbw mm4, mm7
        punpcklbw mm5, mm7
        punpcklbw mm2, mm7
        punpcklbw mm3, mm7
        paddw   mm4, mm2
        paddw   mm5, mm3
        paddw   mm0, mm1                ; mm0 = a+b+c+d for pixels 8-11
        paddw   mm4, mm5                ; mm4 = a+b+c+d for pixels 12-15
        psubw   mm0, mm6                ; +2 (rounding)
        psubw   mm4, mm6
        psrlw   mm0, 2
        psrlw   mm4, 2
        packuswb mm0, mm4               ; pixels 8-15
        pavgb   mm0, [edi+8]
        movq    [edi+8], mm0
;-------------------------------------------------------------------
; Advance to the next row and check for the end of the loop
        add     esi, eax
        add     edi, ebx
        dec     ecx
        jnz     prediction_w16_hh_2nd_loop
;-------------------------------------------------------------------
; Restore registers and return
prediction_w16_hh_2nd_done:
        pop     eax
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret     20
_prediction_w16_hh_2nd_sse@20 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Motion compensation (width 16, Full/Half, 1st): horizontal Full, vertical Half
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Basic approach
; Use pavgb to process 8 pixels in parallel.
;-------------------------------------------------------------------
PUBLIC C _prediction_w16_fh_1st_sse@20
;-------------------------------------------------------------------
; void __stdcall prediction_w16_fh_1st_sse(
;     [esp+ 4] unsigned char *in,
;     [esp+ 8] unsigned char *out,
;     [esp+12] int in_step,
;     [esp+16] int out_step,
;     [esp+20] int height
; )
;
; 16-pixel-wide prediction, full-pel horizontally and half-pel
; vertically, first pass (the output is overwritten).  For every
; produced row y and column x in 0..15:
;
;     out[y][x] = (in[y][x] + in[y+1][x] + 1) >> 1          ; pavgb
;
; Rows are produced in groups of four: (height >> 2) * 4 rows are
; written, so height is expected to be a multiple of 4.  One more
; input row than the number of output rows is read.  Returns without
; touching memory when height < 4.
;
; ABI:      stdcall, callee removes 20 bytes of arguments (ret 20).
; Clobbers: mm0-mm7 and flags.  No emms is executed here, so the MMX
;           state is left active on return.
;-------------------------------------------------------------------
_prediction_w16_fh_1st_sse@20 PROC
;-------------------------------------------------------------------
; Register usage
;   esi - input pointer
;   edi - output pointer
;   ecx - loop counter (4-row groups left for the main loop)
;   eax - input step (bytes per input row)
;   ebx - output step (bytes per output row)
;-------------------------------------------------------------------
; Save registers (5 pushes = 20 bytes, hence the +20 on argument offsets)
        push    esi
        push    edi
        push    ecx
        push    ebx
        push    eax
;-------------------------------------------------------------------
; Fetch the arguments
        mov     esi, [esp+20+ 4]        ; in
        mov     edi, [esp+20+ 8]        ; out
        mov     eax, [esp+20+12]        ; in_step
        mov     ebx, [esp+20+16]        ; out_step
        mov     ecx, [esp+20+20]        ; height
        cmp     ecx, 4
        jl      prediction_w16_fh_1st_done      ; less than one 4-row group
;-------------------------------------------------------------------
; Prime the pipeline with the first two input rows
        shr     ecx, 2                  ; ecx = number of 4-row groups (>= 1)
        movq    mm0, [esi]              ; row 0
        movq    mm1, [esi+8]
        movq    mm2, [esi+eax]          ; row 1
        movq    mm3, [esi+eax+8]
        lea     esi, [esi+eax*2]
        dec     ecx                     ; the last group is emitted by the tail
        jz      prediction_w16_fh_1st_tail      ; only one group: skip the loop
;-------------------------------------------------------------------
; Main loop: 4 output rows per iteration.
; On entry mm0/mm1 = row r, mm2/mm3 = row r+1, esi -> row r+2.
prediction_w16_fh_1st_loop:
        movq    mm4, [esi]              ; row r+2
        movq    mm5, [esi+8]
        movq    mm6, [esi+eax]          ; row r+3
        movq    mm7, [esi+eax+8]
        lea     esi, [esi+eax*2]
        pavgb   mm0, mm2                ; out r   = avg(row r,   row r+1)
        pavgb   mm1, mm3
        pavgb   mm2, mm4                ; out r+1 = avg(row r+1, row r+2)
        pavgb   mm3, mm5
        movq    [edi], mm0
        movq    [edi+8], mm1
        movq    [edi+ebx], mm2
        movq    [edi+ebx+8], mm3
        lea     edi, [edi+ebx*2]
        movq    mm0, [esi]              ; row r+4
        movq    mm1, [esi+8]
        movq    mm2, [esi+eax]          ; row r+5
        movq    mm3, [esi+eax+8]
        lea     esi, [esi+eax*2]
        pavgb   mm4, mm6                ; out r+2 = avg(row r+2, row r+3)
        pavgb   mm5, mm7
        pavgb   mm6, mm0                ; out r+3 = avg(row r+3, row r+4)
        pavgb   mm7, mm1
        movq    [edi], mm4
        movq    [edi+8], mm5
        movq    [edi+ebx], mm6
        movq    [edi+ebx+8], mm7
        lea     edi, [edi+ebx*2]
;-------------------------------------------------------------------
; Loop end check
        dec     ecx
        jnz     prediction_w16_fh_1st_loop
;-------------------------------------------------------------------
; Output the last 4 rows; only one further input row is read at the end
prediction_w16_fh_1st_tail:
        movq    mm4, [esi]              ; row r+2
        movq    mm5, [esi+8]
        movq    mm6, [esi+eax]          ; row r+3
        movq    mm7, [esi+eax+8]
        lea     esi, [esi+eax*2]
        pavgb   mm0, mm2
        pavgb   mm1, mm3
        pavgb   mm2, mm4
        pavgb   mm3, mm5
        movq    [edi], mm0
        movq    [edi+8], mm1
        movq    [edi+ebx], mm2
        movq    [edi+ebx+8], mm3
        lea     edi, [edi+ebx*2]
        movq    mm0, [esi]              ; row r+4 (the final extra input row)
        movq    mm1, [esi+8]
        pavgb   mm4, mm6
        pavgb   mm5, mm7
        pavgb   mm6, mm0
        pavgb   mm7, mm1
        movq    [edi], mm4
        movq    [edi+8], mm5
        movq    [edi+ebx], mm6
        movq    [edi+ebx+8], mm7
;-------------------------------------------------------------------
; Restore registers and return
prediction_w16_fh_1st_done:
        pop     eax
        pop     ebx
        pop     ecx
        pop     edi
        pop     esi
        ret     20
_prediction_w16_fh_1st_sse@20 ENDP
;-------------------------------------------------------------------
; End
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Motion compensation (width 16, Full/Half, 2nd): horizontal Full, vertical Half
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PUBLIC C _prediction_w16_fh_2nd_sse@20
; void __stdcall prediction_w16_fh_2nd_sse(
; [esp+ 4] unsigned char *in,
; [esp+ 8] unsigned char *out,
; [esp+12] int in_step,
; [esp+16] int out_step,
; [esp+20] int height
; )
_prediction_w16_fh_2nd_sse@20 PROC
;-------------------------------------------------------------------
; 巊梡偡傞儗僕僗僞
; esi - 擖椡
; edi - 弌椡
; ecx - 儖乕僾僇僂儞僞
; eax - 擖椡僗僥僢僾
; ebx - 弌椡僗僥僢僾
;-------------------------------------------------------------------
; 儗僕僗僞偺戅旔
push esi
push edi
push ecx
push ebx
push eax
;-------------------------------------------------------------------
; 堷悢偐傜僨乕僞傪庴偗庢偭偰偍偔
mov esi, [esp+20+ 4]
mov edi, [esp+20+ 8]
mov eax, [esp+20+12]
mov ebx, [esp+20+16]
mov ecx, [esp+20+20]
;-------------------------------------------------------------------
; 學悢嶌惉
shr ecx, 2
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+eax]
movq mm3, [esi+eax+8]
lea esi, [esi+eax*2]
dec ecx
;-------------------------------------------------------------------
; loop
prediction_w16_fh_2nd_loop:
dec ecx
;-------------------------------------------------------------------
; core
movq mm4, [esi]
movq mm5, [esi+8]
movq mm6, [esi+eax]
movq mm7, [esi+eax+8]
lea esi, [esi+eax*2]
pavgb mm0, mm2
pavgb mm1, mm3
pavgb mm2, mm4
pavgb mm3, mm5
pavgb mm0, [edi]
pavgb mm1, [edi+8]
pavgb mm2, [edi+ebx]
pavgb mm3, [edi+ebx+8]
movq [edi], mm0
movq [edi+8], mm1
movq [edi+ebx], mm2
movq [edi+ebx+8], mm3
lea edi, [edi+ebx*2]
movq mm0, [esi]
movq mm1, [esi+8]
movq mm2, [esi+eax]
movq mm3, [esi+eax+8]
lea esi, [esi+eax*2]
pavgb mm4, mm6
pavgb mm5, mm7
pavgb mm6, mm0
pavgb mm7, mm1
pavgb mm4, [edi]
pavgb mm5, [edi+8]
pavgb mm6, [edi+ebx]
pavgb mm7, [edi+ebx+8]
movq [edi], mm4
movq [edi+8], mm5
movq [edi+ebx], mm6
movq [edi+ebx+8], mm7
lea edi, [edi+ebx*2]
;-------------------------------------------------------------------
; 儖乕僾廔抂僠僃僢僋
test ecx, ecx
jnz prediction_w16_fh_2nd_loop
;-------------------------------------------------------------------
; 嵟屻偺係峴弌椡
movq mm4, [esi]
movq mm5, [esi+8]
movq mm6, [esi+eax]
movq mm7, [esi+eax+8]
lea esi, [esi+eax*2]
pavgb mm0, mm2
pavgb mm1, mm3
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -