⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skl_mb_mmx.asm

📁 mpeg4编解码器
💻 ASM
📖 第 1 页 / 共 3 页
字号:
  ret           ; tail of a routine whose start lies before this excerpt

;//////////////////////////////////////////////////////////////////////
;// Conventions visible in this excerpt (NASM syntax, 32-bit x86, MMX):
;//   eax = source pointer, ecx = destination pointer, edx = line stride
;//   (the 16-wide HH routines reload Dst from [esp+4] and Src from
;//   [esp+8], so the arguments are passed on the stack).
;//   PROLOG, PROLOG0, MIX2, COPY_FH_MMX and the Rounder*_MMX symbols are
;//   defined outside this excerpt.
;//   NOTE(review): PROLOG presumably loads eax/ecx/edx, puts the rounder
;//   in mm7 and zeroes mm6 -- confirm against its definition.
;//   NOTE(review): MIX2 presumably combines mm0/mm1 with mm2/mm3 into a
;//   rounded average left in mm0 -- confirm against its definition.
;//
;// The row sequences below were fully hand-unrolled in the original;
;// they are expressed with %rep here and expand to the same instruction
;// stream.
;//////////////////////////////////////////////////////////////////////

align 16
Skl_Copy_8x8_FH_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 7
  COPY_FH_MMX
  lea ecx,[ecx+edx]         ; next destination row
%endrep
  COPY_FH_MMX
  ret

; One 16-byte row, done as two 8-byte halves: each half loads [eax+k] and
; its right-hand neighbour [eax+k+1], runs MIX2 and stores mm0.
; Advances eax by one row; the caller advances ecx.
; Clobbers mm0-mm3 (plus whatever MIX2 uses).
%macro COPY_16x8_FH_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  MIX2
  movq [ecx], mm0
  movq mm0, [eax+8]
  movq mm2, [eax+9]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX2
  movq [ecx+8], mm0
%endmacro

align 16
Skl_Copy_16x8_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 7
  COPY_16x8_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_FH_MMX
  ret

align 16
Skl_Copy_16x8_FH_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 7
  COPY_16x8_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_FH_MMX
  ret

align 16
Skl_H_Pass_2Taps_MMX:       ; second label on the same entry point
Skl_Copy_16x16_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 15
  COPY_16x8_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_FH_MMX
  ret

;//////////////////////////////////////////////////////////////////////

; One 8-byte row: loads [eax] and the row below it ([eax+edx]), runs MIX2
; and stores mm0 to [ecx]. Advances eax by one row; the caller advances
; ecx. Clobbers mm0-mm3 (plus whatever MIX2 uses).
%macro COPY_HF_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+edx]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX2
  movq [ecx], mm0
%endmacro

align 16
Skl_Copy_8x4_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 3
  COPY_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HF_MMX
  ret

align 16
Skl_Copy_8x4_HF_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 3
  COPY_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HF_MMX
  ret

align 16
Skl_Copy_8x8_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 7
  COPY_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HF_MMX
  ret

align 16
Skl_Copy_8x8_HF_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 7
  COPY_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HF_MMX
  ret

; 16-byte-wide variant of COPY_HF_MMX: the same row/row-below pairing,
; applied to bytes 0..7 and then bytes 8..15 of the row.
; Advances eax by one row; the caller advances ecx.
%macro COPY_16x8_HF_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+edx]
  movq mm1, mm0
  movq mm3, mm2
  MIX2
  movq [ecx], mm0
  movq mm0, [eax+8]
  movq mm2, [eax+edx+8]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX2
  movq [ecx+8], mm0
%endmacro

align 16
Skl_Copy_16x8_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 7
  COPY_16x8_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_HF_MMX
  ret

align 16
Skl_Copy_16x8_HF_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 7
  COPY_16x8_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_HF_MMX
  ret

align 16
Skl_V_Pass_2Taps_MMX:       ; second label on the same entry point
Skl_Copy_16x16_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 15
  COPY_16x8_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_16x8_HF_MMX
  ret

;//////////////////////////////////////////////////////////////////////

; One 8-byte output row built from two source rows.
; In:  mm2/mm3 = word sums (Src[x]+Src[x+1]) of the previous row,
;      low/high four pixels; mm7 = rounder; mm6 is the unpack operand
;      (assumed zero -- TODO confirm in PROLOG).
; Out: [ecx] = packed bytes of (prev_sum + rounder + cur_sum) >> 2;
;      mm2/mm3 = sums of the row just read, ready for the next call.
; Advances eax by one row; the caller advances ecx. Clobbers mm0-mm5.
%macro COPY_HH_MMX 0
  lea eax,[eax+edx]

    ; transfer prev line to mm0/mm1
  movq mm0, mm2
  movq mm1, mm3

    ; load new line in mm2/mm3
  movq mm2, [eax]
  movq mm4, [eax+1]
  movq mm3, mm2
  movq mm5, mm4

  punpcklbw mm2, mm6
  paddusw mm0, mm7    ; rounder
  punpcklbw mm4, mm6
  paddusw mm1, mm7    ; rounder
  punpckhbw mm3, mm6
  paddusw mm2, mm4
  punpckhbw mm5, mm6
  paddusw mm0, mm2
  paddusw mm3, mm5
  psrlw mm0, 2
  paddusw mm1, mm3
  psrlw mm1, 2
  packuswb mm0, mm1
  movq [ecx], mm0
%endmacro

; Preprocess the first line of a column: leaves mm2/mm3 holding the
; low/high word sums (Src[0]+Src[1]) of the row at [eax], which is the
; input COPY_HH_MMX expects. Clobbers mm0-mm3; eax and ecx are unchanged.
; (This sequence was written out inline in every HH routine originally.)
%macro COPY_HH_FIRST_LINE_MMX 0
  movq mm0, [eax]
  movq mm1, mm0
  movq mm2, [eax+1]
  movq mm3, mm2
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  paddusw mm2, mm0
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm3, mm1
%endmacro

align 16
Skl_Copy_8x4_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0
  COPY_HH_FIRST_LINE_MMX
%rep 3
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_Copy_8x4_HH_Rnd1_MMX:
  PROLOG Rounder1_MMX, 0
  COPY_HH_FIRST_LINE_MMX
%rep 3
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_Copy_8x8_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_Copy_8x8_HH_Rnd1_MMX:
  PROLOG Rounder1_MMX, 0
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_Copy_16x8_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0

    ; first column (bytes 0..7 of each row)
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX

    ; second column: reload the original pointers, step 8 bytes right
  mov ecx, [esp+ 4] ; Dst
  mov eax, [esp+ 8] ; Src
  lea ecx, [ecx+8]
  lea eax, [eax+8]
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_Copy_16x8_HH_Rnd1_MMX:
  PROLOG Rounder1_MMX, 0

    ; first column (bytes 0..7 of each row)
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX

    ; second column: reload the original pointers, step 8 bytes right
  mov ecx, [esp+ 4] ; Dst
  mov eax, [esp+ 8] ; Src
  lea ecx, [ecx+8]
  lea eax, [eax+8]
  COPY_HH_FIRST_LINE_MMX
%rep 7
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

align 16
Skl_HV_Pass_2Taps_MMX:      ; second label on the same entry point
Skl_Copy_16x16_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0

    ; first column (bytes 0..7 of each row)
  COPY_HH_FIRST_LINE_MMX
%rep 15
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX

    ; second column: reload the original pointers, step 8 bytes right
  mov ecx, [esp+ 4] ; Dst
  mov eax, [esp+ 8] ; Src
  lea ecx, [ecx+8]
  lea eax, [eax+8]
  COPY_HH_FIRST_LINE_MMX
%rep 15
  COPY_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_HH_MMX
  ret

;//////////////////////////////////////////////////////////////////////
;//
;// 8b to 16b transfer ops
;//
;//////////////////////////////////////////////////////////////////////

; UPLOAD slot, offset
;   %1 = index of the 32-byte destination slot at ecx + %1*32
;   %2 = byte offset added to the source address
; Reads 8 bytes from [eax+%2] and 8 from the row below ([eax+%2+edx]),
; widens each byte to a word by unpacking against mm7 (zeroed by the
; callers below) and stores the 2 x 16 bytes back to back in the slot.
; Clobbers mm0-mm3; eax, ecx and edx are unchanged.
%macro UPLOAD 2
  movq mm0, [eax+%2]
  movq mm2, mm0
  movq mm1, [eax+%2+edx]
  movq mm3, mm1
  punpcklbw mm0, mm7
  punpckhbw mm2, mm7
  movq [ecx+%1*32], mm0
  movq [ecx+%1*32+8], mm2
  punpcklbw mm1, mm7
  punpckhbw mm3, mm7
  movq [ecx+%1*32+16], mm1
  movq [ecx+%1*32+24], mm3
%endmacro

align 16
Skl_Copy_16x8_8To16_MMX:
  ; NOTE(review): in the collapsed source the label was followed by an
  ; empty ';' comment that swallowed PROLOG0; it is restored as a live
  ; statement here, matching Skl_Copy_8x8_8To16_MMX -- confirm against
  ; the pristine file.
  PROLOG0
  pxor mm7, mm7             ; zero operand for the byte->word unpacks
  UPLOAD 0, 0               ; slots 0..3: source bytes 0..7
  UPLOAD 4, 8               ; slots 4..7: source bytes 8..15
  lea eax,[eax+2*edx]       ; each UPLOAD consumes two rows
  UPLOAD 1, 0
  UPLOAD 5, 8
  lea eax,[eax+2*edx]
  UPLOAD 2, 0
  UPLOAD 6, 8
  lea eax,[eax+2*edx]
  UPLOAD 3, 0
  UPLOAD 7, 8
  ret

align 16
Skl_Copy_8x8_8To16_MMX:   ; 31c
  PROLOG0
  pxor mm7, mm7             ; zero operand for the byte->word unpacks
  UPLOAD 0, 0
  lea eax,[eax+2*edx]       ; each UPLOAD consumes two rows
  UPLOAD 1, 0
  lea eax,[eax+2*edx]
  UPLOAD 2, 0
  lea eax,[eax+2*edx]
  UPLOAD 3, 0
  ret

;//////////////////////////////////////////////////////////////////////
;//
;// Diffs (8b->16b)
;//
;//////////////////////////////////////////////////////////////////////

; NOTE(review): the body of DIFF continues past the end of this excerpt;
; only the lines visible here are reproduced, and %endmacro is not among
; them.
%macro DIFF 2
  movq mm0, [eax+%2]    ; Src
  movq mm1, mm0
  movq mm2, [eax+%2+edx]
  movq mm3, mm2
  punpcklbw mm0, mm7
  punpcklbw mm2, mm7

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -