⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skl_mb_mmx.asm

📁 mpeg4编解码器
💻 ASM
📖 第 1 页 / 共 3 页
字号:
;/********************************************************
; * Some code. Copyright (C) 2003 by Pascal Massimino.   *
; * All Rights Reserved.      (http://skal.planet-d.net) *
; * For Educational/Academic use ONLY. See 'LICENSE.TXT'.*
; ********************************************************/
;//////////////////////////////////////////////////////////////////////
;// Macro-block processing
;//////////////////////////////////////////////////////////////////////
; [BITS 32]

; Syntax: NASM (Intel operand order), 32-bit x86 with MMX.
; `globl`, `DATA` and `TEXT` are not defined in this file; presumably they
; come from the header included below -- confirm against skl_nasm.h.

%include "../../include/skl_syst/skl_nasm.h"

globl Skl_Add_8x4_FF_MMX
globl Skl_Add_8x4_FH_Rnd0_MMX
globl Skl_Add_8x4_HF_Rnd0_MMX
globl Skl_Add_8x4_HH_Rnd0_MMX
globl Skl_Add_8x8_FF_MMX
globl Skl_Add_8x8_FH_Rnd0_MMX
globl Skl_Add_8x8_HF_Rnd0_MMX
globl Skl_Add_8x8_HH_Rnd0_MMX
globl Skl_Add_16x8_FF_MMX
globl Skl_Add_16x8_FH_Rnd0_MMX
globl Skl_Add_16x8_HF_Rnd0_MMX
globl Skl_Add_16x8_HH_Rnd0_MMX

globl Skl_Copy_8x4_FF_MMX
globl Skl_Copy_8x4_FH_Rnd1_MMX
globl Skl_Copy_8x4_HF_Rnd1_MMX
globl Skl_Copy_8x4_HH_Rnd1_MMX
globl Skl_Copy_8x4_FH_Rnd0_MMX
globl Skl_Copy_8x4_HF_Rnd0_MMX
globl Skl_Copy_8x4_HH_Rnd0_MMX
globl Skl_Copy_8x8_FF_MMX
globl Skl_Copy_8x8_FH_Rnd1_MMX
globl Skl_Copy_8x8_HF_Rnd1_MMX
globl Skl_Copy_8x8_HH_Rnd1_MMX
globl Skl_Copy_8x8_FH_Rnd0_MMX
globl Skl_Copy_8x8_HF_Rnd0_MMX
globl Skl_Copy_8x8_HH_Rnd0_MMX
globl Skl_Copy_16x8_FF_MMX
globl Skl_Copy_16x8_FH_Rnd1_MMX
globl Skl_Copy_16x8_HF_Rnd1_MMX
globl Skl_Copy_16x8_HH_Rnd1_MMX
globl Skl_Copy_16x8_FH_Rnd0_MMX
globl Skl_Copy_16x8_HF_Rnd0_MMX
globl Skl_Copy_16x8_HH_Rnd0_MMX

globl Skl_H_Pass_2Taps_MMX
globl Skl_V_Pass_2Taps_MMX
globl Skl_HV_Pass_2Taps_MMX

globl Skl_Copy_16x8_8To16_MMX
globl Skl_Copy_8x8_8To16_MMX
globl Skl_Diff_16x8_8To16_MMX
globl Skl_Diff_8x8_8To16_MMX
globl Skl_Diff_16x8_88To16_MMX
globl Skl_Diff_8x8_88To16_MMX
globl Skl_Move_16x8_MMX
globl Skl_Move_8x8_MMX

globl Skl_Copy_Upsampled_8x8_16To8_MMX
globl Skl_Add_Upsampled_8x8_16To8_MMX
globl Skl_HFilter_31_MMX
globl Skl_VFilter_31_x86
globl Skl_HFilter_31_x86
globl Skl_Filter_18x18_To_8x8_MMX
globl Skl_Filter_Diff_18x18_To_8x8_MMX

;//////////////////////////////////////////////////////////////////////

DATA

align 16
  ; Each constant is four 16-bit words (one MMX register wide).
Rounder2_MMX: times 4 dw 2
Rounder1_MMX: times 4 dw 1
Rounder0_MMX: times 4 dw 0

Up31:     dw  3, 1, 3, 1
Up13:     dw  1, 3, 1, 3
Cst0:     dw  0, 0, 0, 0
Cst2:     dw  2, 2, 2, 2
Cst3:     dw  3, 3, 3, 3
Cst32:    dw 32,32,32,32
Cst2000:  dw  2, 0, 0, 0
Cst0002:  dw  0, 0, 0, 2
Mask_ff:  dw 0xff,0xff,0xff,0xff

TEXT

;//////////////////////////////////////////////////////////////////////
;//
;//   Half-pixel interpolation functions
;//
;//////////////////////////////////////////////////////////////////////
;
; Register roles shared by every routine below (set up by PROLOG0/PROLOG):
;   ecx = Dst pointer   (stack argument at [esp+ 4])
;   eax = Src pointer   (stack argument at [esp+ 8])
;   edx = BpS           (stack argument at [esp+12]; used as the byte offset
;                        from one row to the next)
;   mm6 = 0             (zero source for byte->word unpacking)
;   mm7 = rounder words loaded from the label given to PROLOG
;   mm5 = Rounder1_MMX when PROLOG's second argument is non-zero
; The routines overwrite eax/ecx/edx and MMX registers and issue no `emms`;
; no general-purpose register other than eax/ecx/edx is written.

  ; Load the three stack arguments into ecx/eax/edx.
%macro PROLOG0 0
  mov ecx, [esp+ 4] ; Dst
  mov eax, [esp+ 8] ; Src
  mov edx, [esp+12] ; BpS
%endmacro

  ; Zero mm6, load the rounder into mm7 (and optionally Rounder1 into mm5),
  ; then load the arguments via PROLOG0.
%macro PROLOG 2   ; %1: Rounder, %2 load Dst-Rounder
  pxor mm6, mm6
  movq mm7, [%1]    ; TODO: dangerous! (eax isn't checked)
%if %2
  movq mm5, [Rounder1_MMX]
%endif
  PROLOG0
%endmacro

  ; Widen and add two rows of 8 bytes.
  ; In:  mm0 == mm1 == first 8 bytes, mm2 == mm3 == second 8 bytes, mm6 == 0
  ; Out: mm0 = low 4 bytes of (first+second) as words,
  ;      mm1 = high 4 bytes of (first+second) as words.
  ;      mm2/mm3 are left holding the widened second operand.
%macro MIX 0
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm0, mm2
  paddusw mm1, mm3
%endmacro

  ; Finish a two-tap source average and blend it with the destination.
  ; In:  mm0/mm1 = widened source sums (from MIX), mm2 = 8 Dst bytes,
  ;      mm7 = source rounder, mm5 = destination rounder, mm6 == 0
  ; Out: mm0 = 8 packed bytes of (((sum + mm7) >> 1) + Dst + mm5) >> 1
  ; Clobbers mm1, mm2, mm3.
%macro MIX_DST 0
  movq mm3, mm2
  paddusw mm0, mm7  ; rounder
  paddusw mm1, mm7  ; rounder
  punpcklbw mm2, mm6
  punpckhbw mm3, mm6
  psrlw mm0, 1
  psrlw mm1, 1
  paddusw mm0, mm2  ; mix Src(mm0/mm1) with Dst(mm2/mm3)
  paddusw mm1, mm3
  paddusw mm0, mm5  ; dst-rounder
  paddusw mm1, mm5  ; dst-rounder
  psrlw mm0, 1
  psrlw mm1, 1
  packuswb mm0, mm1
%endmacro

  ; Two-tap average without destination blending.
  ; In:  mm0 == mm1 == first 8 bytes, mm2 == mm3 == second 8 bytes,
  ;      mm6 == 0, mm7 = rounder
  ; Out: mm0 = 8 packed bytes of (first + second + mm7) >> 1
  ; Clobbers mm1, mm2, mm3.
%macro MIX2 0
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  paddusw mm0, mm2
  paddusw mm0, mm7
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm1, mm7
  paddusw mm1, mm3
  psrlw mm0, 1
  psrlw mm1, 1
  packuswb mm0, mm1
%endmacro

;//////////////////////////////////////////////////////////////////////
;// Add functions
;//////////////////////////////////////////////////////////////////////

  ; One 8-byte row: Dst = (Src + Dst + mm5) >> 1.
  ; %1 = number of rows (in units of edx) to advance eax and ecx afterwards;
  ;      0 leaves both pointers untouched (used for the last row).
%macro ADD_FF_MMX 1
  movq mm0, [eax]
  movq mm2, [ecx]
  movq mm1, mm0
  movq mm3, mm2
%if (%1!=0)
  lea eax,[eax+%1*edx]
%endif
  MIX
  paddusw mm0, mm5  ; rounder
  paddusw mm1, mm5  ; rounder
  psrlw mm0, 1
  psrlw mm1, 1
  packuswb mm0, mm1
  movq [ecx], mm0
%if (%1!=0)
  lea ecx,[ecx+%1*edx]
%endif
%endmacro

  ; Average 4 rows of 8 source bytes into Dst (ADD_FF_MMX per row).
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x4_FF_MMX:
  PROLOG Rounder1_MMX, 1
%rep 3
  ADD_FF_MMX 1
%endrep
  ADD_FF_MMX 0
  ret

  ; Average 8 rows of 8 source bytes into Dst (ADD_FF_MMX per row).
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x8_FF_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_FF_MMX 1
%endrep
  ADD_FF_MMX 0
  ret

  ; One 16-byte row: Dst = (Src + Dst + mm5) >> 1, done as two 8-byte halves.
  ; Advances eax by one row; ecx is NOT advanced (the caller does it).
%macro ADD_16x8_FF_MMX 0
  movq mm0, [eax]
  movq mm2, [ecx]
  movq mm1, mm0
  movq mm3, mm2
  MIX
  paddusw mm0, mm5  ; dst-rounder
  paddusw mm1, mm5  ; dst-rounder
  psrlw mm0, 1
  psrlw mm1, 1
  packuswb mm0, mm1
  movq [ecx], mm0

  movq mm0, [eax+8]
  movq mm2, [ecx+8]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX
  paddusw mm0, mm5  ; dst-rounder
  paddusw mm1, mm5  ; dst-rounder
  psrlw mm0, 1
  psrlw mm1, 1
  packuswb mm0, mm1
  movq [ecx+8], mm0
%endmacro

  ; Average 8 rows of 16 source bytes into Dst.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_16x8_FF_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_16x8_FF_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_16x8_FF_MMX
  ret

;//////////////////////////////////////////////////////////////////////

  ; One 8-byte row, horizontal two-tap:
  ;   Dst[x] = (((Src[x] + Src[x+1] + mm7) >> 1) + Dst[x] + mm5) >> 1
  ; Advances eax by one row; ecx is advanced by the caller.
%macro ADD_FH_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX
  movq mm2, [ecx]   ; prepare mix with Dst[0]
  MIX_DST
  movq [ecx], mm0
%endmacro

  ; 4 rows of ADD_FH_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x4_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 3
  ADD_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_FH_MMX
  ret

  ; 8 rows of ADD_FH_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x8_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_FH_MMX
  ret

  ; One 16-byte row of the horizontal two-tap blend, as two 8-byte halves.
  ; Advances eax by one row; ecx is advanced by the caller.
%macro ADD_16x8_FH_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  MIX
  movq mm2, [ecx]   ; prepare mix with Dst[0]
  MIX_DST
  movq [ecx], mm0

  movq mm0, [eax+8]
  movq mm2, [eax+9]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX
  movq mm2, [ecx+8]   ; prepare mix with Dst[8]
  MIX_DST
  movq [ecx+8], mm0
%endmacro

  ; 8 rows of ADD_16x8_FH_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_16x8_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_16x8_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_16x8_FH_MMX
  ret

;//////////////////////////////////////////////////////////////////////

  ; One 8-byte row, vertical two-tap (current row and the row edx bytes below):
  ;   Dst[x] = (((Src[x] + Src[x+BpS] + mm7) >> 1) + Dst[x] + mm5) >> 1
  ; Advances eax by one row; ecx is advanced by the caller.
%macro ADD_HF_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+edx]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX
  movq mm2, [ecx]   ; prepare mix with Dst[0]
  MIX_DST
  movq [ecx], mm0
%endmacro

  ; 4 rows of ADD_HF_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x4_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 3
  ADD_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HF_MMX
  ret

  ; 8 rows of ADD_HF_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x8_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HF_MMX
  ret

  ; One 16-byte row of the vertical two-tap blend, as two 8-byte halves.
  ; Advances eax by one row; ecx is advanced by the caller.
%macro ADD_16x8_HF_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+edx]
  movq mm1, mm0
  movq mm3, mm2
  MIX
  movq mm2, [ecx]   ; prepare mix with Dst[0]
  MIX_DST
  movq [ecx], mm0

  movq mm0, [eax+8]
  movq mm2, [eax+edx+8]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX
  movq mm2, [ecx+8]   ; prepare mix with Dst[8]
  MIX_DST
  movq [ecx+8], mm0
%endmacro

  ; 8 rows of ADD_16x8_HF_MMX.  Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_16x8_HF_Rnd0_MMX:
  PROLOG Rounder1_MMX, 1
%rep 7
  ADD_16x8_HF_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_16x8_HF_MMX
  ret

;//////////////////////////////////////////////////////////////////////

  ; One 8-byte row, four-tap (2x2) average blended with Dst.
  ; In:  mm2/mm3 = widened (Src[x]+Src[x+1]) of the PREVIOUS row (low/high),
  ;      mm6 == 0, mm7 = rounder for the 4-tap sum
  ; Out: Dst row written; mm2/mm3 = the same horizontal sums for the row just
  ;      read, ready for the next invocation.  eax advanced by one row.
  ; Uses mm4/mm5 as scratch, which is why the dst-rounder is read from memory.
%macro ADD_HH_MMX 0
  lea eax,[eax+edx]

    ; transfer prev line to mm0/mm1
  movq mm0, mm2
  movq mm1, mm3

    ; load new line in mm2/mm3
  movq mm2, [eax]
  movq mm4, [eax+1]
  movq mm3, mm2
  movq mm5, mm4

  punpcklbw mm2, mm6
  punpcklbw mm4, mm6
  paddusw mm2, mm4
  punpckhbw mm3, mm6
  punpckhbw mm5, mm6
  paddusw mm3, mm5

    ; mix current line (mm2/mm3) with previous (mm0,mm1);
    ; we'll preserve mm2/mm3 for next line...

  paddusw mm0, mm2
  paddusw mm1, mm3

  movq mm4, [ecx]   ; prepare mix with Dst[0]
  movq mm5, mm4

  paddusw mm0, mm7  ; finish mixing current line
  paddusw mm1, mm7

  punpcklbw mm4, mm6
  punpckhbw mm5, mm6

  psrlw mm0, 2
  psrlw mm1, 2

  paddusw mm0, mm4  ; mix Src(mm0/mm1) with Dst(mm4/mm5)
  paddusw mm1, mm5

  paddusw mm0, [Rounder1_MMX]
  paddusw mm1, [Rounder1_MMX]

  psrlw mm0, 1
  psrlw mm1, 1

  packuswb mm0, mm1

  movq [ecx], mm0
%endmacro

  ; 4 rows of the 2x2 average blended into Dst (reads 5 source rows).
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x4_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0    ; mm5 is busy. Don't load dst-rounder

    ; preprocess first line
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm2, mm0
  paddusw mm3, mm1

   ; Input: mm2/mm3 contains the value (Src[0]+Src[1]) of previous line
%rep 3
  ADD_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HH_MMX
  ret

  ; 8 rows of the 2x2 average blended into Dst (reads 9 source rows).
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_8x8_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0    ; mm5 is busy. Don't load dst-rounder

    ; preprocess first line
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm2, mm0
  paddusw mm3, mm1

   ; Input: mm2/mm3 contains the value (Src[0]+Src[1]) of previous line
%rep 7
  ADD_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HH_MMX
  ret

  ; 16x8 variant: processes the left 8-byte column for 8 rows, then re-reads
  ; Dst/Src from the stack and repeats for the column at +8.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Add_16x8_HH_Rnd0_MMX:
  PROLOG Rounder2_MMX, 0

    ; preprocess first line
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm2, mm0
  paddusw mm3, mm1

   ; Input: mm2/mm3 contains the value (Src[0]+Src[1]) of previous line
%rep 7
  ADD_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HH_MMX

    ; second column
    ; (eax/ecx were advanced by the first pass, so reload them; edx, mm6 and
    ;  mm7 are still valid)
  mov ecx, [esp+ 4] ; Dst
  mov eax, [esp+ 8] ; Src
  lea ecx, [ecx+8]
  lea eax, [eax+8]

    ; preprocess first line
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  punpcklbw mm0, mm6
  punpcklbw mm2, mm6
  punpckhbw mm1, mm6
  punpckhbw mm3, mm6
  paddusw mm2, mm0
  paddusw mm3, mm1

   ; Input: mm2/mm3 contains the value (Src[0]+Src[1]) of previous line
%rep 7
  ADD_HH_MMX
  lea ecx,[ecx+edx]
%endrep
  ADD_HH_MMX
  ret

;//////////////////////////////////////////////////////////////////////
;// Copy functions
;//////////////////////////////////////////////////////////////////////

  ; Copy two 8-byte rows from [eax] to [ecx].
  ; %1 = 1 for the final pair (pointers are not advanced), anything else
  ;      advances eax and ecx by two rows.
%macro COPY_FF_8 1      ; %1:phase
    movq mm0,  [eax]
    movq mm1,  [eax+edx]
    movq [ecx], mm0
%if (%1!=1)
    lea eax, [eax+2*edx]
%endif
    movq [ecx+edx], mm1
%if (%1!=1)
    lea ecx, [ecx+2*edx]
%endif
%endmacro

  ; Straight copy of 4 rows of 8 bytes.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Copy_8x4_FF_MMX:  ; 9c
  PROLOG0
  movq mm0,  [eax      ]
  movq mm1,  [eax+edx  ]
  movq mm2,  [eax+2*edx]
  movq [ecx      ], mm0
  movq [ecx+edx  ], mm1
  movq [ecx+2*edx], mm2
  lea edx, [edx+2*edx]      ; edx = 3*BpS -> offset of the fourth row
  movq mm1,  [eax+edx]
  movq [ecx+edx], mm1
  ret

  ; Straight copy of 8 rows of 8 bytes.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Copy_8x8_FF_MMX:  ; 14c
  PROLOG0
%rep 3
  COPY_FF_8 0
%endrep
  COPY_FF_8 1
  ret

  ; Copy two 16-byte rows from [eax] to [ecx].
  ; %1 = 1 for the final pair (pointers are not advanced), anything else
  ;      advances eax and ecx by two rows.
%macro COPY_FF_16 1
  movq mm0, [eax]
  movq mm1, [eax+8]
  movq mm2, [eax+edx]
  movq mm3, [eax+edx+8]
  movq [ecx], mm0
  movq [ecx+8], mm1
  movq [ecx+edx], mm2
%if (%1!=1)
  lea eax, [eax+2*edx]
%endif
  movq [ecx+edx+8], mm3
%if (%1!=1)
  lea ecx, [ecx+2*edx]
%endif
%endmacro

  ; Straight copy of 8 rows of 16 bytes.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Copy_16x8_FF_MMX: ; 26c
  PROLOG0
%rep 3
  COPY_FF_16 0
%endrep
  COPY_FF_16 1
  ret

;//////////////////////////////////////////////////////////////////////

  ; One 8-byte row, horizontal two-tap, no destination blend:
  ;   Dst[x] = (Src[x] + Src[x+1] + mm7) >> 1
  ; Advances eax by one row; ecx is advanced by the caller.
%macro COPY_FH_MMX 0
  movq mm0, [eax]
  movq mm2, [eax+1]
  movq mm1, mm0
  movq mm3, mm2
  lea eax,[eax+edx]
  MIX2
  movq [ecx], mm0
%endmacro

  ; 4 rows of COPY_FH_MMX with rounder 1.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Copy_8x4_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
%rep 3
  COPY_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_FH_MMX
  ret

  ; 4 rows of COPY_FH_MMX with rounder 0.
  ; Args: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS
align 16
Skl_Copy_8x4_FH_Rnd1_MMX:
  PROLOG Rounder0_MMX, 0
%rep 3
  COPY_FH_MMX
  lea ecx,[ecx+edx]
%endrep
  COPY_FH_MMX
  ret

  ; 8 rows of COPY_FH_MMX with rounder 1.
  ; NOTE(review): this listing is cut off after the eighth row (page 1 of 3);
  ; the closing `ret` is presumably on the next page -- confirm before
  ; assembling.  Nothing has been added here.
align 16
Skl_Copy_8x8_FH_Rnd0_MMX:
  PROLOG Rounder1_MMX, 0
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX
  lea ecx,[ecx+edx]
  COPY_FH_MMX

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -