⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skl_img_x86.asm

📁 mpeg4编解码器
💻 ASM
📖 第 1 页 / 共 3 页
字号:
;/********************************************************
; * Some code. Copyright (C) 2003 by Pascal Massimino.   *
; * All Rights Reserved.      (http://skal.planet-d.net) *
; * For Educational/Academic use ONLY. See 'LICENSE.TXT'.*
; ********************************************************/
;//////////////////////////////////////////////////////////////////////
; [BITS 32]

; Syntax: NASM (Intel operand order).
; The routines below read their arguments from [esp+4], [esp+8], ... and
; return their result in eax.
; NOTE(review): 'globl', 'DATA' and 'TEXT' are not defined in this file;
; presumably they are macros supplied by skl_nasm.h -- confirm.

%include "../../include/skl_syst/skl_nasm.h"

;----------------------------------------------------------------------
; Exported symbols
;----------------------------------------------------------------------

        ; SAD, MMX
globl Skl_SAD_4x4_MMX
globl Skl_SAD_4x8_MMX
globl Skl_SAD_8x4_MMX
globl Skl_SAD_8x8_MMX
globl Skl_SAD_8x16_MMX
globl Skl_SAD_16x8_MMX
globl Skl_SAD_16x16_MMX
globl Skl_SAD_16x8_Field_MMX
globl Skl_SAD_16x7_Self_MMX

        ; SSD, MMX
globl Skl_SSD_4x4_MMX
globl Skl_SSD_8x8_MMX
globl Skl_SSD_16x16_MMX
globl Skl_SSD_16x8_Field_MMX

        ; SAD, SSE
globl Skl_SAD_4x4_SSE
globl Skl_SAD_4x8_SSE
globl Skl_SAD_8x4_SSE
globl Skl_SAD_8x8_SSE
globl Skl_SAD_8x16_SSE
globl Skl_SAD_16x8_SSE
globl Skl_SAD_16x16_SSE
globl Skl_SAD_16x8_Field_SSE
globl Skl_SAD_16x7_Self_SSE

globl Skl_SAD_Avrg_16x16_SSE
globl Skl_SAD_Avrg_16x8_SSE
globl Skl_SAD_Avrg_8x16_SSE
globl Skl_SAD_Avrg_8x8_SSE

        ; Mean / Sqr, MMX
globl Skl_Mean_16x16_MMX
globl Skl_Mean_8x8_MMX
globl Skl_Mean_4x4_MMX
globl Skl_Sqr_16x16_MMX
globl Skl_Sqr_8x8_MMX
globl Skl_Sqr_4x4_MMX

        ; Mean / deviation, SSE
globl Skl_Mean_16x16_SSE
globl Skl_Mean_8x8_SSE
globl Skl_Mean_4x4_SSE
globl Skl_Abs_Dev_16x16_SSE
globl Skl_Sqr_Dev_16x16_SSE

        ; SSE2
globl Skl_SAD_16x16_SSE2
globl Skl_SAD_16x8_Field_SSE2
globl Skl_SAD_16x7_Self_SSE2
globl Skl_Mean_16x16_SSE2
globl Skl_Sqr_16x16_SSE2
globl Skl_Abs_Dev_16x16_SSE2

;----------------------------------------------------------------------
; Constants
;----------------------------------------------------------------------

DATA

align 16
One:    times 8 dw 1            ; eight words of 1: pmaddwd multiplier used
                                ; to add adjacent words (sum of 4 words)

TEXT

;----------------------------------------------------------------------
; COLLAPSE_MMX
;   In:    mm6 = four 16-bit partial sums
;   Out:   eax = sum of the four words of mm6
;          (pmaddwd interprets the words as signed 16-bit values)
;   Clobb: mm6, mm7
;----------------------------------------------------------------------
%macro COLLAPSE_MMX 0
        movq    mm7, mm6
        pmaddwd mm6, [One]      ; dword0 = w0+w1, dword1 = w2+w3
        psrlq   mm7, 32         ; bring w2,w3 down to the low dword
        pmaddwd mm7, [One]      ; dword0 = w2+w3
        paddd   mm6, mm7        ; dword0 = w0+w1+w2+w3
        movd    eax, mm6
%endmacro

; COLLAPSE_4_MMX: same reduction, name used by the 4-pixel-wide routines.
%macro COLLAPSE_4_MMX 0
        COLLAPSE_MMX
%endmacro

;//////////////////////////////////////////////////////////////////////
;//
;//  MMX impl
;//
;//////////////////////////////////////////////////////////////////////

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x16_MMX
;//////////////////////////////////////////////////////////////////////
;
; Conventions shared by the SAD routines below:
;   [esp+ 4] = Src1, [esp+ 8] = Src2, [esp+12] = BpS (byte offset added to
;   both pointers to step to the next row).
;   eax = Src1 cursor, edx = Src2 cursor, ecx = row step.
;   mm7 = zero (for byte -> word unpacking), mm6 = 4 x 16-bit accumulator.
;   Result is returned in eax. Clobbers eax, ecx, edx, mm0-mm7.
;   No emms is issued by these routines.
;   NOTE(review): the caller is presumably expected to issue emms -- confirm.

;----------------------------------------------------------------------
; SAD_16x16_MMX: accumulate one 16-byte row of |Src1 - Src2| into mm6,
; then advance both cursors by ecx.
;----------------------------------------------------------------------
%macro SAD_16x16_MMX 0
        movq      mm0, [eax]            ; Src1, bytes 0..7
        movq      mm1, [edx]            ; Src2, bytes 0..7
        movq      mm2, [eax+8]          ; Src1, bytes 8..15
        movq      mm3, [edx+8]          ; Src2, bytes 8..15
        lea       edx, [edx+ecx]        ; next Src2 row
                                        ; (original note: we do our best
                                        ;  *not* to go 16b, here)
        movq      mm4, mm0
        psubusb   mm0, mm1              ; max(a-b, 0)
        movq      mm5, mm2
        psubusb   mm2, mm3
        psubusb   mm1, mm4              ; max(b-a, 0)
        por       mm0, mm1              ; |a-b| per byte, bytes 0..7
        psubusb   mm3, mm5
        por       mm2, mm3              ; |a-b| per byte, bytes 8..15

        movq      mm1, mm0
        punpcklbw mm0, mm7              ; widen bytes to words
        punpckhbw mm1, mm7
        movq      mm3, mm2
        punpcklbw mm2, mm7
        paddusw   mm0, mm1
        lea       eax, [eax+ecx]        ; next Src1 row
        punpckhbw mm3, mm7
        paddusw   mm6, mm0
        paddusw   mm2, mm3
        paddusw   mm6, mm2
%endmacro

;----------------------------------------------------------------------
; Skl_SAD_16x16_MMX(Src1, Src2, BpS)
;   SAD over 16 rows of 16 bytes.      (original annotation: 179c)
;----------------------------------------------------------------------
align 16
Skl_SAD_16x16_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 16
        SAD_16x16_MMX
%endrep
        COLLAPSE_MMX
        ret

;----------------------------------------------------------------------
; Skl_SAD_16x8_MMX(Src1, Src2, BpS)
;   SAD over 8 rows of 16 bytes.
;----------------------------------------------------------------------
align 16
Skl_SAD_16x8_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 8
        SAD_16x16_MMX
%endrep
        COLLAPSE_MMX
        ret

;----------------------------------------------------------------------
; Skl_SAD_16x8_Field_MMX(Src1, Src2, BpS)
;   SAD over 8 rows of 16 bytes, stepping by 2*BpS (every other row).
;   (original annotation: 179c)
;----------------------------------------------------------------------
align 16
Skl_SAD_16x8_Field_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        lea     ecx, [ecx+ecx]          ; 2.BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 8
        SAD_16x16_MMX
%endrep
        COLLAPSE_MMX
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x7_Self_MMX
;//////////////////////////////////////////////////////////////////////

; SAD_16x7_MMX: one 16-byte row step; expands to exactly the same
; instruction sequence as SAD_16x16_MMX.
%macro SAD_16x7_MMX 0
        SAD_16x16_MMX
%endmacro

;----------------------------------------------------------------------
; Skl_SAD_16x7_Self_MMX(Src, BpS)
;   [esp+4] = Src, [esp+8] = BpS.
;   SAD between each 16-byte row and the row BpS bytes after it,
;   for 7 consecutive row pairs.
;----------------------------------------------------------------------
align 16
Skl_SAD_16x7_Self_MMX:
        mov     eax, [esp+ 4]           ; Src
        mov     ecx, [esp+ 8]           ; BpS
        lea     edx, [eax+ecx]          ; Src2 = Src + BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 7
        SAD_16x7_MMX
%endrep
        COLLAPSE_MMX
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_8x8_MMX
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; SAD_8x8_MMX: accumulate two 8-byte rows of |Src1 - Src2| into mm6,
; then advance both cursors by 2*ecx.
;----------------------------------------------------------------------
%macro SAD_8x8_MMX 0
        movq      mm0, [eax]            ; Src1, row 0
        movq      mm1, [edx]            ; Src2, row 0
        movq      mm2, [eax+ecx]        ; Src1, row 1
        movq      mm3, [edx+ecx]        ; Src2, row 1
        lea       edx, [edx+2*ecx]      ; Src2 += 2 rows
                                        ; (original note: we do our best
                                        ;  *not* to go 16b, here)
        movq      mm4, mm0
        psubusb   mm0, mm1
        movq      mm5, mm2
        psubusb   mm2, mm3
        psubusb   mm1, mm4
        por       mm0, mm1              ; |a-b| per byte, row 0
        psubusb   mm3, mm5
        por       mm2, mm3              ; |a-b| per byte, row 1

        movq      mm1, mm0
        punpcklbw mm0, mm7              ; widen bytes to words
        punpckhbw mm1, mm7
        movq      mm3, mm2
        punpcklbw mm2, mm7
        paddusw   mm0, mm1
        lea       eax, [eax+2*ecx]      ; Src1 += 2 rows
        punpckhbw mm3, mm7
        paddusw   mm6, mm0
        paddusw   mm2, mm3
        paddusw   mm6, mm2
%endmacro

; Skl_SAD_8x4_MMX(Src1, Src2, BpS): SAD over 4 rows of 8 bytes.
align 16
Skl_SAD_8x4_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 2
        SAD_8x8_MMX
%endrep
        COLLAPSE_MMX
        ret

; Skl_SAD_8x8_MMX(Src1, Src2, BpS): SAD over 8 rows of 8 bytes.
; (original annotation: 57c)
align 16
Skl_SAD_8x8_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 4
        SAD_8x8_MMX
%endrep
        COLLAPSE_MMX
        ret

; Skl_SAD_8x16_MMX(Src1, Src2, BpS): SAD over 16 rows of 8 bytes.
align 16
Skl_SAD_8x16_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 8
        SAD_8x8_MMX
%endrep
        COLLAPSE_MMX
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_4x4_MMX
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; SAD_4x4_MMX: accumulate two 4-byte rows of |Src1 - Src2| into mm6,
; then advance both cursors by 2*ecx. Only the low 4 bytes are loaded
; (movd), so a single punpcklbw per row covers all of them.
;----------------------------------------------------------------------
%macro SAD_4x4_MMX 0
        movd      mm0, [eax]            ; Src1, row 0
        movd      mm1, [edx]            ; Src2, row 0
        movd      mm2, [eax+ecx]        ; Src1, row 1
        movd      mm3, [edx+ecx]        ; Src2, row 1
        lea       edx, [edx+2*ecx]      ; Src2 += 2 rows
                                        ; (original note: we do our best
                                        ;  *not* to go 16b, here)
        movq      mm4, mm0
        psubusb   mm0, mm1
        movq      mm5, mm2
        psubusb   mm2, mm3
        psubusb   mm1, mm4
        por       mm0, mm1              ; |a-b| per byte, row 0
        psubusb   mm3, mm5
        por       mm2, mm3              ; |a-b| per byte, row 1

        punpcklbw mm0, mm7              ; widen bytes to words
        punpcklbw mm2, mm7
        paddusw   mm6, mm0
        lea       eax, [eax+2*ecx]      ; Src1 += 2 rows
        paddusw   mm6, mm2
%endmacro

; Skl_SAD_4x4_MMX(Src1, Src2, BpS): SAD over 4 rows of 4 bytes.
; (original annotation: 57c -- NOTE(review): same figure as the 8x8
;  routine, possibly copied; confirm)
align 16
Skl_SAD_4x4_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 2
        SAD_4x4_MMX
%endrep
        COLLAPSE_4_MMX
        ret

; Skl_SAD_4x8_MMX(Src1, Src2, BpS): SAD over 8 rows of 4 bytes.
align 16
Skl_SAD_4x8_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     edx, [esp+ 8]           ; Src2
        mov     ecx, [esp+12]           ; BpS
        pxor    mm6, mm6                ; accum
        pxor    mm7, mm7                ; zero
%rep 4
        SAD_4x4_MMX
%endrep
        COLLAPSE_4_MMX
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SSD_16x16_MMX
;//////////////////////////////////////////////////////////////////////
;
; Conventions shared by the SSD routines below (note the register roles
; differ from the SAD routines):
;   [esp+ 4] = Src1, [esp+ 8] = Src2, [esp+12] = BpS.
;   eax = Src1 cursor, ecx = Src2 cursor, edx = row step.
;   mm6 = zero (for byte -> word unpacking), mm7 = 2 x 32-bit accumulator.
;   Result is returned in eax. Clobbers eax, ecx, edx, mm0-mm7.

;----------------------------------------------------------------------
; SSD_16x16_MMX: accumulate the squared differences of one 16-byte row
; into mm7, then advance both cursors by edx.
;----------------------------------------------------------------------
%macro SSD_16x16_MMX 0
        movq      mm0, [eax]            ; Src1, bytes 0..7
        movq      mm1, [ecx]            ; Src2, bytes 0..7
        movq      mm2, [eax+8]          ; Src1, bytes 8..15
        movq      mm3, [ecx+8]          ; Src2, bytes 8..15
        lea       eax, [eax+edx]        ; next Src1 row
        lea       ecx, [ecx+edx]        ; next Src2 row

        ; bytes 0..7
        movq      mm4, mm0
        movq      mm5, mm1
        punpcklbw mm0, mm6              ; widen bytes to words
        punpcklbw mm1, mm6
        punpckhbw mm4, mm6
        punpckhbw mm5, mm6
        psubw     mm0, mm1              ; signed word differences
        psubw     mm4, mm5
        pmaddwd   mm0, mm0              ; d*d, adjacent pairs summed
        pmaddwd   mm4, mm4
        paddd     mm7, mm0
        paddd     mm7, mm4

        ; bytes 8..15
        movq      mm4, mm2
        movq      mm5, mm3
        punpcklbw mm2, mm6
        punpcklbw mm3, mm6
        punpckhbw mm4, mm6
        punpckhbw mm5, mm6
        psubw     mm2, mm3
        psubw     mm4, mm5
        pmaddwd   mm2, mm2
        pmaddwd   mm4, mm4
        paddd     mm7, mm2
        paddd     mm7, mm4
%endmacro

; Skl_SSD_16x16_MMX(Src1, Src2, BpS): SSD over 16 rows of 16 bytes.
align 16
Skl_SSD_16x16_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     ecx, [esp+ 8]           ; Src2
        mov     edx, [esp+12]           ; BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero
%rep 16
        SSD_16x16_MMX
%endrep
        movq    mm6, mm7                ; fold the two dword sums:
        psrlq   mm7, 32                 ;   high dword -> low
        paddd   mm6, mm7                ;   low += high
        movd    eax, mm6
        ret

; Skl_SSD_16x8_Field_MMX(Src1, Src2, BpS): SSD over 8 rows of 16 bytes,
; stepping by 2*BpS (every other row).
align 16
Skl_SSD_16x8_Field_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     ecx, [esp+ 8]           ; Src2
        mov     edx, [esp+12]           ; BpS
        lea     edx, [edx+edx]          ; 2.BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero
%rep 8
        SSD_16x16_MMX
%endrep
        movq    mm6, mm7                ; fold the two dword sums
        psrlq   mm7, 32
        paddd   mm6, mm7
        movd    eax, mm6
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SSD_8x8_MMX
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; SSD_8x8_MMX: accumulate the squared differences of two 8-byte rows
; into mm7, then advance both cursors by 2*edx.
;----------------------------------------------------------------------
%macro SSD_8x8_MMX 0
        movq      mm0, [eax]            ; Src1, row 0
        movq      mm1, [ecx]            ; Src2, row 0
        movq      mm2, [eax+edx]        ; Src1, row 1
        movq      mm3, [ecx+edx]        ; Src2, row 1
        lea       eax, [eax+2*edx]      ; Src1 += 2 rows
        lea       ecx, [ecx+2*edx]      ; Src2 += 2 rows

        ; row 0
        movq      mm4, mm0
        movq      mm5, mm1
        punpcklbw mm0, mm6              ; widen bytes to words
        punpcklbw mm1, mm6
        punpckhbw mm4, mm6
        punpckhbw mm5, mm6
        psubw     mm0, mm1
        psubw     mm4, mm5
        pmaddwd   mm0, mm0              ; d*d, adjacent pairs summed
        pmaddwd   mm4, mm4
        paddd     mm7, mm0
        paddd     mm7, mm4

        ; row 1
        movq      mm4, mm2
        movq      mm5, mm3
        punpcklbw mm2, mm6
        punpcklbw mm3, mm6
        punpckhbw mm4, mm6
        punpckhbw mm5, mm6
        psubw     mm2, mm3
        psubw     mm4, mm5
        pmaddwd   mm2, mm2
        pmaddwd   mm4, mm4
        paddd     mm7, mm2
        paddd     mm7, mm4
%endmacro

; Skl_SSD_8x8_MMX(Src1, Src2, BpS): SSD over 8 rows of 8 bytes.
align 16
Skl_SSD_8x8_MMX:
        mov     eax, [esp+ 4]           ; Src1
        mov     ecx, [esp+ 8]           ; Src2
        mov     edx, [esp+12]           ; BpS
        pxor    mm7, mm7                ; accum
        pxor    mm6, mm6                ; zero
%rep 4
        SSD_8x8_MMX
%endrep
        movq    mm6, mm7                ; fold the two dword sums
        psrlq   mm7, 32
        paddd   mm6, mm7
        movd    eax, mm6
        ret

;//////////////////////////////////////////////////////////////////////
; Skl_SSD_4x4_MMX
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; SSD_4x4_MMX: accumulate the squared differences of two 4-byte rows
; ([eax]/[ecx] and [eax+edx]/[ecx+edx]) into mm7.
; Unlike the wider SSD macros, this one does NOT advance eax/ecx; any
; pointer stepping has to be done by the code that invokes it.
;----------------------------------------------------------------------
%macro SSD_4x4_MMX 0
        movd      mm0, [eax]            ; Src1, row 0
        movd      mm1, [ecx]            ; Src2, row 0
        movd      mm2, [eax+edx]        ; Src1, row 1
        movd      mm3, [ecx+edx]        ; Src2, row 1
        punpcklbw mm0, mm6              ; widen bytes to words
        punpcklbw mm1, mm6
        punpcklbw mm2, mm6
        punpcklbw mm3, mm6
        psubw     mm0, mm1
        psubw     mm2, mm3
        pmaddwd   mm0, mm0              ; d*d, adjacent pairs summed
        pmaddwd   mm2, mm2
        paddd     mm7, mm0
        paddd     mm7, mm2
%endmacro

align 16
Skl_SSD_4x4_MMX:
mov eax, [esp+ 4] ; Src1  mov ecx, [esp+ 8] ; Src2  mov edx, [esp+12] ; BpS  pxor mm7, mm7 ; accum  pxor mm6, mm6 ; zero

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -