⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skl_img_x86.asm

📁 mpeg4编解码器
💻 ASM
📖 第 1 页 / 共 3 页
字号:
  ; NOTE(review): the five instructions below are the tail of a routine whose
  ; beginning lies before this chunk. They are kept unchanged.
  pop ebx
  paddusw mm6,mm0
  paddusw mm6,mm1
  movd eax, mm6
  ret

;----------------------------------------------------------------------
; Skl_SAD_8x16_SSE
; In:    [esp+4] = Src1, [esp+8] = Src2, [esp+12] = BpS
; Out:   eax = low dword of the mm6 accumulator
; Uses:  eax, ecx, edx, mm0, mm1, mm5, mm6; ebx is saved and restored
; Notes: SAD_8x8_SSE is defined outside this chunk. Presumably each
;        expansion handles two rows and advances eax/edx by ebx (2*BpS)
;        -- confirm against the macro. The last two rows are handled
;        inline so that 'pop ebx' can be interleaved with them.
;----------------------------------------------------------------------
align 16
Skl_SAD_8x16_SSE:
  mov eax, [esp+ 4] ; Src1
  mov edx, [esp+ 8] ; Src2
  mov ecx, [esp+12] ; BpS
  push ebx
  lea ebx, [ecx+ecx]   ; ebx = 2*BpS
  pxor mm5, mm5 ; this is a NOP
  pxor mm6, mm6 ; accum2
%rep 7
  SAD_8x8_SSE
%endrep
  movq mm0, [eax]
  psadbw mm0, [edx]
  movq mm1, [eax+ecx]
  psadbw mm1, [edx+ecx]
  pop ebx
  paddusw mm6,mm0
  paddusw mm6,mm1
  movd eax, mm6
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_4x4_SSE
;//////////////////////////////////////////////////////////////////////

; SAD_4x4_SSE: adds the SAD of two 4-byte rows (at [eax]/[edx] and at
; [eax+ecx]/[edx+ecx]) to mm6, then advances eax and edx by ebx.
; Clobbers mm0, mm1 and flags (the 'add').
%macro SAD_4x4_SSE 0
  movd    mm0, [eax]
  movd    mm1, [edx]
  psadbw  mm0, mm1
  movd    mm1, [eax+ecx]
  add eax, ebx
  paddusw mm6,mm0
  movd    mm0, [edx+ecx]
  lea edx, [edx+ebx]
  psadbw  mm1, mm0
  paddusw mm6,mm1
%endmacro

;----------------------------------------------------------------------
; Skl_SAD_4x4_SSE
; In:    [esp+4] = Src1, [esp+8] = Src2, [esp+12] = BpS
; Out:   eax = sum of absolute differences over 4 rows of 4 bytes
; Uses:  eax, ecx, edx, mm0, mm1, mm6; ebx is saved and restored
; Rows:  one macro expansion (2 rows) + 2 rows inline
;----------------------------------------------------------------------
align 16
Skl_SAD_4x4_SSE:    ;
  mov eax, [esp+ 4] ; Src1
  mov edx, [esp+ 8] ; Src2
  mov ecx, [esp+12] ; BpS
  push ebx
  lea ebx, [ecx+ecx]   ; ebx = 2*BpS
  pxor mm6, mm6 ; accum2
  pxor mm0, mm0
  pxor mm1, mm1
  SAD_4x4_SSE
  movd    mm0, [eax]
  movd    mm1, [edx]
  psadbw  mm0, mm1
  movd    mm1, [eax+ecx]
  pop ebx
  paddusw mm6,mm0
  movd    mm0, [edx+ecx]
  psadbw  mm1, mm0
  paddusw mm6,mm1
  movd eax, mm6
  ret

;----------------------------------------------------------------------
; Skl_SAD_4x8_SSE
; In:    [esp+4] = Src1, [esp+8] = Src2, [esp+12] = BpS
; Out:   eax = sum of absolute differences over 8 rows of 4 bytes
; Uses:  eax, ecx, edx, mm0, mm1, mm6; ebx is saved and restored
; Rows:  three macro expansions (6 rows) + 2 rows inline
;----------------------------------------------------------------------
align 16
Skl_SAD_4x8_SSE:    ;
  mov eax, [esp+ 4] ; Src1
  mov edx, [esp+ 8] ; Src2
  mov ecx, [esp+12] ; BpS
  push ebx
  lea ebx, [ecx+ecx]   ; ebx = 2*BpS
  pxor mm6, mm6 ; accum2
  pxor mm0, mm0
  pxor mm1, mm1
  SAD_4x4_SSE
  SAD_4x4_SSE
  SAD_4x4_SSE
  movd    mm0, [eax]
  movd    mm1, [edx]
  psadbw  mm0, mm1
  movd    mm1, [eax+ecx]
  pop ebx
  paddusw mm6,mm0
  movd    mm0, [edx+ecx]
  psadbw  mm1, mm0
  paddusw mm6,mm1
  movd eax, mm6
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_16x16_SSE
;//////////////////////////////////////////////////////////////////////

; MEAN_16x16_SSE: processes one 16-byte row at [eax]. The SAD of the
; left 8 bytes against mm7 is added to mm5 and that of the right 8 bytes
; to mm6; eax then advances by ecx. With mm7 == 0 the SAD is the plain
; byte sum. Clobbers mm0, mm1 and flags.
%macro MEAN_16x16_SSE 0
  movq    mm0, [eax]
  psadbw  mm0, mm7
  movq    mm1, [eax+8]
  psadbw  mm1, mm7
  paddusw  mm5, mm0
  add eax, ecx
  paddusw   mm6, mm1
%endmacro

;----------------------------------------------------------------------
; Skl_Mean_16x16_SSE
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (sum of the 16x16 bytes) >> 8
; Uses:  eax, ecx, mm0, mm1, mm5, mm6, mm7
;----------------------------------------------------------------------
align 16
Skl_Mean_16x16_SSE:   ; 97c
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor mm5, mm5 ; accum
  pxor mm6, mm6 ; accum
  pxor mm7, mm7 ; zero
%rep 16                 ; one row per expansion
  MEAN_16x16_SSE
%endrep
  paddusw mm6,mm5
  movd    eax,mm6
  shr     eax, 8
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_8x8_SSE
;//////////////////////////////////////////////////////////////////////

; MEAN_8x8_SSE: adds the SAD against mm7 of two 8-byte rows ([eax] and
; [eax+ecx]) to mm6, then advances eax by edx. Clobbers mm0, mm1, flags.
%macro MEAN_8x8_SSE  0
  movq mm0, [eax]
  movq mm1, [eax+ecx]
  psadbw mm0, mm7
  add eax, edx
  psadbw mm1, mm7
  paddw mm6, mm0
  paddw mm6, mm1
%endmacro

;----------------------------------------------------------------------
; Skl_Mean_8x8_SSE
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (value left in eax by COLLAPSE_MMX) >> 6
; Uses:  eax, ecx, edx, mm0, mm1, mm6, mm7 (plus whatever COLLAPSE_MMX
;        touches)
; Notes: COLLAPSE_MMX is defined outside this chunk; presumably it moves
;        the total accumulated in mm6 into eax -- confirm.
;----------------------------------------------------------------------
align 16
Skl_Mean_8x8_SSE: ; 34c
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor mm6, mm6 ; accum
  pxor mm7, mm7 ; zero
  lea edx, [ecx+ecx]   ; edx = 2*BpS
  pxor mm6, mm6 ; this is a NOP
  MEAN_8x8_SSE
  MEAN_8x8_SSE
  MEAN_8x8_SSE
  MEAN_8x8_SSE
  COLLAPSE_MMX
  shr eax,6
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_4x4_SSE
;//////////////////////////////////////////////////////////////////////

; MEAN_4x4_SSE: adds the SAD against mm7 of two 4-byte rows ([eax] and
; [eax+ecx]) to mm6, then advances eax by edx. Clobbers mm0, mm1, flags.
%macro MEAN_4x4_SSE  0
  movd mm0, [eax]
  movd mm1, [eax+ecx]
  psadbw mm0, mm7
  add eax, edx
  psadbw mm1, mm7
  paddw mm6, mm0
  paddw mm6, mm1
%endmacro

;----------------------------------------------------------------------
; Skl_Mean_4x4_SSE
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (value left in eax by COLLAPSE_4_MMX) >> 4
; Uses:  eax, ecx, edx, mm0, mm1, mm6, mm7 (plus whatever COLLAPSE_4_MMX
;        touches)
; Notes: COLLAPSE_4_MMX is defined outside this chunk; presumably it
;        moves the total accumulated in mm6 into eax -- confirm.
;----------------------------------------------------------------------
align 16
Skl_Mean_4x4_SSE:   ;
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor mm6, mm6 ; accum
  pxor mm7, mm7 ; zero
  pxor mm0, mm0
  pxor mm1, mm1
  lea edx, [ecx+ecx]   ; edx = 2*BpS
  pxor mm6, mm6 ; this is a NOP
  MEAN_4x4_SSE
  MEAN_4x4_SSE
  COLLAPSE_4_MMX
  shr eax,4
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Abs_Dev_16x16_SSE
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; Skl_Abs_Dev_16x16_SSE
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = sum over the 16x16 block of |pixel - Mean|,
;        where Mean = (sum of the 256 bytes) >> 8
; Uses:  eax, ecx, mm0, mm1, mm5, mm6, mm7
; Two passes over the block: pass 1 sums the bytes (mm7 == 0), pass 2
; re-runs the same macro with mm7 holding Mean in all 8 bytes.
;----------------------------------------------------------------------
align 16
Skl_Abs_Dev_16x16_SSE:    ; 191c
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor mm5, mm5 ; accum
  pxor mm6, mm6 ; accum
  pxor mm7, mm7 ; zero
%rep 16                 ; pass 1: byte sum, one row per expansion
  MEAN_16x16_SSE
%endrep
  mov eax, [esp+ 4]   ; Src
  paddusw  mm5,mm6
  pxor     mm6, mm6     ; accum #1
  psrlw    mm5, 8       ; => Mean
  pshufw   mm7, mm5, 0  ; replicate Mean
  pxor     mm5, mm5     ; accum #2
  packuswb mm7,mm7
%rep 16                 ; pass 2: sum of |pixel - Mean|
  MEAN_16x16_SSE
%endrep
  paddusw mm6, mm5
  movd eax, mm6
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Sqr_Dev_16x16_SSE
;//////////////////////////////////////////////////////////////////////

; SQR_DEV_16x16_SSE: processes one 16-byte row at [eax]. The byte sum
; (SAD against the zero register mm7) is added to mm6, and the squares
; of the bytes are added to the two dword lanes of mm5; eax then
; advances by ecx. Requires mm7 == 0. Clobbers mm0-mm3.
%macro SQR_DEV_16x16_SSE 0
  movq   mm0, [eax]
  movq   mm2, mm0
  movq   mm1, [eax+8]
  movq   mm3, mm1
  psadbw mm0, mm7
  lea eax, [eax+ecx]
  psadbw mm1, mm7
  paddw  mm6, mm0
  movq   mm0, mm2
  paddw  mm6, mm1
  movq   mm1, mm3
  punpcklbw mm0, mm7
  punpcklbw mm1, mm7
  punpckhbw mm2, mm7
  punpckhbw mm3, mm7
  pmaddwd mm0, mm0
  pmaddwd mm1, mm1
  pmaddwd mm2, mm2
  pmaddwd mm3, mm3
  paddd mm5, mm0
  paddd mm5, mm1
  paddd mm5, mm2
  paddd mm5, mm3
%endmacro

;----------------------------------------------------------------------
; Skl_Sqr_Dev_16x16_SSE
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (SumSq >> 8) - (Sum >> 8)^2 over the 16x16 block
; Uses:  eax, ecx, mm0-mm3, mm5, mm6, mm7
;----------------------------------------------------------------------
align 16
Skl_Sqr_Dev_16x16_SSE:    ; 237c
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor mm5, mm5 ; accum for sqr
  pxor mm6, mm6 ; accum for mean
  pxor mm7, mm7 ; zero
%rep 16                 ; one row per expansion
  SQR_DEV_16x16_SSE
%endrep
    ; we can't use a *signed* 'pmulhw mm6,mm6' here => pmaddw instead
  psrlq   mm6, 8
  pmaddwd mm6, mm6
  movq    mm7, mm5
  psrlq   mm5, 32
  paddd   mm5, mm7     ; fold the two dword lanes of the square sum
  psrld   mm5,8      ; (Sqr)>>8
  psubd mm5, mm6
  movd eax,mm5
  ret

;//////////////////////////////////////////////////////////////////////
;//
;//  SSE2 impl
;//
;//////////////////////////////////////////////////////////////////////

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x16_SSE2
;//////////////////////////////////////////////////////////////////////

; SAD_16x16_SSE2: adds the SAD of two 16-byte rows to xmm6 and advances
; both eax and edx by 2*ecx. [edx] is read with unaligned loads, [eax]
; with aligned loads. Clobbers xmm0-xmm3.
%macro SAD_16x16_SSE2 0
  movdqu  xmm0, [edx]
  movdqu  xmm1, [edx+ecx]
  lea edx,[edx+2*ecx]
  movdqa  xmm2, [eax]
  movdqa  xmm3, [eax+ecx]
  lea eax,[eax+2*ecx]
  psadbw  xmm0, xmm2
  paddusw xmm6,xmm0
  psadbw  xmm1, xmm3
  paddusw xmm6,xmm1
%endmacro

;----------------------------------------------------------------------
; Skl_SAD_16x16_SSE2
; In:    [esp+4] = Src1 (must be 16-byte aligned: read with movdqa),
;        [esp+8] = Src2, [esp+12] = BpS
; Out:   eax = low 16 bits of the folded SAD accumulator
; Uses:  eax, ecx, edx, xmm0-xmm3, xmm5, xmm6
;----------------------------------------------------------------------
align 16
Skl_SAD_16x16_SSE2:
  mov eax, [esp+ 4] ; Src1 (assumed aligned)
  mov edx, [esp+ 8] ; Src2
  mov ecx, [esp+12] ; BpS
  pxor xmm6, xmm6 ; accum
%rep 8                  ; two rows per expansion
  SAD_16x16_SSE2
%endrep
  pshufd  xmm5, xmm6, 00000010b   ; bring the high-qword partial sum down
  paddusw xmm6, xmm5
  pextrw  eax, xmm6, 0
  ret

;----------------------------------------------------------------------
; Skl_SAD_16x8_Field_SSE2
; Same as Skl_SAD_16x16_SSE2, except that the row stride is doubled
; (ecx = 2*BpS) and only 8 rows are processed (4 expansions).
; In:    [esp+4] = Src1 (assumed aligned), [esp+8] = Src2,
;        [esp+12] = BpS
; Out:   eax = low 16 bits of the folded SAD accumulator
; Uses:  eax, ecx, edx, xmm0-xmm3, xmm5, xmm6
;----------------------------------------------------------------------
align 16
Skl_SAD_16x8_Field_SSE2:
  mov eax, [esp+ 4] ; Src1 (assumed aligned)
  mov edx, [esp+ 8] ; Src2
  mov ecx, [esp+12] ; BpS
  lea ecx, [ecx+ecx]  ; 2.BpS
  pxor xmm6, xmm6 ; accum
  SAD_16x16_SSE2
  SAD_16x16_SSE2
  SAD_16x16_SSE2
  SAD_16x16_SSE2
  pshufd  xmm5, xmm6, 00000010b   ; bring the high-qword partial sum down
  paddusw xmm6, xmm5
  pextrw  eax, xmm6, 0
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_SAD_16x7_Self_SSE2
;//////////////////////////////////////////////////////////////////////

; SAD_16x7_SSE2: adds the SAD of two 16-byte row pairs ([eax] vs [edx]
; and [eax+ecx] vs [edx+ecx]) to xmm6 and advances eax and edx by 2*ecx.
; Every access is an aligned one (movdqa / psadbw with a memory operand).
; Clobbers xmm0, xmm1.
%macro SAD_16x7_SSE2 0
  movdqa  xmm0, [eax]
  psadbw  xmm0, [edx]
  movdqa  xmm1, [eax+ecx]
  lea eax, [eax+2*ecx]
  psadbw  xmm1, [edx+ecx]
  paddusw xmm6,xmm0
  lea edx, [edx+2*ecx]
  paddusw xmm6,xmm1
%endmacro

;----------------------------------------------------------------------
; Skl_SAD_16x7_Self_SSE2
; SAD between each row and the row below it (edx = Src + BpS), over
; 7 row pairs: three expansions (6 pairs) + 1 pair inline.
; In:    [esp+4] = Src (assumed aligned), [esp+8] = BpS
; Out:   eax = low 16 bits of the folded SAD accumulator
; Uses:  eax, ecx, edx, xmm0, xmm1, xmm5, xmm6, xmm7
; Notes: psadbw with a memory operand needs 16-byte alignment, so
;        Src + BpS must be aligned as well.
;----------------------------------------------------------------------
align 16
Skl_SAD_16x7_Self_SSE2:
  mov eax, [esp+ 4] ; Src (assumed aligned)
  mov ecx, [esp+ 8] ; BpS
  lea edx, [eax+ecx]   ; edx = next row
  pxor xmm7, xmm7 ; this is a NOP
  pxor xmm6, xmm6 ; accum2
  SAD_16x7_SSE2
  SAD_16x7_SSE2
  SAD_16x7_SSE2
  movdqa  xmm0, [eax]
  psadbw  xmm0, [edx]
  paddusw xmm6,xmm0
  pshufd  xmm5, xmm6, 00000010b   ; bring the high-qword partial sum down
  paddusw xmm6, xmm5
  pextrw  eax, xmm6, 0
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Mean_16x16_SSE2
;//////////////////////////////////////////////////////////////////////

; MEAN_16x16_SSE2: adds the SAD against xmm7 of two 16-byte rows ([eax]
; and [eax+ecx], unaligned loads) to xmm6 and advances eax by 2*ecx.
; With xmm7 == 0 the SAD is the plain byte sum. Clobbers xmm0, xmm1.
%macro MEAN_16x16_SSE2 0
  movdqu  xmm0, [eax]
  movdqu  xmm1, [eax+ecx]
  lea eax, [eax+2*ecx]    ; + 2*BpS
  psadbw  xmm0, xmm7
  paddusw xmm6, xmm0
  psadbw  xmm1, xmm7
  paddusw xmm6, xmm1
%endmacro

;----------------------------------------------------------------------
; Skl_Mean_16x16_SSE2
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (sum of the 16x16 bytes) >> 8
; Uses:  eax, ecx, xmm0, xmm1, xmm5, xmm6, xmm7
;----------------------------------------------------------------------
align 16
Skl_Mean_16x16_SSE2:
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor xmm6, xmm6 ; accum
  pxor xmm7, xmm7 ; zero
%rep 8                  ; two rows per expansion
  MEAN_16x16_SSE2
%endrep
  pshufd  xmm5, xmm6, 10b   ; bring the high-qword partial sum down
  paddusw xmm6, xmm5
  pextrw  eax, xmm6, 0
  shr eax, 8
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Abs_Dev_16x16_SSE2
;//////////////////////////////////////////////////////////////////////

;----------------------------------------------------------------------
; Skl_Abs_Dev_16x16_SSE2
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = sum over the 16x16 block of |pixel - Mean|,
;        where Mean = (sum of the 256 bytes) >> 8
; Uses:  eax, ecx, xmm0, xmm1, xmm6, xmm7
; Two passes over the block: pass 1 sums the bytes (xmm7 == 0), pass 2
; re-runs the same macro with xmm7 holding Mean in all 16 bytes.
;----------------------------------------------------------------------
align 16
Skl_Abs_Dev_16x16_SSE2:
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor xmm6, xmm6 ; accum
  pxor xmm7, xmm7 ; zero
%rep 8                  ; pass 1: byte sum, two rows per expansion
  MEAN_16x16_SSE2
%endrep
  mov eax, [esp+ 4]   ; Src
  pshufd   xmm7, xmm6, 10b
  paddusw  xmm7, xmm6       ; word 0 = total sum
  pxor     xmm6, xmm6     ; zero accum
  psrlw    xmm7, 8        ; => Mean
  pshuflw  xmm7, xmm7, 0  ; replicate Mean
  packuswb xmm7, xmm7
  pshufd   xmm7, xmm7, 00000000b
%rep 8                  ; pass 2: sum of |pixel - Mean|
  MEAN_16x16_SSE2
%endrep
  pshufd   xmm7, xmm6, 10b
  paddusw  xmm7, xmm6
  pextrw eax, xmm7, 0
  ret

;//////////////////////////////////////////////////////////////////////
; Skl_Sqr_16x16_SSE2
;//////////////////////////////////////////////////////////////////////

; SQR_16x16_SSE2: adds the squares of the bytes of two 16-byte rows
; ([eax] and [eax+ecx], unaligned loads) to the four dword lanes of xmm7
; and advances eax by 2*ecx. Requires xmm6 == 0. Clobbers xmm0-xmm3.
%macro SQR_16x16_SSE2 0
  movdqu xmm0, [eax]
  movdqu xmm1, [eax+ecx]
  lea eax,[eax+2*ecx]
  movdqa xmm2, xmm0
  movdqa xmm3, xmm1
  punpcklbw xmm0, xmm6
  punpcklbw xmm1, xmm6
  punpckhbw xmm2, xmm6
  punpckhbw xmm3, xmm6
  pmaddwd xmm0, xmm0
  pmaddwd xmm1, xmm1
  pmaddwd xmm2, xmm2
  pmaddwd xmm3, xmm3
  paddd   xmm7, xmm0
  paddd   xmm7, xmm1
  paddd   xmm7, xmm2
  paddd   xmm7, xmm3
%endmacro

;----------------------------------------------------------------------
; Skl_Sqr_16x16_SSE2
; In:    [esp+4] = Src, [esp+8] = BpS
; Out:   eax = (sum of the squared bytes of the 16x16 block) >> 8
; Uses:  eax, ecx, xmm0-xmm3, xmm6, xmm7
;----------------------------------------------------------------------
align 16
Skl_Sqr_16x16_SSE2:   ; 287c
  mov eax, [esp+ 4] ; Src
  mov ecx, [esp+ 8] ; BpS
  pxor xmm7, xmm7 ; accum
  pxor xmm6, xmm6 ; zero
%rep 8                  ; two rows per expansion
  SQR_16x16_SSE2
%endrep
  pshufd xmm6, xmm7, 1110b   ; lanes 2,3 -> lanes 0,1
  paddd  xmm7, xmm6
  pshufd xmm6, xmm7, 01b     ; lane 1 -> lane 0
  paddd  xmm7, xmm6
  movd   eax, xmm7
  shr    eax, 8
  ret

;//////////////////////////////////////////////////////////////////////

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -