⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skl_mb_sse.asm

📁 mpeg4编解码器
💻 ASM
📖 第 1 页 / 共 2 页
字号:
; (tail of a macro whose %macro header lies before this excerpt -- kept verbatim)
%endif
  movq [ecx+edx+%2], mm0
%endmacro

;//////////////////////////////////////////////////////////////////////
;
; Skl_{Add,Copy}_WxH_HH_Rnd0_SSE
;
; Fully unrolled routines built from PROLOG0 / HH_SETUP / COPY_ADD_HH_RND0,
; all of which are defined outside this excerpt.
;  - mm7 is loaded from [Mask] before the first macro use.
;  - The first COPY_ADD_HH_RND0 argument is 1 in the "Add" routines and
;    0 in the "Copy" routines; the second argument is 0 or 8 (0 only for
;    the 8-wide routines, 0 and 8 for the 16-wide ones).
;  - Between invocations eax and ecx are both advanced by 2*edx.
;    NOTE(review): presumably eax=src, ecx=dst, edx=stride as set up by
;    PROLOG0, and each invocation handles two rows -- confirm against
;    the macro definitions.
;  - The stand-alone 'nop's are kept exactly where the original had them
;    (presumably code-alignment padding -- do not remove without measuring).
;
;//////////////////////////////////////////////////////////////////////

align 16
Skl_Add_8x4_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0

  COPY_ADD_HH_RND0 1,0, mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0, mm2,mm3
  ret

align 16
Skl_Copy_8x4_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0

  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  ret

align 16
Skl_Add_8x8_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0

  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  ret

align 16
Skl_Copy_8x8_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
    ; 55c
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  ret

align 16
Skl_Add_16x8_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
  HH_SETUP mm4, mm5, 8

  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  COPY_ADD_HH_RND0 1,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  COPY_ADD_HH_RND0 1,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  COPY_ADD_HH_RND0 1,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 1,0,  mm2,mm3
  COPY_ADD_HH_RND0 1,8,  mm4,mm5
  ret

align 16
Skl_Copy_16x8_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
  HH_SETUP mm4, mm5, 8
    ; 103c
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  ret

; NOTE(review): Skl_HV_Pass_2Taps_SSE labels the same address as
; Skl_Copy_16x16_HH_Rnd0_SSE, so both names enter this routine --
; confirm the alias is intentional.
align 16
Skl_HV_Pass_2Taps_SSE:
Skl_Copy_16x16_HH_Rnd0_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
  HH_SETUP mm4, mm5, 8

  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  nop
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND0 0,0,  mm2,mm3
  COPY_ADD_HH_RND0 0,8,  mm4,mm5
  ret

;//////////////////////////////////////////////////////////////////////
;
; Skl_Copy_WxH_HH_Rnd1_SSE
;
; Same unrolled shape as the Rnd0 "Copy" routines, but built on
; COPY_ADD_HH_RND1 (defined outside this excerpt) and without the
; 'nop' padding.
;
;//////////////////////////////////////////////////////////////////////

align 16
Skl_Copy_8x4_HH_Rnd1_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0

  COPY_ADD_HH_RND1 0,0, mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  ret

align 16
Skl_Copy_8x8_HH_Rnd1_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
    ; 53c
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  ret

align 16
Skl_Copy_16x8_HH_Rnd1_SSE:
  PROLOG0
  movq mm7, [Mask]
  HH_SETUP mm2, mm3, 0
  HH_SETUP mm4, mm5, 8
    ; 103c
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  COPY_ADD_HH_RND1 0,8, mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  COPY_ADD_HH_RND1 0,8, mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  COPY_ADD_HH_RND1 0,8, mm4,mm5
  lea eax, [eax+2*edx]
  lea ecx, [ecx+2*edx]
  COPY_ADD_HH_RND1 0,0, mm2,mm3
  COPY_ADD_HH_RND1 0,8, mm4,mm5
  ret

;//////////////////////////////////////////////////////////////////////
;//
;// 8x8 -> 16x16 upsampling (16b)
;//
;//////////////////////////////////////////////////////////////////////

; MUL_PACK: horizontal filter step.
;   %1 = %1*%3 + mm4*%4 + [Cst2]
;   %2 = %2*%3 + mm5*%4 + [Cst2]
; Expects mm4/mm5 to hold the neighbour samples prepared by COL03/COL47;
; overwrites mm4/mm5 with their scaled values.
%macro MUL_PACK 4     ; %1/%2: regs   %3/%4: Up13/Up31
  pmullw %1,  %3 ; [Up13]
  pmullw mm4, %4 ; [Up31]
  pmullw %2,  %3 ; [Up13]
  pmullw mm5, %4 ; [Up31]
  paddsw %1, [Cst2]
  paddsw %2, [Cst2]
  paddsw %1, mm4
  paddsw %2, mm5
%endmacro

; COL03: load and shuffle the words of source row %3 (rows are 16 bytes
; apart, base in edx) needed for the left 8 output pixels.
%macro COL03 3    ;%1/%2: regs, %3: row   -trashes mm4/mm5
  movq %2, [edx+%3*16+0*2]               ; <- 0|1|2|3
  pshufw %1,  %2,  (0+0*4+0*16+1*64)     ; %1 = 0|0|0|1
  pshufw mm4, %2,  (0+1*4+1*16+2*64)     ; mm4= 0|1|1|2
  pshufw %2,  %2,  (1+2*4+2*16+3*64)     ; %2 = 1|2|2|3
  pshufw mm5, [edx+%3*16+2*2],  (0+1*4+1*16+2*64) ; mm5 = 2|3|3|4
%endmacro

; COL47: same as COL03, for the right 8 output pixels.
%macro COL47 3    ;%1-%2: regs, %3: row   -trashes mm4/mm5
  pshufw %1, [edx+%3*16+2*2],  (1+2*4+2*16+3*64) ; 3|4|4|5
  movq mm5, [edx+%3*16+2*4]                      ; <- 4|5|6|7
  pshufw mm4, mm5,  (0+1*4+1*16+2*64)            ; 4|5|5|6
  pshufw %2,  mm5,  (1+2*4+2*16+3*64)            ; 5|6|6|7
  pshufw mm5, mm5,  (2+3*4+3*16+3*64)            ; 6|7|7|7
%endmacro

; MIX_ROWS: vertical filter step between the previous and current row.
%macro MIX_ROWS 4   ; %1/%2:prev %3/%4:cur (preserved)  mm4/mm5: output
  ; we need to perform: (%1,%3) -> (%1 = 3*%1+%3, mm4 = 3*%3+%1), %3 preserved.
  ; (likewise for the (%2,%4) -> (%2, mm5) half)
  movq mm4, [Cst3]
  movq mm5, [Cst3]
  pmullw mm4, %3
  pmullw mm5, %4
  paddsw mm4, %1
  paddsw mm5, %2
  pmullw %1, [Cst3]
  pmullw %2, [Cst3]
  paddsw %1, %3
  paddsw %2, %4
%endmacro

;//////////////////////////////////////////////////////////////////////

  ; Note: we can use ">>2" instead of "/4" here, since we
  ; are (supposed to be) averaging positive values

; STORE_1: >>2, saturate-pack (%1,%2) to 8 bytes and store at [ecx].
; Modifies %1/%2; does not advance ecx.
%macro STORE_1 2
  psraw %1, 2
  psraw %2, 2
  packuswb %1,%2
  movq [ecx], %1
%endmacro

; STORE_2: >>4, saturate-pack and store two rows:
;   (%1,%2) -> [ecx], (mm4,mm5) -> [ecx+eax]; then ecx += 2*eax.
%macro STORE_2 2    ; pack and store (%1,%2) + (mm4,mm5)
  psraw %1, 4
  psraw %2, 4
  psraw mm4, 4
  psraw mm5, 4
  packuswb %1,%2
  packuswb mm4, mm5
  movq [ecx], %1
  movq [ecx+eax], mm4
  lea ecx, [ecx+2*eax]
%endmacro

;----------------------------------------------------------------------
; Skl_Copy_Upsampled_8x8_16To8_SSE(Dst, Src, BpS)
;
; Args are read from the stack: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS.
;   Dst : output, written as 16 rows of 16 bytes, rows BpS bytes apart
;   Src : 8 rows of 16-bit words, rows 16 bytes apart
; The left 8 output columns are produced first (COL03), then Dst is
; reloaded and offset by 8 for the right 8 columns (COL47).
; Per half: row 0 and row 7 each yield one edge row (STORE_1); every
; adjacent pair of source rows yields two output rows (MIX_ROWS + STORE_2).
; Uses eax, ecx, edx and mm0-mm7 (mm6/mm7 hold [Up13]/[Up31]).
; NOTE(review): no 'emms' is issued here -- presumably the caller's job.
;----------------------------------------------------------------------
align 16
Skl_Copy_Upsampled_8x8_16To8_SSE:  ; 315c

  mov ecx, [esp+4]  ; Dst
  mov edx, [esp+8]  ; Src
  mov eax, [esp+12] ; BpS

  movq mm6, [Up13]
  movq mm7, [Up31]

  ; ---- left half: output columns 0..7 ----

  COL03 mm0, mm1, 0
  MUL_PACK mm0,mm1, mm6, mm7
  movq mm4, mm0                     ; work on copies: row 0 is needed again below
  movq mm5, mm1
  STORE_1 mm4, mm5                  ; dst row 0
  add ecx, eax

  COL03 mm2, mm3, 1
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1                  ; src rows 0+1 -> dst rows 1,2

  COL03 mm0, mm1, 2
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3                  ; src rows 1+2 -> dst rows 3,4

  COL03 mm2, mm3, 3
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1                  ; src rows 2+3 -> dst rows 5,6

  COL03 mm0, mm1, 4
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3                  ; src rows 3+4 -> dst rows 7,8

  COL03 mm2, mm3, 5
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1                  ; src rows 4+5 -> dst rows 9,10

  COL03 mm0, mm1, 6
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3                  ; src rows 5+6 -> dst rows 11,12

  COL03 mm2, mm3, 7
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1                  ; src rows 6+7 -> dst rows 13,14

  STORE_1 mm2, mm3                  ; dst row 15 (from src row 7 alone)

  ; ---- right half: output columns 8..15 ----

  mov ecx, [esp+4]
  add ecx, 8

  COL47 mm0, mm1, 0
  MUL_PACK mm0,mm1, mm6, mm7
  movq mm4, mm0
  movq mm5, mm1
  STORE_1 mm4, mm5
  add ecx, eax

  COL47 mm2, mm3, 1
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 2
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 3
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 4
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 5
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  COL47 mm0, mm1, 6
  MUL_PACK mm0,mm1, mm6, mm7
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_2 mm2, mm3

  COL47 mm2, mm3, 7
  MUL_PACK mm2,mm3, mm6, mm7
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_2 mm0, mm1

  STORE_1 mm2, mm3

  ret

;//////////////////////////////////////////////////////////////////////

    ; Note: grrr... the 'pcmpgtw' stuff are the "/4" and "/16" operators
    ; implemented with ">>2" and ">>4" using:
    ;       x/4  = ( (x-(x<0))>>2 ) + (x<0)
    ;       x/16 = ( (x-(x<0))>>4 ) + (x<0)
    ; ("/" truncates toward zero, (x<0) is 1 when x is negative.)

; STORE_ADD_1: signed /4 of (%1,%2), add the 8 destination bytes at
; [ecx], saturate-pack and store back to [ecx].
; Trashes mm6/mm7; does not advance ecx.
%macro STORE_ADD_1 2
    ; We subtract the rounder '2' for corner pixels,
    ; since when 'x' is negative, (x*4 + 2)/4 is *not*
    ; equal to 'x'. In fact, the correct relation is:
    ;         (x*4 + 2)/4 = x + (x<0)
    ; So, better revert to (x*4)/4 = x.
    ; NOTE(review): Cst2000/Cst0002 are defined outside this excerpt;
    ; presumably they hold '2' only in the corner-pixel word -- confirm.

  psubsw %1, [Cst2000]
  psubsw %2, [Cst0002]

  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, %1                   ; mm6 = 0xffff where %1 < 0
  pcmpgtw mm7, %2
  paddsw %1, mm6                    ; x - (x<0)
  paddsw %2, mm7
  psraw %1, 2
  psraw %2, 2
  psubsw %1, mm6                    ; ... + (x<0)
  psubsw %2, mm7

    ; mix with destination [ecx]
  movq mm6, [ecx]
  movq mm7, [ecx]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw %1, mm6
  paddsw %2, mm7
  packuswb %1,%2
  movq [ecx], %1
%endmacro

; STORE_ADD_2: signed /16 of (%1,%2) and (mm4,mm5), add the destination
; rows at [ecx] and [ecx+eax], saturate-pack, store both rows, then
; ecx += 2*eax.  Trashes mm6/mm7.
%macro STORE_ADD_2 2
  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, %1
  pcmpgtw mm7, %2
  paddsw %1, mm6
  paddsw %2, mm7
  psraw %1, 4
  psraw %2, 4
  psubsw %1, mm6
  psubsw %2, mm7

  pxor mm6, mm6
  pxor mm7, mm7
  pcmpgtw mm6, mm4
  pcmpgtw mm7, mm5
  paddsw mm4, mm6
  paddsw mm5, mm7
  psraw mm4, 4
  psraw mm5, 4
  psubsw mm4, mm6
  psubsw mm5, mm7

    ; mix with destination
  movq mm6, [ecx]
  movq mm7, [ecx]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw %1, mm6
  paddsw %2, mm7

  movq mm6, [ecx+eax]
  movq mm7, [ecx+eax]
  punpcklbw mm6, [Cst0]
  punpckhbw mm7, [Cst0]
  paddsw mm4, mm6
  paddsw mm5, mm7

  packuswb %1,%2
  packuswb mm4, mm5

  movq [ecx], %1
  movq [ecx+eax], mm4

  lea ecx, [ecx+2*eax]
%endmacro

;----------------------------------------------------------------------
; Skl_Add_Upsampled_8x8_16To8_SSE(Dst, Src, BpS)
;
; Same traversal as Skl_Copy_Upsampled_8x8_16To8_SSE, but each upsampled
; row is added to the bytes already in Dst (STORE_ADD_1 / STORE_ADD_2)
; instead of overwriting them.
; Args are read from the stack: [esp+4]=Dst, [esp+8]=Src, [esp+12]=BpS.
; [Up13]/[Up31] are passed to MUL_PACK as memory operands because the
; STORE_ADD_* macros trash mm6/mm7.
; Uses eax, ecx, edx and mm0-mm7.
; NOTE(review): no 'emms' is issued here -- presumably the caller's job.
;----------------------------------------------------------------------
align 16
Skl_Add_Upsampled_8x8_16To8_SSE:  ; 549c

  mov ecx, [esp+4]  ; Dst
  mov edx, [esp+8]  ; Src
  mov eax, [esp+12] ; BpS

  ; ---- left half: output columns 0..7 ----

  COL03 mm0, mm1, 0
  MUL_PACK mm0,mm1, [Up13], [Up31]
  movq mm4, mm0                     ; work on copies: row 0 is needed again below
  movq mm5, mm1
  STORE_ADD_1 mm4, mm5              ; dst row 0
  add ecx, eax

  COL03 mm2, mm3, 1
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1              ; src rows 0+1 -> dst rows 1,2

  COL03 mm0, mm1, 2
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3              ; src rows 1+2 -> dst rows 3,4

  COL03 mm2, mm3, 3
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1              ; src rows 2+3 -> dst rows 5,6

  COL03 mm0, mm1, 4
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3              ; src rows 3+4 -> dst rows 7,8

  COL03 mm2, mm3, 5
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1              ; src rows 4+5 -> dst rows 9,10

  COL03 mm0, mm1, 6
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3              ; src rows 5+6 -> dst rows 11,12

  COL03 mm2, mm3, 7
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1              ; src rows 6+7 -> dst rows 13,14

  STORE_ADD_1 mm2, mm3              ; dst row 15 (from src row 7 alone)

  ; ---- right half: output columns 8..15 ----

  mov ecx, [esp+4]
  add ecx, 8

  COL47 mm0, mm1, 0
  MUL_PACK mm0,mm1, [Up13], [Up31]
  movq mm4, mm0
  movq mm5, mm1
  STORE_ADD_1 mm4, mm5
  add ecx, eax

  COL47 mm2, mm3, 1
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 2
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 3
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 4
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 5
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  COL47 mm0, mm1, 6
  MUL_PACK mm0,mm1, [Up13], [Up31]
  MIX_ROWS mm2, mm3, mm0, mm1
  STORE_ADD_2 mm2, mm3

  COL47 mm2, mm3, 7
  MUL_PACK mm2,mm3, [Up13], [Up31]
  MIX_ROWS mm0, mm1, mm2, mm3
  STORE_ADD_2 mm0, mm1

  STORE_ADD_1 mm2, mm3

  ret

;//////////////////////////////////////////////////////////////////////

  ; pfeewwww... Never Do That On Stage Again. :)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -