📄 mc-a.asm
字号:
; NOTE: the five instructions below are the tail of a routine whose start
; lies before this chunk; they are reproduced unchanged.
    add         parm1q, parm2q
    add         parm3q, parm4q
    dec         r11d
    jnz         .height_loop
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
;
;   Processes two rows of 8 pixels (as four 4-pixel groups) per iteration and
;   subtracts 2 from the row counter in r11d.
;   NOTE(review): the .height_loop label, the initial value of r11d and any
;   stride handling presumably come from BIWEIGHT_START_MMX, which is defined
;   outside this chunk -- confirm there.
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w8_mmxext:
    BIWEIGHT_START_MMX

    BIWEIGHT_4P_MMX  [parm1q         ], [parm3q         ]
    BIWEIGHT_4P_MMX  [parm1q+4       ], [parm3q+4       ]
    BIWEIGHT_4P_MMX  [parm1q+parm2q  ], [parm3q+parm4q  ]
    BIWEIGHT_4P_MMX  [parm1q+parm2q+4], [parm3q+parm4q+4]

    lea         parm1q, [parm1q+parm2q*2]   ; advance both pointers by two rows
    lea         parm3q, [parm3q+parm4q*2]
    sub         r11d, byte 2
    jnz         .height_loop
    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   int x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
;
;   Fully unrolled: four 4-pixel rows, no loop back-edge.
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_4x4_mmxext:
    BIWEIGHT_START_MMX
    BIWEIGHT_4P_MMX  [parm1q         ], [parm3q         ]   ; row 0
    BIWEIGHT_4P_MMX  [parm1q+parm2q  ], [parm3q+parm4q  ]   ; row 1
    BIWEIGHT_4P_MMX  [parm1q+parm2q*2], [parm3q+parm4q*2]   ; row 2
    add         parm1q, parm2q              ; step one row so that base+stride*2
    add         parm3q, parm4q              ; addresses row 3
    BIWEIGHT_4P_MMX  [parm1q+parm2q*2], [parm3q+parm4q*2]   ; row 3
    ret



;=============================================================================
; pixel copy
;=============================================================================
;
; All copy routines below read from parm1q (src) and write to parm3q (dst).
; The strides are declared `int` in the C prototypes but are used as 64-bit
; address components, so each routine sign-extends them on entry: the upper
; 32 bits of an int argument register are not guaranteed by the ABI.
; Each loop only terminates when the row counter reaches exactly zero, so
; i_height must be a positive multiple of the rows handled per iteration.

ALIGN 16
;-----------------------------------------------------------------------------
;  void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
;                               uint8_t *dst, int i_dst_stride, int i_height );
;
;  Copies 4 bytes per row, two rows per iteration.
;  Clobbers: eax, r10, r11, parm1q-parm4q, flags.
;-----------------------------------------------------------------------------
x264_mc_copy_w4_mmxext:
    mov         eax, parm5d                 ; i_height
    movsxd      parm2q, parm2d              ; sign-extend i_src_stride
    movsxd      parm4q, parm4d              ; sign-extend i_dst_stride

ALIGN 4
.height_loop:
    mov         r10d, [parm1q]
    mov         r11d, [parm1q+parm2q]
    mov         [parm3q], r10d
    mov         [parm3q+parm4q], r11d
    lea         parm1q, [parm1q+parm2q*2]
    lea         parm3q, [parm3q+parm4q*2]
    dec         eax
    dec         eax
    jne         .height_loop

    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
;                                uint8_t *dst, int i_dst_stride, int i_height );
;
;   Copies 8 bytes per row, four rows per iteration.
;   Clobbers: eax, r10, r11, mm0-mm3, parm1q-parm4q, flags.
;-----------------------------------------------------------------------------
x264_mc_copy_w8_mmxext:
    mov         eax, parm5d                 ; i_height
    movsxd      parm2q, parm2d              ; sign-extend i_src_stride
    movsxd      parm4q, parm4d              ; sign-extend i_dst_stride

    lea         r10, [parm2q+parm2q*2]      ; 3 * i_src_stride
    lea         r11, [parm4q+parm4q*2]      ; 3 * i_dst_stride

ALIGN 4
.height_loop:
    movq        mm0, [parm1q]
    movq        mm1, [parm1q+parm2q]
    movq        mm2, [parm1q+parm2q*2]
    movq        mm3, [parm1q+r10]
    movq        [parm3q], mm0
    movq        [parm3q+parm4q], mm1
    movq        [parm3q+parm4q*2], mm2
    movq        [parm3q+r11], mm3
    lea         parm1q, [parm1q+parm2q*4]
    lea         parm3q, [parm3q+parm4q*4]

    sub         eax, byte 4
    jnz         .height_loop

    ret

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
;                                 uint8_t *dst, int i_dst_stride, int i_height );
;
;   Copies 16 bytes per row (two 8-byte moves), four rows per iteration.
;   Clobbers: eax, r10, r11, mm0-mm7, parm1q-parm4q, flags.
;-----------------------------------------------------------------------------
x264_mc_copy_w16_mmxext:
    mov         eax, parm5d                 ; i_height
    movsxd      parm2q, parm2d              ; sign-extend i_src_stride
    movsxd      parm4q, parm4d              ; sign-extend i_dst_stride

    lea         r10, [parm2q+parm2q*2]      ; 3 * i_src_stride
    lea         r11, [parm4q+parm4q*2]      ; 3 * i_dst_stride

ALIGN 4
.height_loop:
    movq        mm0, [parm1q]
    movq        mm1, [parm1q+8]
    movq        mm2, [parm1q+parm2q]
    movq        mm3, [parm1q+parm2q+8]
    movq        mm4, [parm1q+parm2q*2]
    movq        mm5, [parm1q+parm2q*2+8]
    movq        mm6, [parm1q+r10]
    movq        mm7, [parm1q+r10+8]
    movq        [parm3q], mm0
    movq        [parm3q+8], mm1
    movq        [parm3q+parm4q], mm2
    movq        [parm3q+parm4q+8], mm3
    movq        [parm3q+parm4q*2], mm4
    movq        [parm3q+parm4q*2+8], mm5
    movq        [parm3q+r11], mm6
    movq        [parm3q+r11+8], mm7
    lea         parm1q, [parm1q+parm2q*4]
    lea         parm3q, [parm3q+parm4q*4]

    sub         eax, byte 4
    jnz         .height_loop

    ret


ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_mc_copy_w16_sse2( uint8_t *src, int i_src_stride,
;                               uint8_t *dst, int i_dst_stride, int i_height );
;
;   Copies 16 bytes per row with unaligned SSE2 moves, two rows per iteration.
;   Clobbers: eax, xmm0, xmm1, parm1q-parm4q, flags.
;-----------------------------------------------------------------------------
x264_mc_copy_w16_sse2:
    mov         eax, parm5d                 ; i_height
    movsxd      parm2q, parm2d              ; sign-extend i_src_stride
    movsxd      parm4q, parm4d              ; sign-extend i_dst_stride

ALIGN 4
.height_loop:
    movdqu      xmm0, [parm1q]
    movdqu      xmm1, [parm1q+parm2q]
    movdqu      [parm3q], xmm0
    movdqu      [parm3q+parm4q], xmm1

    dec         eax
    dec         eax
    lea         parm1q, [parm1q+parm2q*2]   ; lea leaves the flags from dec intact
    lea         parm3q, [parm3q+parm4q*2]
    jnz         .height_loop

    ret



;=============================================================================
; chroma MC
;=============================================================================

ALIGN 16
;-----------------------------------------------------------------------------
;   void x264_mc_chroma_sse( uint8_t *src, int i_src_stride,
;                            uint8_t *dst, int i_dst_stride,
;                            int dx, int dy,
;                            int i_height, int i_width );
;
;   For every output row, four pixels at a time:
;       dst[x] = ( cA*src[x]          + cB*src[x+1]
;                + cC*src[x+stride]   + cD*src[x+stride+1] + 32 ) >> 6
;   with cA=(8-dx)(8-dy), cB=dx(8-dy), cC=(8-dx)dy, cD=dx*dy.
;   One pass covers a 4-pixel-wide column; when i_width == 8 a second pass
;   is run on columns 4..7. Any other width is treated as 4.
;   The i_width argument slot (parm8d) is overwritten with 0 before the
;   second pass so that the width test exits afterwards.
;   Clobbers: rax, r10, r11, mm0-mm7, flags.
;-----------------------------------------------------------------------------
x264_mc_chroma_sse:
    movsxd      parm2q, parm2d              ; sign-extend i_src_stride
    movsxd      parm4q, parm4d              ; sign-extend i_dst_stride

    movd        mm0, parm5d
    movd        mm1, parm6d

    pxor        mm3, mm3                    ; mm3 = 0, used for byte<->word (un)packing

    pshufw      mm5, mm0, 0                 ; mm5 - dx
    pshufw      mm6, mm1, 0                 ; mm6 - dy

    movq        mm4, [pw_8 GLOBAL]
    movq        mm0, mm4

    psubw       mm4, mm5                    ; mm4 - 8-dx
    psubw       mm0, mm6                    ; mm0 - 8-dy

    movq        mm7, mm5
    pmullw      mm5, mm0                    ; mm5 = dx*(8-dy) =     cB
    pmullw      mm7, mm6                    ; mm7 = dx*dy =         cD
    pmullw      mm6, mm4                    ; mm6 = (8-dx)*dy =     cC
    pmullw      mm4, mm0                    ; mm4 = (8-dx)*(8-dy) = cA

    mov         rax, parm1q                 ; rax = current src row
    mov         r10, parm3q                 ; r10 = current dst row
    mov         r11d, parm7d                ; r11d = rows remaining

ALIGN 4
.height_loop:

    movd        mm1, [rax+parm2q]
    movd        mm0, [rax]
    punpcklbw   mm1, mm3                    ; 00 px1 | 00 px2 | 00 px3 | 00 px4
    punpcklbw   mm0, mm3
    pmullw      mm1, mm6                    ; 2nd line * cC
    pmullw      mm0, mm4                    ; 1st line * cA

    paddw       mm0, mm1                    ; mm0 <- result

    movd        mm2, [rax+1]
    movd        mm1, [rax+parm2q+1]
    punpcklbw   mm2, mm3
    punpcklbw   mm1, mm3

    paddw       mm0, [pw_32 GLOBAL]         ; rounding term

    pmullw      mm2, mm5                    ; line * cB
    pmullw      mm1, mm7                    ; line * cD
    paddw       mm0, mm2
    paddw       mm0, mm1

    psrlw       mm0, 6
    packuswb    mm0, mm3                    ; 00 00 00 00 px1 px2 px3 px4
    movd        [r10], mm0

    add         rax, parm2q
    add         r10, parm4q                 ; i_dst_stride
    dec         r11d
    jnz         .height_loop

    mov         eax, parm8d                 ; i_width
    sub         eax, 8
    jnz         .finish                     ; width != 8 so assume 4

    mov         parm8d, eax                 ; i_width = 0: next width test exits
    mov         r10, parm3q                 ; dst
    mov         rax, parm1q                 ; src
    mov         r11d, parm7d                ; i_height
    add         r10, 4                      ; second pass: columns 4..7
    add         rax, 4
    jmp         .height_loop

.finish:
    ret
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -