📄 mc-c.c.svn-base
字号:
MC_CH( mc_xy21, mmxext, 4, 8, 0 )MC_CH( mc_xy23, mmxext, 4, 8, i_src_stride )MC_CV( mc_xy12, mmxext, 4, 8, 0 )MC_CV( mc_xy32, mmxext, 4, 8, 1 )#if 0static void mc_xy10_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[4*8]; mc_hh_w4( src, i_src_stride, tmp, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, src, i_src_stride, tmp, 4, i_height );}static void mc_xy30_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[4*8]; mc_hh_w4( src, i_src_stride, tmp, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, src+1, i_src_stride, tmp, 4, i_height );}static void mc_xy01_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[4*8]; mc_hv_w4( src, i_src_stride, tmp, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, src, i_src_stride, tmp, 4, i_height );}static void mc_xy03_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[4*8]; mc_hv_w4( src, i_src_stride, tmp, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, 4, i_height );}static void mc_xy11_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hv_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy31_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hv_w4( src+1, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy13_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hv_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy33_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hv_w4( src+1, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy21_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hc_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy23_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hc_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hh_w4( src+i_src_stride, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy12_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hc_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hv_w4( src, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}static void mc_xy32_w4( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[4*8]; uint8_t tmp2[4*8]; mc_hc_w4( src, i_src_stride, tmp1, 4, i_height ); mc_hv_w4( src+1, i_src_stride, tmp2, 4, i_height ); pixel_avg_w4( dst, i_dst_stride, tmp1, 4, tmp2, 4, i_height );}#endif/***************************************************************************** * MC with width == 8 (height <= 16) *****************************************************************************/static inline void mc_hh_w8( uint8_t *src, int i_src, uint8_t *dst, int i_dst, int i_height ){ mc_hh_w4( &src[0], i_src, &dst[0], i_dst, i_height ); mc_hh_w4( &src[4], i_src, &dst[4], i_dst, i_height );}static inline void mc_hv_w8( uint8_t *src, int i_src, uint8_t *dst, int i_dst, int i_height ){ int y; src -= 2 * i_src; MMX_ZERO( %%mm7 ); MMX_INIT( %%mm6, x264_w0x10 ); for( y = 0; y < i_height; y++ ) { MMX_LOAD_2x8( %%mm0, %%mm5, %%mm1, %%mm2, %%mm7, &src[0*i_src], i_src ); MMX_FILTERTAP2_P1( %%mm0, %%mm1, %%mm5, %%mm2 ); MMX_LOAD_2x8( %%mm1, %%mm3, %%mm2, %%mm4, %%mm7, &src[2*i_src], i_src ); MMX_FILTERTAP2_P2( %%mm0, %%mm1, %%mm2, %%mm5, %%mm3, %%mm4 ); MMX_LOAD_2x8( %%mm1, %%mm3, %%mm2, %%mm4, %%mm7, &src[4*i_src], i_src ); MMX_FILTERTAP_P3( %%mm0, %%mm1, %%mm2, %%mm6, %%mm7 ); MMX_FILTERTAP_P3( %%mm5, %%mm3, %%mm4, %%mm6, %%mm7 ); MMX_SAVEPACK_8P( %%mm0, %%mm5, %%mm7, dst ); src += i_src; dst += i_dst; }}static inline void mc_hc_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ int x, y; asm volatile( "pxor %%mm7, %%mm7\n" : : ); for( y = 0; y < i_height; y++ ) { int16_t tap[5+8]; /* first 8 */ asm volatile( "leal (%0, %1), %%eax\n" "movq (%0), %%mm0\n" /* load pix-2 */ "movq %%mm0, %%mm2\n" "punpcklbw %%mm7, %%mm0\n" "punpckhbw %%mm7, %%mm2\n" "movq (%%eax),%%mm1\n" /* load pix-1 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1),%%mm1\n" /* load pix */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%%eax,%1,2),%%mm1\n" /* load pix+1 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%0,%1,4),%%mm1\n" /* load pix+2 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1,4),%%mm1\n" /* load pix+3 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "paddw %%mm1, %%mm0\n" "paddw %%mm3, %%mm2\n" "movq %%mm0, (%2)\n" "movq %%mm2, 8(%2)\n" "addl $8, %%eax\n" "addl $8, %0\n" "movd (%0), %%mm0\n" /* load pix-2 */ "punpcklbw %%mm7, %%mm0\n" "movd (%%eax),%%mm1\n" /* load pix-1 */ "punpcklbw %%mm7, %%mm1\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "movd (%%eax,%1),%%mm1\n" /* load pix */ "punpcklbw %%mm7, %%mm1\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "movd (%%eax,%1,2),%%mm1\n" /* load pix+1 */ "punpcklbw %%mm7, %%mm1\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "movd (%0,%1,4),%%mm1\n" /* load pix+2 */ "punpcklbw %%mm7, %%mm1\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "movd (%%eax,%1,4),%%mm1\n" /* load pix+3 */ "punpcklbw %%mm7, %%mm1\n" "paddw %%mm1, %%mm0\n" "movq %%mm0, 16(%2)\n" : : "r"(src-2*i_src_stride-2), "r"(i_src_stride), "r"(&tap[0]) : "%eax" ); /* last one */ tap[8+4] = x264_tapfilter( &src[-2+8+4], i_src_stride ); for( x = 0; x < 8; x++ ) { dst[x] = x264_mc_clip1( ( tap[0+x] - 5*tap[1+x] + 20 * tap[2+x] + 20 * tap[3+x] -5*tap[4+x] + tap[5+x] + 512 ) >> 10 ); } src += i_src_stride; dst += i_dst_stride; }}MC_IH( mc_xy10, mmxext, 8, 16, 0 )MC_IH( mc_xy30, mmxext, 8, 16, 1 )MC_IV( mc_xy01, mmxext, 8, 16, 0 )MC_IV( mc_xy03, mmxext, 8, 16, i_src_stride )MC_HV( mc_xy11, mmxext, 8, 16, 0, 0 )MC_HV( mc_xy31, mmxext, 8, 16, 1, 0 )MC_HV( mc_xy13, mmxext, 8, 16, 0, i_src_stride )MC_HV( mc_xy33, mmxext, 8, 16, 1, i_src_stride )MC_CH( mc_xy21, mmxext, 8, 16, 0 )MC_CH( mc_xy23, mmxext, 8, 16, i_src_stride )MC_CV( mc_xy12, mmxext, 8, 16, 0 )MC_CV( mc_xy32, mmxext, 8, 16, 1 )#if 0/* mc I+H */static void mc_xy10_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[8*16]; mc_hh_w8( src, i_src_stride, tmp, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, src, i_src_stride, tmp, 8, i_height );}static void mc_xy30_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[8*16]; mc_hh_w8( src, i_src_stride, tmp, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, src+1, i_src_stride, tmp, 8, i_height );}/* mc I+V */static void mc_xy01_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[8*16]; mc_hv_w8( src, i_src_stride, tmp, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, src, i_src_stride, tmp, 8, i_height );}static void mc_xy03_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[8*16]; mc_hv_w8( src, i_src_stride, tmp, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, 8, i_height );}/* H+V */static void mc_xy11_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hv_w8( src, i_src_stride, tmp1, 8, i_height ); mc_hh_w8( src, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy31_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hv_w8( src+1, i_src_stride, tmp1, 8, i_height ); mc_hh_w8( src, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy13_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hv_w8( src, i_src_stride, tmp1, 8, i_height ); mc_hh_w8( src+i_src_stride, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy33_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hv_w8( src+1, i_src_stride, tmp1, 8, i_height ); mc_hh_w8( src+i_src_stride, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy21_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hc_w8( src, i_src_stride, tmp1, 8, i_height ); mc_hh_w8( src, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy12_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hc_w8( src, i_src_stride, tmp1, 8, i_height ); mc_hv_w8( src, i_src_stride, tmp2, 8, i_height ); pixel_avg_w8( dst, i_dst_stride, tmp1, 8, tmp2, 8, i_height );}static void mc_xy32_w8( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[8*16]; uint8_t tmp2[8*16]; mc_hc_w8( src, i_src_stride, tmp1, 8, i_height ); mc_hv_w8( src+1, i_src_stride, tmp2, 8, i_height );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -