📄 mc.c
字号:
"psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1),%%mm1\n" /* load pix */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%%eax,%1,2),%%mm1\n" /* load pix+1 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%0,%1,4),%%mm1\n" /* load pix+2 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1,4),%%mm1\n" /* load pix+3 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "paddw %%mm1, %%mm0\n" "paddw %%mm3, %%mm2\n" "movq %%mm0, (%2)\n" "movq %%mm2, 8(%2)\n" "addl $8, %%eax\n" "addl $8, %0\n" "movq (%0), %%mm0\n" /* load pix-2 */ "movq %%mm0, %%mm2\n" "punpcklbw %%mm7, %%mm0\n" "punpckhbw %%mm7, %%mm2\n" "movq (%%eax),%%mm1\n" /* load pix-1 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1),%%mm1\n" /* load pix */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%%eax,%1,2),%%mm1\n" /* load pix+1 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "paddw %%mm3, %%mm2\n" "movq (%0,%1,4),%%mm1\n" /* load pix+2 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "psubw %%mm3, %%mm2\n" "psllw $2, %%mm3\n" "psubw %%mm3, %%mm2\n" "movq (%%eax,%1,4),%%mm1\n" /* load pix+3 */ "movq %%mm1, %%mm3\n" "punpcklbw %%mm7, %%mm1\n" "punpckhbw %%mm7, %%mm3\n" "paddw %%mm1, %%mm0\n" "paddw %%mm3, %%mm2\n" "movq %%mm0, 16(%2)\n" "movq %%mm2, 24(%2)\n" "addl $8, %%eax\n" "addl $8, %0\n" "movd (%0), %%mm0\n" /* load pix-2 */ "punpcklbw %%mm7, %%mm0\n" "movd (%%eax),%%mm1\n" /* load pix-1 */ "punpcklbw %%mm7, %%mm1\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "movd (%%eax,%1),%%mm1\n" /* load pix */ "punpcklbw %%mm7, %%mm1\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "movd (%%eax,%1,2),%%mm1\n" /* load pix+1 */ "punpcklbw %%mm7, %%mm1\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "paddw %%mm1, %%mm0\n" "movd (%0,%1,4),%%mm1\n" /* load pix+2 */ "punpcklbw %%mm7, %%mm1\n" "psubw %%mm1, %%mm0\n" "psllw $2, %%mm1\n" "psubw %%mm1, %%mm0\n" "movd (%%eax,%1,4),%%mm1\n" /* load pix+3 */ "punpcklbw %%mm7, %%mm1\n" "paddw %%mm1, %%mm0\n" "movq %%mm0, 32(%2)\n" : : "r"(src-2*i_src_stride-2), "r"(i_src_stride), "r"(&tap[0]) : "%eax" ); /* last one */ tap[16+4] = x264_tapfilter( &src[-2+16+4], i_src_stride ); for( x = 0; x < 16; x++ ) { dst[x] = x264_mc_clip1( ( tap[0+x] - 5*tap[1+x] + 20 * tap[2+x] + 20 * tap[3+x] -5*tap[4+x] + tap[5+x] + 512 ) >> 10 ); } src += i_src_stride; dst += i_dst_stride; }#if 0#if 0 int i, x, y; for( y = 0; y < i_height; y++ ) { int16_t tap[5+16]; for( i = 0; i < 5+16; i++ ) { tap[i] = x264_tapfilter( &src[-2+i], i_src_stride ); } for( x = 0; x < 16; x++ ) { dst[x] = x264_mc_clip1( ( tap[0+x] - 5*tap[1+x] + 20 * tap[2+x] + 20 * tap[3+x] -5*tap[4+x] + tap[5+x] + 512 ) >> 10 ); } src += i_src_stride; dst += i_dst_stride; }#else mc_hc_w8( src, i_src_stride, dst, i_dst_stride, i_height ); mc_hc_w8( &src[8], i_src_stride, &dst[8], i_dst_stride, i_height );#endif#endif}/* mc I+H */static void mc_xy10_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[16*16]; mc_hh_w16( src, i_src_stride, tmp, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, src, i_src_stride, tmp, 16, i_height );}static void mc_xy30_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[16*16]; mc_hh_w16( src, i_src_stride, tmp, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, src+1, i_src_stride, tmp, 16, i_height );}/* mc I+V */static void mc_xy01_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[16*16]; mc_hv_w16( src, i_src_stride, tmp, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, src, i_src_stride, tmp, 16, i_height );}static void mc_xy03_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp[16*16]; mc_hv_w16( src, i_src_stride, tmp, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, 16, i_height );}/* H+V */static void mc_xy11_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hv_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy31_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hv_w16( src+1, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy13_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hv_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src+i_src_stride, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy33_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hv_w16( src+1, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src+i_src_stride, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy21_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hc_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy12_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hc_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hv_w16( src, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy32_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hc_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hv_w16( src+1, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void mc_xy23_w16( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height ){ uint8_t tmp1[16*16]; uint8_t tmp2[16*16]; mc_hc_w16( src, i_src_stride, tmp1, 16, i_height ); mc_hh_w16( src+i_src_stride, i_src_stride, tmp2, 16, i_height ); pixel_avg_w16( dst, i_dst_stride, tmp1, 16, tmp2, 16, i_height );}static void motion_compensation_luma( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int mvx,int mvy, int i_width, int i_height ){ static const pf_mc_t pf_mc[3][4][4] = /*XXX [dqy][dqx] */ { { { mc_copy_w4, mc_xy10_w4, mc_hh_w4, mc_xy30_w4 }, { mc_xy01_w4, mc_xy11_w4, mc_xy21_w4, mc_xy31_w4 }, { mc_hv_w4, mc_xy12_w4, mc_hc_w4, mc_xy32_w4 }, { mc_xy03_w4, mc_xy13_w4, mc_xy23_w4, mc_xy33_w4 }, }, { { mc_copy_w8, mc_xy10_w8, mc_hh_w8, mc_xy30_w8 }, { mc_xy01_w8, mc_xy11_w8, mc_xy21_w8, mc_xy31_w8 }, { mc_hv_w8, mc_xy12_w8, mc_hc_w8, mc_xy32_w8 }, { mc_xy03_w8, mc_xy13_w8, mc_xy23_w8, mc_xy33_w8 }, }, { { mc_copy_w16, mc_xy10_w16, mc_hh_w16, mc_xy30_w16 }, { mc_xy01_w16, mc_xy11_w16, mc_xy21_w16, mc_xy31_w16 }, { mc_hv_w16, mc_xy12_w16, mc_hc_w16, mc_xy32_w16 }, { mc_xy03_w16, mc_xy13_w16, mc_xy23_w16, mc_xy33_w16 }, } }; src += (mvy >> 2) * i_src_stride + (mvx >> 2); if( i_width == 4 ) { pf_mc[0][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); } else if( i_width == 8 ) { pf_mc[1][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); } else if( i_width == 16 ) { pf_mc[2][mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_height ); } else { fprintf( stderr, "Error: motion_compensation_luma called with invalid width" ); }}#if 0/* full chroma mc (ie until 1/8 pixel)*/static void motion_compensation_chroma( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int mvx, int mvy, int i_width, int i_height ){ int x, y; int d8x = mvx&0x07; int d8y = mvy&0x07; int cA = (8-d8x)*(8-d8y); int cB = d8x *(8-d8y); int cC = (8-d8x)*d8y; int cD = d8x *d8y; src += (mvy >> 3) * i_dst_stride + (mvx >> 3); for( y = 0; y < i_height; y++ ) { for( x = 0; x < i_width; x++ ) { dst[x] = ( cA*src[x] + cB*src[x+1] + cC*src[i_dst_stride+x] + cD*src[i_dst_stride+x+1] + 32 ) >> 6; } dst += i_src_stride; src += i_dst_stride; }}#endifvoid x264_mc_mmxext_init( x264_mc_t pf[2] ){ pf[MC_LUMA] = motion_compensation_luma;#if 0 pf[MC_CHROMA] = motion_compensation_chroma;#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -