📄 me.c
字号:
const int dh = x264_pixel_size[i_pixel].h * stride; static uint8_t zero[16*16] = {0,}; const int enc_dc = h->pixf.sad[i_pixel]( m->p_fenc[0], stride, zero, 16 ); const uint16_t *integral_base = &m->integral[ -1 - 1*stride ]; if( h->pixf.sad_pde[i_pixel] ) { for( my = min_y; my <= max_y; my++ ) for( mx = min_x; mx <= max_x; mx++ ) { const uint16_t *integral = &integral_base[ mx + my * stride ]; const uint16_t ref_dc = integral[ 0 ] + integral[ dh + dw ] - integral[ dw ] - integral[ dh ]; const int bsad = bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ]; if( abs( ref_dc - enc_dc ) < bsad ) COST_MV_PDE( mx, my ); } } else { for( my = min_y; my <= max_y; my++ ) for( mx = min_x; mx <= max_x; mx++ ) { const uint16_t *integral = &integral_base[ mx + my * stride ]; const uint16_t ref_dc = integral[ 0 ] + integral[ dh + dw ] - integral[ dw ] - integral[ dh ]; const int bsad = bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ]; if( abs( ref_dc - enc_dc ) < bsad ) COST_MV( mx, my ); } }#endif } break; } /* -> qpel mv */ m->mv[0] = bmx << 2; m->mv[1] = bmy << 2; /* compute the real cost */ m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ]; m->cost = bcost; if( bmx == pmx && bmy == pmy ) m->cost += m->cost_mv; /* subpel refine */ if( h->mb.i_subpel_refine >= 2 ) { int hpel = subpel_iterations[h->mb.i_subpel_refine][2]; int qpel = subpel_iterations[h->mb.i_subpel_refine][3]; refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 ); }}#undef COST_MVvoid x264_me_refine_qpel( x264_t *h, x264_me_t *m ){ int hpel = subpel_iterations[h->mb.i_subpel_refine][0]; int qpel = subpel_iterations[h->mb.i_subpel_refine][1]; if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P ) m->cost -= m->i_ref_cost; refine_subpel( h, m, hpel, qpel, NULL, 1 );}#define COST_MV_SAD( mx, my, dir ) \if( b_refine_qpel || (dir^1) != odir ) \{ \ int stride = 16; \ uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \ int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0], src, stride ) \ + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \ if( cost < bcost ) \ { \ bcost = cost; \ bmx = mx; \ bmy = my; \ bdir = dir; \ } \}#define COST_MV_SATD( mx, my, dir ) \if( b_refine_qpel || (dir^1) != odir ) \{ \ int stride = 16; \ uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \ int cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], m->i_stride[0], src, stride ) \ + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \ if( b_chroma_me && cost < bcost ) \ { \ h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 ); \ if( cost < bcost ) \ { \ h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix, 8 ); \ } \ } \ if( cost < bcost ) \ { \ bcost = cost; \ bmx = mx; \ bmy = my; \ bdir = dir; \ } \}static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel ){ const int bw = x264_pixel_size[m->i_pixel].w; const int bh = x264_pixel_size[m->i_pixel].h; const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0]; const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; const int i_pixel = m->i_pixel; const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8; DECLARE_ALIGNED( uint8_t, pix[16*16], 16 ); int omx, omy; int i; int bmx = m->mv[0]; int bmy = m->mv[1]; int bcost = m->cost; int odir = -1, bdir; /* try the subpel component of the predicted mv */ if( hpel_iters ) { int mx = x264_clip3( m->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); int my = x264_clip3( m->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] ); if( mx != bmx || my != bmy ) COST_MV_SAD( mx, my, -1 ); } /* hpel search */ bdir = -1; for( i = hpel_iters; i > 0; i-- ) { odir = bdir; omx = bmx; omy = bmy; COST_MV_SAD( omx, omy - 2, 0 ); COST_MV_SAD( omx, omy + 2, 1 ); COST_MV_SAD( omx - 2, omy, 2 ); COST_MV_SAD( omx + 2, omy, 3 ); if( bmx == omx && bmy == omy ) break; } if( !b_refine_qpel ) { bcost = COST_MAX; COST_MV_SATD( bmx, bmy, -1 ); } /* early termination when examining multiple reference frames */ if( p_halfpel_thresh ) { if( (bcost*7)>>3 > *p_halfpel_thresh ) { m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; // don't need cost_mv return; } else if( bcost < *p_halfpel_thresh ) *p_halfpel_thresh = bcost; } /* qpel search */ bdir = -1; for( i = qpel_iters; i > 0; i-- ) { odir = bdir; omx = bmx; omy = bmy; COST_MV_SATD( omx, omy - 1, 0 ); COST_MV_SATD( omx, omy + 1, 1 ); COST_MV_SATD( omx - 1, omy, 2 ); COST_MV_SATD( omx + 1, omy, 3 ); if( bmx == omx && bmy == omy ) break; } m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];}#define BIME_CACHE( dx, dy ) \{ \ int i = 4 + 3*dx + dy; \ h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+dx, om0y+dy, bw, bh ); \ h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+dx, om1y+dy, bw, bh ); \}#define BIME_CACHE2(a,b) \ BIME_CACHE(a,b) \ BIME_CACHE(-(a),-(b))#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \{ \ int cost; \ int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \ int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \ visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \ memcpy( pix, pix0[i0], bs ); \ if( i_weight == 32 ) \ h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \ else \ h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \ cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], m0->i_stride[0], pix, bw ) \ + p_cost_m0x[ m0x ] + p_cost_m0y[ m0y ] \ + p_cost_m1x[ m1x ] + p_cost_m1y[ m1y ]; \ if( cost < bcost ) \ { \ bcost = cost; \ bm0x = m0x; \ bm0y = m0y; \ bm1x = m1x; \ bm1y = m1y; \ } \}#define CHECK_BIDIR(a,b,c,d) \ COST_BIMV_SATD(om0x+a, om0y+b, om1x+c, om1y+d)#define CHECK_BIDIR2(a,b,c,d) \ CHECK_BIDIR(a,b,c,d) \ CHECK_BIDIR(-(a),-(b),-(c),-(d))#define CHECK_BIDIR8(a,b,c,d) \ CHECK_BIDIR2(a,b,c,d) \ CHECK_BIDIR2(b,c,d,a) \ CHECK_BIDIR2(c,d,a,b) \ CHECK_BIDIR2(d,a,b,c)int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ){ const int i_pixel = m0->i_pixel; const int bw = x264_pixel_size[i_pixel].w; const int bh = x264_pixel_size[i_pixel].h; const int bs = bw*bh; const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min[0], h->mb.mv_max[0] ); const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min[0], h->mb.mv_max[0] ); DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 ); DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 ); DECLARE_ALIGNED( uint8_t, pix[16*16], 16 ); int bm0x = m0->mv[0], om0x = bm0x; int bm0y = m0->mv[1], om0y = bm0y; int bm1x = m1->mv[0], om1x = bm1x; int bm1y = m1->mv[1], om1y = bm1y; int bcost = COST_MAX; int pass = 0; uint8_t visited[8][8][8][8]; memset( visited, 0, sizeof(visited) ); BIME_CACHE( 0, 0 ); CHECK_BIDIR( 0, 0, 0, 0 ); for( pass = 0; pass < 8; pass++ ) { /* check all mv pairs that differ in at most 2 components from the current mvs. */ /* doesn't do chroma ME. this probably doesn't matter, as the gains * from bidir ME are the same with and without chroma ME. */ BIME_CACHE2( 1, 0 ); BIME_CACHE2( 0, 1 ); BIME_CACHE2( 1, 1 ); BIME_CACHE2( 1,-1 ); CHECK_BIDIR8( 0, 0, 0, 1 ); CHECK_BIDIR8( 0, 0, 1, 1 ); CHECK_BIDIR2( 0, 1, 0, 1 ); CHECK_BIDIR2( 1, 0, 1, 0 ); CHECK_BIDIR8( 0, 0,-1, 1 ); CHECK_BIDIR2( 0,-1, 0, 1 ); CHECK_BIDIR2(-1, 0, 1, 0 ); if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y ) break; om0x = bm0x; om0y = bm0y; om1x = bm1x; om1y = bm1y; BIME_CACHE( 0, 0 ); } m0->mv[0] = bm0x; m0->mv[1] = bm0y; m1->mv[0] = bm1x; m1->mv[1] = bm1y; return bcost;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -