📄 me.c.svn-base
字号:
/* try the subpel component of the predicted mv */ if( hpel_iters && h->mb.i_subpel_refine < 3 ) { int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); if( mx != bmx || my != bmy ) COST_MV_SAD( mx, my ); } /* halfpel diamond search */ for( i = hpel_iters; i > 0; i-- ) { int omx = bmx, omy = bmy; int costs[4]; int stride = 32; // candidates are either all hpel or all qpel, so one stride is enough uint8_t *src0, *src1, *src2, *src3; src0 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, omx, omy-2, bw, bh+1 ); src2 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[1], &stride, omx-2, omy, bw+4, bh ); src1 = src0 + stride; src3 = src2 + 1; h->pixf.sad_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs ); COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx ] + p_cost_mvy[omy-2], bmy, omy-2 ); COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 ); COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy ); COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy ); if( bmx == omx && bmy == omy ) break; } if( !b_refine_qpel ) { /* check for mvrange */ if( bmy > h->mb.mv_max_spel[1] ) bmy = h->mb.mv_max_spel[1]; bcost = COST_MAX; COST_MV_SATD( bmx, bmy, -1 ); } /* early termination when examining multiple reference frames */ if( p_halfpel_thresh ) { if( (bcost*7)>>3 > *p_halfpel_thresh ) { m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; // don't need cost_mv return; } else if( bcost < *p_halfpel_thresh ) *p_halfpel_thresh = bcost; } /* quarterpel diamond search */ bdir = -1; for( i = qpel_iters; i > 0; i-- ) { odir = bdir; omx = bmx; omy = bmy; COST_MV_SATD( omx, omy - 1, 0 ); COST_MV_SATD( omx, omy + 1, 1 ); COST_MV_SATD( omx - 1, omy, 2 ); COST_MV_SATD( omx + 1, omy, 3 ); if( bmx == omx && bmy == omy ) break; } /* check for mvrange */ if( bmy > h->mb.mv_max_spel[1] ) { bmy = h->mb.mv_max_spel[1]; bcost = COST_MAX; COST_MV_SATD( bmx, bmy, -1 ); } m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];}#define BIME_CACHE( dx, dy ) \{ \ int i = 4 + 3*dx + dy; \ h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+dx, om0y+dy, bw, bh ); \ h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+dx, om1y+dy, bw, bh ); \}#define BIME_CACHE2(a,b) \ BIME_CACHE(a,b) \ BIME_CACHE(-(a),-(b))#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \{ \ int cost; \ int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \ int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \ visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \ memcpy( pix, pix0[i0], bs ); \ if( i_weight == 32 ) \ h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \ else \ h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \ cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], FENC_STRIDE, pix, bw ) \ + p_cost_m0x[ m0x ] + p_cost_m0y[ m0y ] \ + p_cost_m1x[ m1x ] + p_cost_m1y[ m1y ]; \ if( cost < bcost ) \ { \ bcost = cost; \ bm0x = m0x; \ bm0y = m0y; \ bm1x = m1x; \ bm1y = m1y; \ } \}#define CHECK_BIDIR(a,b,c,d) \ COST_BIMV_SATD(om0x+a, om0y+b, om1x+c, om1y+d)#define CHECK_BIDIR2(a,b,c,d) \ CHECK_BIDIR(a,b,c,d) \ CHECK_BIDIR(-(a),-(b),-(c),-(d))#define CHECK_BIDIR8(a,b,c,d) \ CHECK_BIDIR2(a,b,c,d) \ CHECK_BIDIR2(b,c,d,a) \ CHECK_BIDIR2(c,d,a,b) \ CHECK_BIDIR2(d,a,b,c)int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ){ const int i_pixel = m0->i_pixel; const int bw = x264_pixel_size[i_pixel].w; const int bh = x264_pixel_size[i_pixel].h; const int bs = bw*bh; const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 ); DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 ); DECLARE_ALIGNED( uint8_t, pix[16*16], 16 ); int bm0x = m0->mv[0], om0x = bm0x; int bm0y = m0->mv[1], om0y = bm0y; int bm1x = m1->mv[0], om1x = bm1x; int bm1y = m1->mv[1], om1y = bm1y; int bcost = COST_MAX; int pass = 0; uint8_t visited[8][8][8][8]; memset( visited, 0, sizeof(visited) ); BIME_CACHE( 0, 0 ); CHECK_BIDIR( 0, 0, 0, 0 ); if( bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 ) return bcost; for( pass = 0; pass < 8; pass++ ) { /* check all mv pairs that differ in at most 2 components from the current mvs. */ /* doesn't do chroma ME. this probably doesn't matter, as the gains * from bidir ME are the same with and without chroma ME. */ BIME_CACHE2( 1, 0 ); BIME_CACHE2( 0, 1 ); BIME_CACHE2( 1, 1 ); BIME_CACHE2( 1,-1 ); CHECK_BIDIR8( 0, 0, 0, 1 ); CHECK_BIDIR8( 0, 0, 1, 1 ); CHECK_BIDIR2( 0, 1, 0, 1 ); CHECK_BIDIR2( 1, 0, 1, 0 ); CHECK_BIDIR8( 0, 0,-1, 1 ); CHECK_BIDIR2( 0,-1, 0, 1 ); CHECK_BIDIR2(-1, 0, 1, 0 ); if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y ) break; om0x = bm0x; om0y = bm0y; om1x = bm1x; om1y = bm1y; BIME_CACHE( 0, 0 ); } m0->mv[0] = bm0x; m0->mv[1] = bm0y; m1->mv[0] = bm1x; m1->mv[1] = bm1y; return bcost;}#undef COST_MV_SATD#define COST_MV_SATD( mx, my, dst ) \{ \ int stride = 16; \ uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw*4, bh*4 ); \ dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + p_cost_mvx[mx] + p_cost_mvy[my]; \ COPY1_IF_LT( bsatd, dst ); \}#define COST_MV_RD( mx, my, satd, do_dir, mdir ) \{ \ if( satd <= bsatd * SATD_THRESH )\ { \ int cost; \ cache_mv[0] = cache_mv2[0] = mx; \ cache_mv[1] = cache_mv2[1] = my; \ cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \ COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \ } \}#define SATD_THRESH 17/16void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 ){ // don't have to fill the whole mv cache rectangle static const int pixel_mv_offs[] = { 0, 4, 4*8, 0 }; int16_t *cache_mv = h->mb.cache.mv[0][x264_scan8[i8*4]]; int16_t *cache_mv2 = cache_mv + pixel_mv_offs[m->i_pixel]; const int16_t *p_cost_mvx, *p_cost_mvy; const int bw = x264_pixel_size[m->i_pixel].w>>2; const int bh = x264_pixel_size[m->i_pixel].h>>2; const int i_pixel = m->i_pixel; DECLARE_ALIGNED( uint8_t, pix[16*16], 16 ); int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX; int bmx = m->mv[0]; int bmy = m->mv[1]; int omx = bmx; int omy = bmy; int pmx, pmy, i, j; unsigned bsatd; int satd = 0; int dir = -2; int satds[8]; if( m->i_pixel != PIXEL_16x16 && i8 != 0 ) x264_mb_predict_mv( h, 0, i8*4, bw, m->mvp ); pmx = m->mvp[0]; pmy = m->mvp[1]; p_cost_mvx = m->p_cost_mv - pmx; p_cost_mvy = m->p_cost_mv - pmy; COST_MV_SATD( bmx, bmy, bsatd ); COST_MV_RD( bmx, bmy, 0, 0, 0); /* check the predicted mv */ if( (bmx != pmx || bmy != pmy) && pmx >= h->mb.mv_min_spel[0] && pmx <= h->mb.mv_max_spel[0] && pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] ) { COST_MV_SATD( pmx, pmy, satd ); COST_MV_RD( pmx, pmy, satd, 0,0 ); } /* subpel hex search, same pattern as ME HEX. */ dir = -2; omx = bmx; omy = bmy; for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j] ); for( j=0; j<6; j++ ) COST_MV_RD ( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1,j ); if( dir != -2 ) { /* half hexagon, not overlapping the previous iteration */ for( i = 1; i < 10; i++ ) { const int odir = mod6m1[dir+1]; if( bmy > h->mb.mv_max_spel[1] - 2 || bmy < h->mb.mv_min_spel[1] - 2 ) break; dir = -2; omx = bmx; omy = bmy; for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j] ); for( j=0; j<3; j++ ) COST_MV_RD ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1, odir-1+j ); if( dir == -2 ) break; } } /* square refine, same as pattern as ME HEX. */ omx = bmx; omy = bmy; for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy + square1[i][1], satds[i] ); for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i][0], omy + square1[i][1], satds[i], 0,0 ); bmy = x264_clip3( bmy, h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); m->cost = bcost; m->mv[0] = bmx; m->mv[1] = bmy; x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy ); x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - pmx, bmy - pmy );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -