📄 me.c
字号:
static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 };
int ucost1, ucost2;
int cross_start = 1;
/* refine predictors */
ucost1 = bcost;
DIA1_ITER( pmx, pmy );
if( pmx || pmy )
DIA1_ITER( 0, 0 );
if(i_pixel == PIXEL_4x4)
goto me_hex2;
ucost2 = bcost;
if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) )
DIA1_ITER( bmx, bmy );
if( bcost == ucost2 )
cross_start = 3;
omx = bmx; omy = bmy;
/* early termination */
#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) )
if( bcost == ucost2 && SAD_THRESH(2000) )
{
COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 );
COST_MV_X4( 2, 0, -1, 1, 1, 1, 0,2 );
if( bcost == ucost1 && SAD_THRESH(500) )
break;
if( bcost == ucost2 )
{
int range = (i_me_range>>1) | 1;
CROSS( 3, range, range );
COST_MV_X4( -1,-2, 1,-2, -2,-1, 2,-1 );
COST_MV_X4( -2, 1, 2, 1, -1, 2, 1, 2 );
if( bcost == ucost2 )
break;
cross_start = range + 2;
}
}
/* adaptive search range */
if( i_mvc )
{
/* range multipliers based on casual inspection of some statistics of
* average distance between current predictor and final mv found by ESA.
* these have not been tuned much by actual encoding. */
static const int range_mul[4][4] =
{
{ 3, 3, 4, 4 },
{ 3, 4, 4, 4 },
{ 4, 4, 4, 5 },
{ 4, 4, 5, 6 },
};
int mvd;
int sad_ctx, mvd_ctx;
if( i_mvc == 1 )
{
if( i_pixel == PIXEL_16x16 )
/* mvc is probably the same as mvp, so the difference isn't meaningful.
* but prediction usually isn't too bad, so just use medium range */
mvd = 25;
else
mvd = abs( m->mvp[0] - mvc[0][0] )
+ abs( m->mvp[1] - mvc[0][1] );
}
else
{
/* calculate the degree of agreement between predictors. */
/* in 16x16, mvc includes all the neighbors used to make mvp,
* so don't count mvp separately. */
int i_denom = i_mvc - 1;
mvd = 0;
if( i_pixel != PIXEL_16x16 )
{
mvd = abs( m->mvp[0] - mvc[0][0] )
+ abs( m->mvp[1] - mvc[0][1] );
i_denom++;
}
for( i = 0; i < i_mvc-1; i++ )
mvd += abs( mvc[i][0] - mvc[i+1][0] )
+ abs( mvc[i][1] - mvc[i+1][1] );
mvd /= i_denom; //FIXME idiv
}
sad_ctx = SAD_THRESH(1000) ? 0
: SAD_THRESH(2000) ? 1
: SAD_THRESH(4000) ? 2 : 3;
mvd_ctx = mvd < 10 ? 0
: mvd < 20 ? 1
: mvd < 40 ? 2 : 3;
i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4;
}
/* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy.
* we are still centered on the same place as the DIA2. is this desirable? */
CROSS( cross_start, i_me_range, i_me_range/2 );
/* 5x5 ESA */
omx = bmx; omy = bmy;
if( bcost != ucost2 )
COST_MV_X4( 1, 0, 0, 1, -1, 0, 0,-1 );
COST_MV_X4( 1, 1, -1, 1, -1,-1, 1,-1 );
COST_MV_X4( 2,-1, 2, 0, 2, 1, 2, 2 );
COST_MV_X4( 1, 2, 0, 2, -1, 2, -2, 2 );
COST_MV_X4( -2, 1, -2, 0, -2,-1, -2,-2 );
COST_MV_X4( -1,-2, 0,-2, 1,-2, 2,-2 );
/* hexagon grid */
omx = bmx; omy = bmy;
for( i = 1; i <= i_me_range/4; i++ )
{
static const int hex4[16][2] = {
{-4, 2}, {-4, 1}, {-4, 0}, {-4,-1}, {-4,-2},
{ 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
{ 2, 3}, { 0, 4}, {-2, 3},
{-2,-3}, { 0,-4}, { 2,-3},
};
if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min,
mv_y_max-omy, omy-mv_y_min ) )
{
for( j = 0; j < 16; j++ )
{
int mx = omx + hex4[j][0]*i;
int my = omy + hex4[j][1]*i;
if( mx >= mv_x_min && mx <= mv_x_max
&& my >= mv_y_min && my <= mv_y_max )
COST_MV( mx, my );
}
}
else
{
COST_MV_X4( -4*i, 2*i, -4*i, 1*i, -4*i, 0*i, -4*i,-1*i );
COST_MV_X4( -4*i,-2*i, 4*i,-2*i, 4*i,-1*i, 4*i, 0*i );
COST_MV_X4( 4*i, 1*i, 4*i, 2*i, 2*i, 3*i, 0*i, 4*i );
COST_MV_X4( -2*i, 3*i, -2*i,-3*i, 0*i,-4*i, 2*i,-3*i );
}
}
goto me_hex2;
}
case X264_ME_ESA:
{
const int min_x = X264_MAX( bmx - i_me_range, mv_x_min);
const int min_y = X264_MAX( bmy - i_me_range, mv_y_min);
const int max_x = X264_MIN( bmx + i_me_range, mv_x_max);
const int max_y = X264_MIN( bmy + i_me_range, mv_y_max);
int mx, my;
#if 0
/* plain old exhaustive search */
for( my = min_y; my <= max_y; my++ )
for( mx = min_x; mx <= max_x; mx++ )
COST_MV( mx, my );
#else
/* successive elimination by comparing DC before a full SAD,
* because sum(abs(diff)) >= abs(diff(sum)). */
const int stride = m->i_stride[0];
const int dw = x264_pixel_size[i_pixel].w;
const int dh = x264_pixel_size[i_pixel].h * stride;
static uint8_t zero[16*16] = {0,};
const int enc_dc = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, zero, 16 );
const uint16_t *integral_base = &m->integral[ -1 - 1*stride ];
for( my = min_y; my <= max_y; my++ )
{
int mvs[3], i_mvs=0;
for( mx = min_x; mx <= max_x; mx++ )
{
const uint16_t *integral = &integral_base[ mx + my * stride ];
const uint16_t ref_dc = integral[ 0 ] + integral[ dh + dw ]
- integral[ dw ] - integral[ dh ];
const int bsad = bcost - BITS_MVD(mx,my);
if( abs( ref_dc - enc_dc ) < bsad )
{
if( i_mvs == 3 )
{
COST_MV_X4_ABS( mvs[0],my, mvs[1],my, mvs[2],my, mx,my );
i_mvs = 0;
}
else
mvs[i_mvs++] = mx;
}
}
for( i=0; i<i_mvs; i++ )
COST_MV( mvs[i], my );
}
#endif
}
break;
}
/* -> qpel mv */
if( bpred_cost < bcost )
{
m->mv[0] = bpred_mx;
m->mv[1] = bpred_my;
m->cost = bpred_cost;
}
else
{
m->mv[0] = bmx << 2;
m->mv[1] = bmy << 2;
m->cost = bcost;
}
/* compute the real cost */
m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 )
m->cost += m->cost_mv;
/* subpel refine */
if( h->mb.i_subpel_refine >= 2 )
{
int hpel = subpel_iterations[h->mb.i_subpel_refine][2];
int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
}
}
#undef COST_MV
/* Run the sub-pixel refinement stage on an existing motion estimate.
 *
 * h: encoder context; supplies the subpel refinement level (h->mb.i_subpel_refine)
 *    and the current slice type (h->sh.i_type).
 * m: motion-estimation state; m->cost may be adjusted here before it is
 *    handed to refine_subpel().
 *
 * The half-pel and quarter-pel iteration counts are read from columns 0 and 1
 * of the subpel_iterations row selected by the refinement level.
 * refine_subpel() is invoked without a half-pel threshold pointer (NULL) and
 * with b_refine_qpel set to 1. */
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
{
    const int n_hpel_iters = subpel_iterations[h->mb.i_subpel_refine][0];
    const int n_qpel_iters = subpel_iterations[h->mb.i_subpel_refine][1];

    /* In P slices, partitions with i_pixel <= PIXEL_8x8 have the reference
     * cost taken back out of the accumulated cost before refinement.
     * NOTE(review): presumably the caller added i_ref_cost earlier -- confirm. */
    const int b_drop_ref_cost = h->sh.i_type == SLICE_TYPE_P
                             && m->i_pixel <= PIXEL_8x8;

    if( b_drop_ref_cost )
        m->cost -= m->i_ref_cost;

    refine_subpel( h, m, n_hpel_iters, n_qpel_iters, NULL, 1 );
}
/* COST_MV_SAD( mx, my ): evaluate one candidate motion vector with SAD.
 *
 * Obtains the reference block for (mx, my) from h->mc.get_ref(), computes
 * h->pixf.sad[i_pixel] against m->p_fenc[0], and adds the mv cost terms
 * p_cost_mvx[mx] + p_cost_mvy[my]. The result is passed to COPY3_IF_LT
 * together with bcost/bmx/bmy (COPY3_IF_LT is defined elsewhere; presumably it
 * stores cost, mx, my into bcost, bmx, bmy when cost < bcost -- confirm).
 *
 * get_ref() is given pix[0] and the address of the local stride (initialised
 * to 16); the SAD call uses whatever pointer and stride come back.
 *
 * Names taken from the expansion site: h, m, pix, bw, bh, i_pixel,
 * p_cost_mvx, p_cost_mvy, bcost, bmx, bmy.
 *
 * NOTE(review): mx and my are expanded several times, so arguments with side
 * effects are unsafe. The macro expands to a bare { } block; written as
 * "COST_MV_SAD(a, b);" it cannot be followed directly by an else. */
#define COST_MV_SAD( mx, my ) \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, mx, my, bw, bh ); \
int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \
}
/* COST_MV_SATD( mx, my, dir ): evaluate one candidate motion vector with the
 * h->pixf.mbcmp comparison function, optionally including chroma.
 *
 * The candidate is evaluated only when b_refine_qpel is set or (dir^1) differs
 * from odir (presumably this skips stepping straight back in the direction
 * just searched -- confirm against the callers).
 *
 * Steps, in order:
 *   1. Luma: reference block from h->mc.get_ref() (pix[0] and &stride passed
 *      in, stride initialised to 16), compared with m->p_fenc[0] via
 *      mbcmp[i_pixel], plus p_cost_mvx[mx] + p_cost_mvy[my].
 *   2. If b_chroma_me and the cost is still below bcost: h->mc.mc_chroma()
 *      from m->p_fref[4] into pix[0] (stride 8, size bw/2 x bh/2), compared
 *      with m->p_fenc[1] via mbcmp[i_pixel+3] and added to the cost.
 *   3. If the cost is still below bcost after that: the same again for
 *      m->p_fref[5] against m->p_fenc[2].
 *   4. If the final cost is below bcost: bcost, bmx, bmy and bdir are updated
 *      to this candidate.
 *
 * Names taken from the expansion site: h, m, pix, bw, bh, i_pixel,
 * p_cost_mvx, p_cost_mvy, b_refine_qpel, b_chroma_me, odir, bcost, bmx, bmy,
 * bdir.
 *
 * NOTE(review): the expansion is a bare "if( ... ) { ... }", so an else written
 * after a use of this macro would bind to the macro's own if. mx, my and dir
 * are expanded multiple times and dir is not parenthesised inside (dir^1);
 * pass only simple, side-effect-free expressions. */
#define COST_MV_SATD( mx, my, dir ) \
if( b_refine_qpel || (dir^1) != odir ) \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, mx, my, bw, bh ); \
int cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
if( b_chroma_me && cost < bcost ) \
{ \
h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix[0], 8, mx, my, bw/2, bh/2 ); \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix[0], 8 ); \
if( cost < bcost ) \
{ \
h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix[0], 8, mx, my, bw/2, bh/2 ); \
cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \
} \
} \
if( cost < bcost ) \
{ \
bcost = cost; \
bmx = mx; \
bmy = my; \
bdir = dir; \
} \
}
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
{
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
DECLARE_ALIGNED( uint8_t, pix[4][16*16], 16 );
int omx, omy;
int i;
int bmx = m->mv[0];
int bmy = m->mv[1];
int bcost = m->cost;
int odir = -1, bdir;
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
if( mx != bmx || my != bmy )
COST_MV_SAD( mx, my );
}
/* halfpel diamond search */
for( i = hpel_iters; i > 0; i-- )
{
int omx = bmx, omy = bmy;
int costs[4];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -