📄 me.c
字号:
int stride = 16; // candidates are either all hpel or all qpel, so one stride is enough
uint8_t *src0, *src1, *src2, *src3;
src0 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, omx, omy-2, bw, bh );
src2 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[2], &stride, omx-2, omy, bw, bh );
if( (omx|omy)&1 )
{
src1 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[1], &stride, omx, omy+2, bw, bh );
src3 = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[3], &stride, omx+2, omy, bw, bh );
}
else
{
src1 = src0 + stride;
src3 = src2 + 1;
}
h->pixf.sad_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs );
COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx ] + p_cost_mvy[omy-2], bmy, omy-2 );
COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 );
COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy );
COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy );
if( bmx == omx && bmy == omy )
break;
}
if( !b_refine_qpel )
{
bcost = COST_MAX;
COST_MV_SATD( bmx, bmy, -1 );
}
/* early termination when examining multiple reference frames */
if( p_halfpel_thresh )
{
if( (bcost*7)>>3 > *p_halfpel_thresh )
{
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
// don't need cost_mv
return;
}
else if( bcost < *p_halfpel_thresh )
*p_halfpel_thresh = bcost;
}
/* quarterpel diamond search */
bdir = -1;
for( i = qpel_iters; i > 0; i-- )
{
odir = bdir;
omx = bmx;
omy = bmy;
COST_MV_SATD( omx, omy - 1, 0 );
COST_MV_SATD( omx, omy + 1, 1 );
COST_MV_SATD( omx - 1, omy, 2 );
COST_MV_SATD( omx + 1, omy, 3 );
if( bmx == omx && bmy == omy )
break;
}
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];
}
/* BIME_CACHE( dx, dy ): fill one slot of the bidirectional-refinement cache.
 *
 * The slot index is 4 + 3*dx + dy, so offsets in -1..1 map onto slots 0..8
 * with (0,0) in the middle. For that slot mc_luma is called once per
 * reference:
 *   - m0's reference at mv (om0x+dx, om0y+dy), output into pix0[slot]
 *   - m1's reference at mv (om1x+dx, om1y+dy), output into pix1[slot]
 * bw is passed both as the 4th argument (presumably the output stride; the
 * cached blocks are later read back with stride bw) and as the block width.
 *
 * Expects h, m0, m1, pix0, pix1, bw, bh, om0x, om0y, om1x and om1y to be in
 * scope at the expansion site. dx and dy are pasted unparenthesized into the
 * index expression, so pass only literals or already-parenthesized terms. */
#define BIME_CACHE( dx, dy ) \
{ \
int i = 4 + 3*dx + dy; \
h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+dx, om0y+dy, bw, bh ); \
h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+dx, om1y+dy, bw, bh ); \
}
/* BIME_CACHE2( a, b ): fill the cache slot for offset (a,b) and the slot for
 * its mirror image (-a,-b). */
#define BIME_CACHE2(a,b) \
BIME_CACHE(a,b) \
BIME_CACHE(-(a),-(b))
/* COST_BIMV_SATD( m0x, m0y, m1x, m1y ): evaluate one candidate pair of mvs.
 *
 * The candidate is skipped when pass != 0 and it is already flagged in
 * visited[], which is indexed by the low 3 bits of each of the four mv
 * components. Otherwise it is flagged and costed:
 *   1. i0 / i1 select the cached predictions from each mv's offset relative
 *      to (om0x,om0y) / (om1x,om1y), using the same 4 + 3*dx + dy layout as
 *      BIME_CACHE. Those slots must already have been filled.
 *   2. pix0[i0] is copied into pix (bs bytes), then combined with pix1[i1]
 *      by mc.avg when i_weight == 32, or by mc.avg_weight otherwise.
 *   3. cost = mbcmp of pix against m0->p_fenc[0], plus the four mv-cost
 *      table entries.
 *   4. On a strictly lower cost, bcost and bm0x/bm0y/bm1x/bm1y are updated.
 *
 * NOTE: this expands to a bare `if` statement and evaluates its arguments
 * many times; do not use it as the unbraced body of an if/else and do not
 * pass arguments with side effects. */
#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \
if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \
{ \
int cost; \
int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \
int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \
visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \
memcpy( pix, pix0[i0], bs ); \
if( i_weight == 32 ) \
h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \
else \
h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \
cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], FENC_STRIDE, pix, bw ) \
+ p_cost_m0x[ m0x ] + p_cost_m0y[ m0y ] \
+ p_cost_m1x[ m1x ] + p_cost_m1y[ m1y ]; \
if( cost < bcost ) \
{ \
bcost = cost; \
bm0x = m0x; \
bm0y = m0y; \
bm1x = m1x; \
bm1y = m1y; \
} \
}
/* CHECK_BIDIR( a, b, c, d ): cost the mv pair obtained by adding (a,b) to the
 * current list-0 centre (om0x,om0y) and (c,d) to the list-1 centre
 * (om1x,om1y). */
#define CHECK_BIDIR(a,b,c,d) \
COST_BIMV_SATD(om0x+a, om0y+b, om1x+c, om1y+d)
/* CHECK_BIDIR2: cost the offset tuple (a,b,c,d) and its negation. */
#define CHECK_BIDIR2(a,b,c,d) \
CHECK_BIDIR(a,b,c,d) \
CHECK_BIDIR(-(a),-(b),-(c),-(d))
/* CHECK_BIDIR8: apply CHECK_BIDIR2 to all four cyclic rotations of
 * (a,b,c,d), i.e. eight candidates in total. */
#define CHECK_BIDIR8(a,b,c,d) \
CHECK_BIDIR2(a,b,c,d) \
CHECK_BIDIR2(b,c,d,a) \
CHECK_BIDIR2(c,d,a,b) \
CHECK_BIDIR2(d,a,b,c)
/* Jointly refine the two motion vectors of a bidirectionally predicted block.
 *
 * Starting from m0->mv and m1->mv, the search repeatedly costs mv pairs near
 * the current best pair (see the CHECK_BIDIR* invocations below) and moves to
 * the cheapest one. It stops after at most 8 passes, or as soon as a pass
 * leaves the best pair unchanged.
 *
 *   h         encoder context (mc / pixf function tables, mv limits)
 *   m0, m1    the two motion-estimation states; their mv[] fields are
 *             overwritten with the refined vectors
 *   i_weight  32 selects plain averaging (mc.avg); any other value is handed
 *             to mc.avg_weight
 *
 * Returns the lowest cost found (mbcmp score plus mv costs for both vectors).
 */
int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight )
{
    const int i_pixel = m0->i_pixel;
    const int bw = x264_pixel_size[i_pixel].w;
    const int bh = x264_pixel_size[i_pixel].h;
    const int bs = bw*bh;
    /* mv-cost tables re-based on the clipped mv predictor. Index [0] of the
     * spel limits is the horizontal range and [1] the vertical range, so the
     * y predictors must be clipped against [1] (they were previously clipped
     * against the horizontal limits). */
    const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
    const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
    const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
    /* 3x3 grid of cached predictions around each current mv (slot 4 = centre) */
    DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
    DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
    /* scratch buffer holding the combined prediction of one candidate */
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    int bm0x = m0->mv[0], om0x = bm0x;
    int bm0y = m0->mv[1], om0y = bm0y;
    int bm1x = m1->mv[0], om1x = bm1x;
    int bm1y = m1->mv[1], om1y = bm1y;
    int bcost = COST_MAX;
    int pass = 0;
    /* candidates already costed, keyed by the low 3 bits of each component */
    uint8_t visited[8][8][8][8];
    memset( visited, 0, sizeof(visited) );

    /* cost the starting pair so bcost is valid before the first pass */
    BIME_CACHE( 0, 0 );
    CHECK_BIDIR( 0, 0, 0, 0 );

    for( pass = 0; pass < 8; pass++ )
    {
        /* check all mv pairs that differ in at most 2 components from the current mvs. */
        /* doesn't do chroma ME. this probably doesn't matter, as the gains
         * from bidir ME are the same with and without chroma ME. */

        /* fill the eight neighbour slots; the centre slot is already cached */
        BIME_CACHE2( 1, 0 );
        BIME_CACHE2( 0, 1 );
        BIME_CACHE2( 1, 1 );
        BIME_CACHE2( 1,-1 );

        CHECK_BIDIR8( 0, 0, 0, 1 );
        CHECK_BIDIR8( 0, 0, 1, 1 );
        CHECK_BIDIR2( 0, 1, 0, 1 );
        CHECK_BIDIR2( 1, 0, 1, 0 );
        CHECK_BIDIR8( 0, 0,-1, 1 );
        CHECK_BIDIR2( 0,-1, 0, 1 );
        CHECK_BIDIR2(-1, 0, 1, 0 );

        /* converged: no candidate beat the current centre */
        if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y )
            break;

        /* re-centre on the new best pair and cache its prediction */
        om0x = bm0x;
        om0y = bm0y;
        om1x = bm1x;
        om1y = bm1y;
        BIME_CACHE( 0, 0 );
    }

    m0->mv[0] = bm0x;
    m0->mv[1] = bm0y;
    m1->mv[0] = bm1x;
    m1->mv[1] = bm1y;
    return bcost;
}
/* COST_MV_SATD is redefined for the RD refinement that follows. In this
 * version the third argument is an lvalue that receives the cost (earlier
 * uses in this file pass an integer constant there instead).
 *
 * COST_MV_SATD( mx, my, dst ):
 *   - asks get_ref for the reference block at mv (mx,my), sized bw*4 x bh*4,
 *     offering pix as scratch space and a local stride initialised to 16
 *     that is passed by address (presumably so get_ref can report the stride
 *     of whatever buffer it returns);
 *   - stores in dst the mbcmp score against m->p_fenc[0] plus the mv cost
 *     p_cost_mvx[mx] + p_cost_mvy[my];
 *   - hands (bsatd, dst) to COPY1_IF_LT, which is defined elsewhere and
 *     presumably lowers bsatd to dst when dst is smaller.
 * Expects h, m, pix, bw, bh, i_pixel, p_cost_mvx, p_cost_mvy and bsatd to be
 * in scope. mx and my are evaluated more than once. */
#undef COST_MV_SATD
#define COST_MV_SATD( mx, my, dst ) \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw*4, bh*4 ); \
dst = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[mx] + p_cost_mvy[my]; \
COPY1_IF_LT( bsatd, dst ); \
}
/* COST_MV_RD( mx, my, satd, dir ): run the RD cost for mv (mx,my), but only
 * if all three gates pass:
 *   - satd is at most bsatd * SATD_THRESH (i.e. bsatd*17/16);
 *   - (dir^1) differs from odir, where dir^1 is the opposite member of the
 *     direction pairs 0/1 and 2/3;
 *   - dir is negative, or the position is not yet flagged in p_visited[]
 *     (indexed as mx + my*16).
 * When it runs, the mv is written to cache_mv and cache_mv2, the cost comes
 * from x264_rd_cost_part, and COPY3_IF_LT (defined elsewhere; presumably
 * "update on strictly lower cost") receives bcost/bmx/bmy. Positions reached
 * with dir >= 0 are then flagged as visited.
 * Expects h, m, i_lambda2, i8, cache_mv, cache_mv2, bsatd, odir, p_visited,
 * bcost, bmx and bmy to be in scope. dir is pasted unparenthesized. */
#define COST_MV_RD( mx, my, satd, dir ) \
{ \
if( satd <= bsatd * SATD_THRESH \
&& (dir^1) != odir \
&& (dir<0 || !p_visited[(mx)+(my)*16]) ) \
{ \
int cost; \
cache_mv[0] = cache_mv2[0] = mx; \
cache_mv[1] = cache_mv2[1] = my; \
cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \
COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \
if(dir>=0) p_visited[(mx)+(my)*16] = 1; \
} \
}
/* Deliberately left unparenthesized: COST_MV_RD writes `bsatd * SATD_THRESH`,
 * which must expand to `bsatd * 17/16` (multiply first, then integer-divide).
 * Wrapping the ratio in parentheses would collapse it to the integer 1. */
#define SATD_THRESH 17/16
/* Rate-distortion refinement of a single motion vector.
 *
 * Candidates are first scored with COST_MV_SATD; only those that pass the
 * gates in COST_MV_RD get a full x264_rd_cost_part evaluation. The search
 * costs the incoming mv and (when usable) the predicted mv, then runs a
 * half-pel diamond followed by a quarter-pel diamond, each for at most two
 * iterations.
 *
 *   h          encoder context
 *   m          motion-estimation state; cost and mv[] are overwritten
 *   i_lambda2  passed through to x264_rd_cost_part
 *   i8         partition index; selects the mv-cache position
 *
 * On exit the chosen mv and its difference from the predictor are also
 * written to the macroblock mv / mvd caches.
 */
void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
{
    // the mv cache rectangle is only partially filled: first entry plus one more
    static const int second_mv_offs[] = { 0, 4, 4*8, 0 };
    const int i_pixel = m->i_pixel;
    /* block size in units of 4 pixels */
    const int bw = x264_pixel_size[i_pixel].w>>2;
    const int bh = x264_pixel_size[i_pixel].h>>2;
    int16_t *cache_mv = h->mb.cache.mv[0][x264_scan8[i8*4]];
    int16_t *cache_mv2 = cache_mv + second_mv_offs[i_pixel];
    const int16_t *p_cost_mvx, *p_cost_mvy;
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    /* for 16x16 the incoming cost is kept as the starting best cost; any
     * other size starts from COST_MAX and is RD-costed below */
    int bcost = ( i_pixel == PIXEL_16x16 ) ? m->cost : COST_MAX;
    int bmx = m->mv[0];
    int bmy = m->mv[1];
    int pred_x, pred_y;     /* predicted mv */
    int ctr_x, ctr_y;       /* diamond centre for the current iteration */
    int step, iter;
    int odir = -1, bdir;
    unsigned bsatd, diamond_satd[4];
    int seen[16*13] = {0}; // 13x13 would do; a row pitch of 16 is handier
    int *p_visited = &seen[6+6*16];

    if( i_pixel != PIXEL_16x16 && i8 != 0 )
    {
        x264_mb_predict_mv( h, 0, i8*4, bw, m->mvp );
    }
    pred_x = m->mvp[0];
    pred_y = m->mvp[1];
    p_cost_mvx = m->p_cost_mv - pred_x;
    p_cost_mvy = m->p_cost_mv - pred_y;

    /* score the starting mv; this seeds bsatd */
    COST_MV_SATD( bmx, bmy, bsatd );
    if( i_pixel != PIXEL_16x16 )
    {
        COST_MV_RD( bmx, bmy, 0, -1 );
    }

    /* try the predicted mv when it differs from the start and is in range */
    if( !(bmx == pred_x && bmy == pred_y)
        && h->mb.mv_min_spel[0] <= pred_x && pred_x <= h->mb.mv_max_spel[0]
        && h->mb.mv_min_spel[1] <= pred_y && pred_y <= h->mb.mv_max_spel[1] )
    {
        int pred_satd;
        COST_MV_SATD( pred_x, pred_y, pred_satd );
        COST_MV_RD( pred_x, pred_y, pred_satd, -1 );
    }

    /* Flag the current best as visited, then re-base p_visited so it can be
     * indexed with absolute mv coordinates.
     * NOTE(review): the re-based pointer can lie outside seen[]; only the
     * indexed accesses land back inside the array. */
    p_visited[0] = 1;
    p_visited -= bmx + bmy*16;
    {
        /* whichever of (start mv, predicted mv) is not the current best */
        int alt_x = bmx ^ m->mv[0] ^ pred_x;
        int alt_y = bmy ^ m->mv[1] ^ pred_y;
        if( abs(alt_x-bmx) < 7 && abs(alt_y-bmy) < 7 )
        {
            p_visited[alt_x + alt_y*16] = 1;
        }
    }

    /* step 2: hpel diamond, step 1: qpel diamond */
    for( step = 2; step > 0; step-- )
    {
        bdir = -1;
        for( iter = 0; iter < 2; iter++ )
        {
            ctr_x = bmx;
            ctr_y = bmy;
            odir = bdir;
            /* score all four neighbours first so bsatd is final before the
             * threshold test inside COST_MV_RD */
            COST_MV_SATD( ctr_x, ctr_y - step, diamond_satd[0] );
            COST_MV_SATD( ctr_x, ctr_y + step, diamond_satd[1] );
            COST_MV_SATD( ctr_x - step, ctr_y, diamond_satd[2] );
            COST_MV_SATD( ctr_x + step, ctr_y, diamond_satd[3] );
            COST_MV_RD( ctr_x, ctr_y - step, diamond_satd[0], 0 );
            COST_MV_RD( ctr_x, ctr_y + step, diamond_satd[1], 1 );
            COST_MV_RD( ctr_x - step, ctr_y, diamond_satd[2], 2 );
            COST_MV_RD( ctr_x + step, ctr_y, diamond_satd[3], 3 );
            /* centre is still the best: this diamond has converged */
            if( bmx == ctr_x && bmy == ctr_y )
                break;
        }
    }

    m->cost = bcost;
    m->mv[0] = bmx;
    m->mv[1] = bmy;
    x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, 0, bmx, bmy );
    x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, 0, bmx - pred_x, bmy - pred_y );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -