📄 me.c.svn-base
字号:
/***************************************************************************** * me.c: h264 encoder library (Motion Estimation) ***************************************************************************** * Copyright (C) 2003 Laurent Aimar * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $ * * Authors: Laurent Aimar <fenrir@via.ecp.fr> * Loren Merritt <lorenm@u.washington.edu> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/#include <stdio.h>#include <string.h>#include "common/common.h"#include "me.h"/* presets selected from good points on the speed-vs-quality curve of several test videos * subpel_iters[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel } * where me_* are the number of EPZS iterations run on all candidate block types, * and refine_* are run only on the winner. 
*/

/* Subpel iteration presets: 7 rows of 4 counts each.  The column order is the
 * one given in the comment preceding this table (which refers to the table as
 * subpel_iters): { refine_hpel, refine_qpel, me_hpel, me_qpel }. */
static const int subpel_iterations[][4] =
   {{1,0,0,0},
    {1,1,0,0},
    {0,1,1,0},
    {0,2,1,0},
    {0,2,1,1},
    {0,2,1,2},
    {0,0,2,2}};

/* Forward declaration of a file-local helper (static, so its definition lives
 * in this translation unit). */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel );

/* All of the search macros below expand to a bare { } block and operate on
 * variables of the function that uses them.  Names that must be in scope:
 *   h, m, i_pixel, p_fref, p_cost_mvx, p_cost_mvy  (read)
 *   bcost, bmx, bmy, dir                           (updated)
 *   omx, omy, i, mv_x_min/max, mv_y_min/max        (pattern macros)
 * Coordinate arguments are expanded more than once, so they must be free of
 * side effects. */

/* Evaluate one candidate (mx,my) and keep it if it is strictly better.
 *
 *   cost = h->pixf.sad[i_pixel]( m->p_fenc[0], &p_fref[my*stride + mx] )
 *          + p_cost_mvx[mx<<2] + p_cost_mvy[my<<2]
 *
 * When cost < bcost, bcost/bmx/bmy are replaced; if bd is nonzero, dir is
 * additionally set to d.
 * NOTE(review): the <<2 presumably converts a full-pel position into the
 * finer unit the cost tables are indexed in -- confirm where p_cost_mv is set up. */
#define COST_MV_INT( mx, my, bd, d ) \
{ \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0], \
                   &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0] ) \
             + p_cost_mvx[ (mx)<<2 ] \
             + p_cost_mvy[ (my)<<2 ]; \
    if( cost < bcost ) \
    { \
        bcost = cost; \
        bmx = (mx); \
        bmy = (my); \
        if( bd ) \
            dir = (d); \
    } \
}

/* Candidate test that leaves dir untouched. */
#define COST_MV( mx, my )         COST_MV_INT( mx, my, 0, 0 )

/* Candidate test that also records d in dir when the candidate wins. */
#define COST_MV_DIR( mx, my, d )  COST_MV_INT( mx, my, 1, d )

/* Variant of COST_MV built on h->pixf.sad_pde.  The SAD still affordable for
 * this candidate (bcost minus its two mv costs) is handed to sad_pde as a
 * fifth argument, and the candidate wins only if the returned value is below
 * that same budget; bcost then becomes returned value + mv costs.
 * NOTE(review): presumably sad_pde uses the budget to stop accumulating early
 * -- confirm against the sad_pde implementation. */
#define COST_MV_PDE( mx, my ) \
{ \
    int cost = h->pixf.sad_pde[i_pixel]( m->p_fenc[0], m->i_stride[0], \
                   &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0], \
                   bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ] ); \
    if( cost < bcost - p_cost_mvx[ (mx)<<2 ] - p_cost_mvy[ (my)<<2 ] ) \
    { \
        bcost = cost + p_cost_mvx[ (mx)<<2 ] + p_cost_mvy[ (my)<<2 ]; \
        bmx = (mx); \
        bmy = (my); \
    } \
}

/* Radius-1 diamond.  Re-centres omx/omy on (mx,my), then tests the four
 * direct neighbours; the centre itself (0) is not re-evaluated.
 *
 *      1
 *     101
 *      1
 */
#define DIA1_ITER( mx, my ) \
    { \
        omx = (mx); omy = (my); \
        COST_MV( omx  , omy-1 ); \
        COST_MV( omx  , omy+1 ); \
        COST_MV( omx-1, omy   ); \
        COST_MV( omx+1, omy   ); \
    }

/* Radius-2 diamond around the current omx/omy (which it does not modify).
 *
 *       1
 *      1 1
 *     1 0 1
 *      1 1
 *       1
 *
 * The closing brace deliberately has no trailing backslash: a continuation
 * there would splice the next #define into this macro. */
#define DIA2 \
    { \
        COST_MV( omx  , omy-2 ); \
        COST_MV( omx-1, omy-1 ); \
        COST_MV( omx+1, omy-1 ); \
        COST_MV( omx-2, omy   ); \
        COST_MV( omx+2, omy   ); \
        COST_MV( omx-1, omy+1 ); \
        COST_MV( omx+1, omy+1 ); \
        COST_MV( omx  , omy+2 ); \
    }

/* Radius-2 octagon around the current omx/omy (which it does not modify).
 *
 *      1 1
 *     1   1
 *       0
 *     1   1
 *      1 1
 */
#define OCT2 \
    { \
        COST_MV( omx-1, omy-2 ); \
        COST_MV( omx+1, omy-2 ); \
        COST_MV( omx-2, omy-1 ); \
        COST_MV( omx+2, omy-1 ); \
        COST_MV( omx-2, omy+1 ); \
        COST_MV( omx+2, omy+1 ); \
        COST_MV( omx-1, omy+2 ); \
        COST_MV( omx+1, omy+2 ); \
    }

/* Cross search around omx/omy using loop variable i.
 * Horizontal arm: i = start, start+2, ... while i < x_max; omx+i is tested
 * only if it is <= mv_x_max and omx-i only if it is >= mv_x_min.
 * Vertical arm: same stepping while i < y_max, guarded by mv_y_max/mv_y_min.
 * NOTE(review): in this copy of the file the definition is interrupted by a
 * hard line break right after the "if(" that ends this block; the rest of
 * the vertical arm follows on the next source line. */
#define CROSS( start, x_max, y_max ) \
    { \
        for( i = start; i < x_max; i+=2 ) \
        { \
            if( omx + i <= mv_x_max ) \
                COST_MV( omx + i, omy ); \
            if( omx - i >= mv_x_min ) \
                COST_MV( omx - i, omy ); \
        } \
        for( i = start; i < y_max; i+=2 ) \
        { \
            if( 
omy + i <= mv_y_max ) \ COST_MV( omx, omy + i ); \ if( omy - i >= mv_y_min ) \ COST_MV( omx, omy - i ); \ } \ }void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh ){ const int i_pixel = m->i_pixel; int i_me_range = h->param.analyse.i_me_range; int bmx, bmy, bcost; int omx, omy, pmx, pmy; uint8_t *p_fref = m->p_fref[0]; int i, j; int dir; int mv_x_min = h->mb.mv_min_fpel[0]; int mv_y_min = h->mb.mv_min_fpel[1]; int mv_x_max = h->mb.mv_max_fpel[0]; int mv_y_max = h->mb.mv_max_fpel[1]; const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0]; const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; if( h->mb.i_me_method == X264_ME_UMH ) { /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */ p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); } bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max ); bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max ); bcost = COST_MAX; COST_MV( pmx, pmy ); /* I don't know why this helps */ bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ]; /* try extra predictors if provided */ for( i = 0; i < i_mvc; i++ ) { const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max ); const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max ); if( mx != bmx || my != bmy ) COST_MV( mx, my ); } COST_MV( 0, 0 ); mv_x_max += 8; mv_y_max += 8; mv_x_min -= 8; mv_y_min -= 8; switch( h->mb.i_me_method ) { case X264_ME_DIA: /* diamond search, radius 1 */ for( i = 0; i < i_me_range; i++ ) { DIA1_ITER( bmx, bmy ); if( bmx == omx && bmy == omy ) break; } break; case X264_ME_HEX:me_hex2: /* hexagon search, radius 2 */#if 0 for( i = 0; i < i_me_range/2; i++ ) { omx = bmx; omy = bmy; COST_MV( omx-2, omy ); COST_MV( omx-1, omy+2 ); COST_MV( omx+1, omy+2 ); COST_MV( omx+2, omy ); COST_MV( omx+1, 
omy-2 ); COST_MV( omx-1, omy-2 ); if( bmx == omx && bmy == omy ) break; }#else /* equivalent to the above, but eliminates duplicate candidates */ dir = -1; omx = bmx; omy = bmy; COST_MV_DIR( omx-2, omy, 0 ); COST_MV_DIR( omx-1, omy+2, 1 ); COST_MV_DIR( omx+1, omy+2, 2 ); COST_MV_DIR( omx+2, omy, 3 ); COST_MV_DIR( omx+1, omy-2, 4 ); COST_MV_DIR( omx-1, omy-2, 5 ); if( dir != -1 ) { for( i = 1; i < i_me_range/2; i++ ) { static const int hex2[8][2] = {{-1,-2}, {-2,0}, {-1,2}, {1,2}, {2,0}, {1,-2}, {-1,-2}, {-2,0}}; static const int mod6[8] = {5,0,1,2,3,4,5,0}; const int odir = mod6[dir+1]; omx = bmx; omy = bmy; COST_MV_DIR( omx + hex2[odir+0][0], omy + hex2[odir+0][1], odir-1 ); COST_MV_DIR( omx + hex2[odir+1][0], omy + hex2[odir+1][1], odir ); COST_MV_DIR( omx + hex2[odir+2][0], omy + hex2[odir+2][1], odir+1 ); if( bmx == omx && bmy == omy ) break; } }#endif /* square refine */ DIA1_ITER( bmx, bmy ); COST_MV( omx-1, omy-1 ); COST_MV( omx-1, omy+1 ); COST_MV( omx+1, omy-1 ); COST_MV( omx+1, omy+1 ); break; case X264_ME_UMH: { /* Uneven-cross Multi-Hexagon-grid Search * as in JM, except with different early termination */ static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 }; int ucost1, ucost2; int cross_start = 1; /* refine predictors */ ucost1 = bcost; DIA1_ITER( pmx, pmy ); if( pmx || pmy ) DIA1_ITER( 0, 0 ); if(i_pixel == PIXEL_4x4) goto me_hex2; ucost2 = bcost; if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) ) DIA1_ITER( bmx, bmy ); if( bcost == ucost2 ) cross_start = 3; omx = bmx; omy = bmy; /* early termination */#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) ) if( bcost == ucost2 && SAD_THRESH(2000) ) { DIA2; if( bcost == ucost1 && SAD_THRESH(500) ) break; if( bcost == ucost2 ) { int range = (i_me_range>>1) | 1; CROSS( 3, range, range ); OCT2; if( bcost == ucost2 ) break; cross_start = range + 2; } } /* adaptive search range */ if( i_mvc ) { /* range multipliers based on casual inspection of some statistics of * average 
distance between current predictor and final mv found by ESA. * these have not been tuned much by actual encoding. */ static const int range_mul[4][4] = { { 3, 3, 4, 4 }, { 3, 4, 4, 4 }, { 4, 4, 4, 5 }, { 4, 4, 5, 6 }, }; int mvd; int sad_ctx, mvd_ctx; if( i_mvc == 1 ) { if( i_pixel == PIXEL_16x16 ) /* mvc is probably the same as mvp, so the difference isn't meaningful. * but prediction usually isn't too bad, so just use medium range */ mvd = 25; else mvd = abs( m->mvp[0] - mvc[0][0] ) + abs( m->mvp[1] - mvc[0][1] ); } else { /* calculate the degree of agreement between predictors. */ /* in 16x16, mvc includes all the neighbors used to make mvp, * so don't count mvp separately. */ int i_denom = i_mvc - 1; mvd = 0; if( i_pixel != PIXEL_16x16 ) { mvd = abs( m->mvp[0] - mvc[0][0] ) + abs( m->mvp[1] - mvc[0][1] ); i_denom++; } for( i = 0; i < i_mvc-1; i++ ) mvd += abs( mvc[i][0] - mvc[i+1][0] ) + abs( mvc[i][1] - mvc[i+1][1] ); mvd /= i_denom; //FIXME idiv } sad_ctx = SAD_THRESH(1000) ? 0 : SAD_THRESH(2000) ? 1 : SAD_THRESH(4000) ? 2 : 3; mvd_ctx = mvd < 10 ? 0 : mvd < 20 ? 1 : mvd < 40 ? 2 : 3; i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4; } /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy. * we are still centered on the same place as the DIA2. is this desirable? */ CROSS( cross_start, i_me_range, i_me_range/2 ); /* 5x5 ESA */ omx = bmx; omy = bmy; for( i = (bcost == ucost2) ? 4 : 0; i < 24; i++ ) { static const int square2[24][2] = { { 1, 0}, { 0, 1}, {-1, 0}, { 0,-1}, { 1, 1}, {-1, 1}, {-1,-1}, { 1,-1}, { 2,-1}, { 2, 0}, { 2, 1}, { 2, 2}, { 1, 2}, { 0, 2}, {-1, 2}, {-2, 2}, {-2, 1}, {-2, 0}, {-2,-1}, {-2,-2}, {-1,-2}, { 0,-2}, { 1,-2}, { 2,-2} }; COST_MV( omx + square2[i][0], omy + square2[i][1] ); } /* hexagon grid */ omx = bmx; omy = bmy;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -