📄 simplified_fast_me.c
字号:
/*!
*************************************************************************************
*
* \file simplified_fast_me.c
*
* \brief
* Fast integer pixel and sub pixel motion estimation
* Improved and simplified from the original UMHexagonS algorithms
* See JVT-P021 for details
*
* \author
* Main contributors: (see contributors.h for copyright, address and affiliation details)
* - Zhibo Chen <chenzhibo@tsinghua.org.cn>
* - JianFeng Xu <fenax@video.mdc.tsinghua.edu.cn>
* - Wenfang Fu <fwf@video.mdc.tsinghua.edu.cn>
*
* - Xiaoquan Yi <xyi@engr.scu.edu>
* - Jun Zhang <jzhang2@engr.scu.edu>
*
* \date
* 16. June 2005
*************************************************************************************
*/
#include <stdlib.h>
#include <string.h>
#include "global.h"
#include "memalloc.h"
#include "simplified_fast_me.h"
#include "refbuf.h"
// Absolute-value lookup table defined elsewhere. In this file it is indexed with
// (original sample - reference sample) differences.
// NOTE(review): those differences can be negative, so the pointer presumably
// refers to the middle of a larger table - confirm at its definition.
extern unsigned int *byte_abs;
// Defined elsewhere. NOTE(review): not referenced directly in the visible part of
// this file; presumably consumed through the MV_COST macro - confirm.
extern int *mvbits;
// Search-pattern offset tables. The X and Y arrays of one pattern share an index:
// entry m describes the displacement (X[m], Y[m]) added to the current best position.
// Diamond: the four direct neighbours at distance 1.
static const short Diamond_X[4] = {-1, 1, 0, 0};
static const short Diamond_Y[4] = { 0, 0,-1, 1};
// Hexagon: six points, (+-2, 0) and (+-1, +-2).
static const short Hexagon_X[6] = {-2, 2,-1, 1,-1, 1};
static const short Hexagon_Y[6] = { 0, 0,-2, 2, 2,-2};
// Big hexagon: sixteen points with |x| <= 4 and |y| <= 4.
static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};
static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};
// Line-fetch callback used by simplified_add_up_SAD_quarter_pel. It is called as
// get_line(ref_pic, y, x, img_height, img_width). It is not assigned in the
// visible part of this file, so it must be set before that function is called.
static pel_t *(*get_line) (pel_t**, int, int, int, int);
// Macro for motion estimation cost computation per match.
//
// Tests one integer-pel candidate (cand_x, cand_y):
//  - the candidate is skipped unless it lies within search_range of
//    (center_x, center_y) in both x and y;
//  - its cost starts with the motion vector cost from MV_COST and is then
//    extended by simplified_partial_SAD_calculate, which receives min_mcost
//    as the bound for its early exit;
//  - if the resulting cost is below min_mcost, then best_x, best_y and
//    min_mcost are updated.
//
// The macro takes no arguments. It reads and writes these names from the
// expanding scope: cand_x, cand_y, center_x, center_y, search_range,
// lambda_factor, mvshift, pred_x, pred_y, ref_pic, orig_pic, get_ref_line,
// height, blocksize_y, blocksize_x, blocksize_x4, mcost, min_mcost,
// best_x, best_y.
//
// NOTE(review): the expansion is a bare if-statement (not wrapped in
// do { } while (0)) and is used without a trailing semicolon, so it must
// only be placed where a complete statement is allowed and never directly
// in front of an 'else'.
#define SEARCH_ONE_PIXEL_HELPER \
if(absm(cand_x - center_x) <= search_range && absm(cand_y - center_y) <= search_range) \
{ \
mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); \
mcost = simplified_partial_SAD_calculate(ref_pic, orig_pic, get_ref_line, height, \
blocksize_y, blocksize_x, blocksize_x4, mcost, min_mcost, cand_x, cand_y); \
if (mcost < min_mcost) \
{ \
best_x = cand_x; \
best_y = cand_y; \
min_mcost = mcost; \
} \
}
/*!
 ************************************************************************
 * \brief
 *    Load the default decision thresholds of the fast motion estimation.
 *
 *    The values are plain tuning constants: changing them trades
 *    rate-distortion performance against search speed.
 ************************************************************************
 */
void simplified_init_FME()
{
  /* integer-pel search: early-termination bound for the local search */
  ConvergeThreshold = 1000;

  /* integer-pel search: bounds that enable the symmetrical-cross step */
  SymmetricalCrossSearchThreshold1 = 800;
  SymmetricalCrossSearchThreshold2 = 7000;

  /* sub-pel thresholds (not consumed in the visible part of this file) */
  SubPelThreshold1 = 1000;
  SubPelThreshold3 = 400;
}
/*!
 ************************************************************************
 * \brief
 *    Allocate the working buffers of the fast motion estimation.
 *
 *    Allocates the zero-initialised simplified_flag_intra array with
 *    (img->width >> 4) + 1 entries, the two 9-plane cost arrays for
 *    list 0 and list 1 (img->height/4 x img->width/4 each) and the
 *    7x7 simplified_SearchState map. no_mem_exit() is called when the
 *    flag array cannot be allocated.
 *
 * \return
 *    Sum of the values returned by the get_mem3Dint / get_mem2D calls.
 *    The simplified_flag_intra array is not included in that sum.
 ************************************************************************
 */
int simplified_get_mem_FME()
{
  const int grid_rows = img->height / 4;
  const int grid_cols = img->width / 4;
  int total;

  simplified_flag_intra = calloc((img->width >> 4) + 1, sizeof(byte));
  if (simplified_flag_intra == NULL)
  {
    no_mem_exit("simplified_get_mem_FME: simplified_flag_intra");
  }

  total  = get_mem3Dint(&simplified_fastme_l0_cost, 9, grid_rows, grid_cols);
  total += get_mem3Dint(&simplified_fastme_l1_cost, 9, grid_rows, grid_cols);
  total += get_mem2D(&simplified_SearchState, 7, 7);

  return total;
}
/*!
 ************************************************************************
 * \brief
 *    Release the buffers obtained by simplified_get_mem_FME().
 *
 *    The buffers are released in the reverse order of their allocation.
 *    The plane count 9 passed to free_mem3Dint matches the count used
 *    when the cost arrays were allocated.
 ************************************************************************
 */
void simplified_free_mem_FME()
{
  /* search state map */
  free_mem2D(simplified_SearchState);

  /* per-list cost arrays */
  free_mem3Dint(simplified_fastme_l1_cost, 9);
  free_mem3Dint(simplified_fastme_l0_cost, 9);

  /* intra flag array (plain calloc allocation) */
  free(simplified_flag_intra);
}
/*!
 ************************************************************************
 * \brief
 *    Accumulate the sum of absolute differences between the original
 *    block and the reference block at (cand_x, cand_y), row by row,
 *    on top of an initial cost.
 *
 *    After every completed row the running cost is compared against
 *    min_mcost; as soon as it is not smaller, the remaining rows are
 *    skipped and the partial cost is returned.
 *
 * \param ref_pic       reference picture, handed to get_ref_line
 * \param orig_pic      original block, one pointer per row
 * \param get_ref_line  callback that returns the reference row for a
 *                      given (y, x) position
 * \param height        picture height handed to get_ref_line
 * \param blocksize_y   number of rows to compare
 * \param blocksize_x   block width handed to get_ref_line
 * \param blocksize_x4  number of 4-sample groups processed per row
 * \param mcost         cost to start from
 * \param min_mcost     current best cost, used as early-exit bound
 * \param cand_x        x position of the candidate
 * \param cand_y        y position of the candidate
 *
 * \return
 *    Initial cost plus the absolute differences of all rows that were
 *    processed.
 ************************************************************************
 */
int simplified_partial_SAD_calculate(pel_t* ref_pic,
                                     pel_t** orig_pic,
                                     pel_t* (*get_ref_line)(int, pel_t*, int, int, int, int),
                                     int height,
                                     int blocksize_y,
                                     int blocksize_x,
                                     int blocksize_x4,
                                     int mcost,
                                     int min_mcost,
                                     int cand_x,
                                     int cand_y)
{
  unsigned short row, group;
  pel_t *ref_px, *src_px;

  for (row = 0; row < blocksize_y; row++)
  {
    ref_px = get_ref_line (blocksize_x, ref_pic, cand_y + row, cand_x, height, img->width);
    src_px = orig_pic[row];

    /* four samples per iteration */
    group = 0;
    while (group < blocksize_x4)
    {
      mcost += byte_abs[ src_px[0] - ref_px[0] ];
      mcost += byte_abs[ src_px[1] - ref_px[1] ];
      mcost += byte_abs[ src_px[2] - ref_px[2] ];
      mcost += byte_abs[ src_px[3] - ref_px[3] ];
      src_px += 4;
      ref_px += 4;
      group++;
    }

    /* candidate can no longer beat the current best: stop early */
    if (mcost >= min_mcost)
    {
      return mcost;
    }
  }

  return mcost;
}
/*!
 ************************************************************************
 * \brief
 *    Add the distortion of a sub-pel candidate to a given start cost.
 *
 *    The block is processed in 4x4 sub-blocks. For every sub-block the
 *    16 differences between the original samples and the reference
 *    samples are gathered. The reference is read from the imgY_ups
 *    plane (or imgY_ups_w when weighted reference ME applies) through
 *    the file-level get_line callback, at positions scaled by 4 and
 *    with a step of 4 entries between neighbouring samples.
 *
 *    Without ABT, each sub-block is scored with SATD() and the function
 *    returns as soon as the running cost reaches min_mcost.
 *    With ABT, the differences are only collected into one buffer and
 *    scored once with find_SATD() after all sub-blocks were visited;
 *    there is no early exit on that path.
 *
 * \param pic_pix_x    not used by this function
 * \param pic_pix_y    not used by this function
 * \param blocksize_x  block width in samples
 * \param blocksize_y  block height in samples
 * \param cand_mv_x    x offset added to the scaled sample position
 * \param cand_mv_y    y offset added to the scaled sample position
 * \param ref_picture  reference picture
 * \param orig_pic     original block, one pointer per row
 * \param Mvmcost      cost to start from
 * \param min_mcost    current best cost, early-exit bound (non-ABT only)
 * \param useABT       non-zero selects the ABT path
 * \param blocktype    block type; handed to find_SATD and used for the
 *                     layout of the ABT difference buffer
 *
 * \return
 *    Start cost plus the accumulated distortion (partial when the
 *    non-ABT path exits early).
 ************************************************************************
 */
int simplified_add_up_SAD_quarter_pel(int pic_pix_x,
                                      int pic_pix_y,
                                      int blocksize_x,
                                      int blocksize_y,
                                      int cand_mv_x,
                                      int cand_mv_y,
                                      StorablePicture *ref_picture,
                                      pel_t** orig_pic,
                                      int Mvmcost,
                                      int min_mcost,
                                      int useABT,
                                      int blocktype)
{
  int diff[16];
  int c_diff[MB_PIXELS];
  int *dst;
  int mcost = Mvmcost;
  int blk_y, blk_x, row, col;
  int ref_y, ref_x;
  int lower_half_offset, abt_base;
  /* offset applied to sub-blocks with y > 7 in the ABT buffer */
  /* NOTE(review): layout presumably matches what find_SATD expects - confirm */
  int lower_half_step = 128 - ((blocktype == 3) << 6);
  int img_width  = (ref_picture->size_x + (IMG_PAD_SIZE << 1) - 1) << 2;
  int img_height = (ref_picture->size_y + (IMG_PAD_SIZE << 1) - 1) << 2;
  pel_t **ref_pic;
  pel_t *ref_line;
  pel_t *orig_line;

  //===== Use weighted Reference for ME ====
  int apply_weights = ( (active_pps->weighted_pred_flag &&
                         (img->type == P_SLICE || img->type == SP_SLICE)) ||
                        (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );

  ref_pic = ref_picture->imgY_ups;
  if (apply_weights && input->UseWeightedReferenceME)
  {
    ref_pic = ref_picture->imgY_ups_w;
  }

  for (blk_y = 0; blk_y < blocksize_y; blk_y += 4)
  {
    lower_half_offset = (blk_y > 7) * lower_half_step;
    ref_y = (blk_y << 2) + cand_mv_y;

    for (blk_x = 0; blk_x < blocksize_x; blk_x += 4)
    {
      ref_x = (blk_x << 2) + cand_mv_x;

      /* gather the 4x4 difference block, row by row */
      dst = diff;
      for (row = 0; row < 4; row++)
      {
        orig_line = orig_pic[blk_y + row];
        ref_line  = get_line (ref_pic, ref_y + (row << 2), ref_x, img_height, img_width);
        for (col = 0; col < 4; col++)
        {
          *dst++ = orig_line[blk_x + col] - ref_line[col << 2];
        }
      }

      if (!useABT)
      {
        mcost += SATD (diff, input->hadamard);
        if (mcost >= min_mcost)
        {
          /* candidate cannot win any more; useABT is zero here, so no final pass is skipped */
          return mcost;
        }
      }
      else
      {
        // copy diff to c_diff for ABT SATD calculation
        abt_base = (blk_x & 0x7) + (blk_x > 7) * 64 + lower_half_offset;
        for (row = 0; row < BLOCK_SIZE; row++)
        {
          memcpy(&(c_diff[abt_base + (((blk_y + row) & 0x7) << 3)]),
                 &diff[row * BLOCK_SIZE],
                 BLOCK_SIZE * sizeof(int));
        }
      }
    }
  }

  // Adaptive Block Transform
  if (useABT)
  {
    mcost += find_SATD (c_diff, blocktype);
  }

  return mcost;
}
/*!
************************************************************************
* \brief
* Fast integer pixel block motion estimation
************************************************************************
*/
int // ==> minimum motion cost after search
simplified_FastIntegerPelBlockMotionSearch (
pel_t **orig_pic, // <-- not used
short ref, // <-- reference frame (0... or -1 (backward))
int list, // <-- reference picture list
int pic_pix_x, // <-- absolute x-coordinate of regarded AxB block
int pic_pix_y, // <-- absolute y-coordinate of regarded AxB block
int blocktype, // <-- block type (1-16x16 ... 7-4x4)
short pred_mv_x, // <-- motion vector predictor (x) in sub-pel units
short pred_mv_y, // <-- motion vector predictor (y) in sub-pel units
short* mv_x, // --> motion vector (x) - in pel units
short* mv_y, // --> motion vector (y) - in pel units
int search_range, // <-- 1-d search range in pel units
int min_mcost, // <-- minimum motion cost (cost for center or huge value)
int lambda_factor) // <-- lagrangian parameter for determining motion cost
{
short mvshift = 2;
int blocksize_y = input->blc_size[blocktype][1];
int blocksize_x = input->blc_size[blocktype][0];
int blocksize_x4 = blocksize_x >> 2;
int pred_x = (pic_pix_x << mvshift) + pred_mv_x;
int pred_y = (pic_pix_y << mvshift) + pred_mv_y;
int center_x = pic_pix_x + *mv_x;
int center_y = pic_pix_y + *mv_y;
int best_x = 0, best_y = 0;
int search_step, iYMinNow, iXMinNow;
int cand_x, cand_y, mcost;
unsigned short i, m;
pel_t *ref_pic;
pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
short list_offset = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
img->current_mb_nr%2 ? 4 : 2 : 0;
int height = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
(img->height+1)>>1 : img->height;
//===== Use weighted Reference for ME ====
int apply_weights = ( (active_pps->weighted_pred_flag &&
(img->type == P_SLICE || img->type == SP_SLICE)) ||
(active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );
if (apply_weights && input->UseWeightedReferenceME)
{
ref_pic = listX[list+list_offset][ref]->imgY_11_w;
}
else
{
ref_pic = listX[list+list_offset][ref]->imgY_11;
}
//===== set function for getting reference picture lines =====
if ((center_x > search_range) && (center_x < img->width-1-search_range-blocksize_x) &&
(center_y > search_range) && (center_y < height-1-search_range-blocksize_y))
{
get_ref_line = FastLineX;
}
else
{
get_ref_line = UMVLineX;
}
// Check the center median predictor
cand_x = center_x ;
cand_y = center_y ;
mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
mcost = simplified_partial_SAD_calculate (ref_pic, orig_pic, get_ref_line,
height, blocksize_y, blocksize_x,
blocksize_x4, mcost, min_mcost, cand_x, cand_y);
if (mcost < min_mcost)
{
min_mcost = mcost;
best_x = cand_x;
best_y = cand_y;
}
iXMinNow = best_x;
iYMinNow = best_y;
if ((0 != pred_mv_x) || (0 != pred_mv_y))
{
cand_x = pic_pix_x;
cand_y = pic_pix_y;
SEARCH_ONE_PIXEL_HELPER
}
// If the min_mcost is small enough, do a local search then terminate
// This is good for stationary or quasi-stationary areas
if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
{
for (m = 0; m < 4; m++)
{
cand_x = iXMinNow + Diamond_X[m];
cand_y = iYMinNow + Diamond_Y[m];
SEARCH_ONE_PIXEL_HELPER
}
*mv_x = best_x - pic_pix_x;
*mv_y = best_y - pic_pix_y;
return min_mcost;
}
// Small local search
for (m = 0; m < 4; m++)
{
cand_x = iXMinNow + Diamond_X[m];
cand_y = iYMinNow + Diamond_Y[m];
SEARCH_ONE_PIXEL_HELPER
}
// First_step: Symmetrical-cross search
// If distortion is large, use large shapes. Otherwise, compact shapes are faster
if ( (blocktype == 1 &&
min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
(min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
{
iXMinNow = best_x;
iYMinNow = best_y;
for(i = 1; i <= search_range/2; i++)
{
search_step = (i<<1) - 1;
cand_x = iXMinNow + search_step;
cand_y = iYMinNow;
SEARCH_ONE_PIXEL_HELPER
cand_x = iXMinNow - search_step;
SEARCH_ONE_PIXEL_HELPER
cand_x = iXMinNow;
cand_y = iYMinNow + search_step;
SEARCH_ONE_PIXEL_HELPER
cand_y = iYMinNow - search_step;
SEARCH_ONE_PIXEL_HELPER
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -