⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simplified_fast_me.c

📁 JM 11.0 KTA 2.1 Source Code
💻 C
📖 第 1 页 / 共 2 页
字号:

/*!
*************************************************************************************
*
* \file simplified_fast_me.c
*
* \brief
*   Fast integer pixel and sub pixel motion estimation
*   Improved and simplified from the original UMHexagonS algorithms
*   See JVT-P021 for details
*
* \author 
*    Main contributors: (see contributors.h for copyright, address and affiliation details)
*    - Zhibo Chen                      <chenzhibo@tsinghua.org.cn>
*    - JianFeng Xu                     <fenax@video.mdc.tsinghua.edu.cn>  
*    - Wenfang Fu                      <fwf@video.mdc.tsinghua.edu.cn>
*
*    - Xiaoquan Yi                     <xyi@engr.scu.edu>
*    - Jun Zhang                       <jzhang2@engr.scu.edu>
*
* \date
*    16. June 2005
*************************************************************************************
*/

#include <stdlib.h>
#include <string.h>

#include "global.h"
#include "memalloc.h"
#include "simplified_fast_me.h"
#include "refbuf.h"

#ifdef MV_COMPETITION
#include "mv_competition.h"
#endif

// Lookup tables defined in another translation unit.
// byte_abs is indexed with a pixel difference (orig - ref) when accumulating SAD,
// so the index can be negative.
// NOTE(review): presumably it points into the middle of an absolute-value table -- confirm at its definition.
extern  unsigned int *byte_abs;
// NOTE(review): mvbits is not referenced directly in the visible part of this file;
// presumably it is consumed by the MV_COST macro -- confirm.
extern           int *mvbits;

// Search-pattern offset tables: entry k of an _X table pairs with entry k of the
// matching _Y table to form one (dx, dy) displacement.
// Diamond_X/Y are added to the current best position in the integer-pel search.
// NOTE(review): the Hexagon and Big_Hexagon tables are used beyond the visible part
// of this file; presumably they are applied the same way -- confirm.
static const short Diamond_X[4]      = {-1, 1, 0, 0};
static const short Diamond_Y[4]      = { 0, 0,-1, 1};
static const short Hexagon_X[6]      = {-2, 2,-1, 1,-1, 1};
static const short Hexagon_Y[6]      = { 0, 0,-2, 2, 2,-2};
static const short Big_Hexagon_X[16] = {-4, 4, 0, 0,-4, 4,-4, 4,-4, 4,-4, 4,-2, 2,-2, 2};
static const short Big_Hexagon_Y[16] = { 0, 0,-4, 4,-1, 1, 1,-1,-2, 2, 2,-2,-3, 3, 3,-3};
// Right-shift applied to the search thresholds, indexed by block type
// (e.g. ConvergeThreshold >> block_type_shift_factor[blocktype]).
static const short block_type_shift_factor[8] = {0, 0, 1, 1, 2, 3, 3, 1}; // last one relaxed to 1 instead of 4

// Line accessor used by the sub-pel SAD routine; it is called as
// get_line(ref_pic, y, x, img_height, img_width) and returns a pointer to reference samples.
// NOTE(review): the pointer is not assigned in the visible part of this file -- confirm where it is set.
static pel_t *(*get_line) (pel_t**, int, int, int, int);

/*
 * Evaluate one integer-pel candidate (cand_x, cand_y).
 *
 * The candidate is only examined when it lies within search_range of
 * (center_x, center_y) in both dimensions.  Its cost is the motion vector
 * cost from MV_COST plus the partial SAD; if that cost is lower than
 * min_mcost, then best_x, best_y and min_mcost are updated.
 *
 * The macro expands to a bare if-statement and relies on the following names
 * being in scope at the point of use: cand_x, cand_y, center_x, center_y,
 * search_range, lambda_factor, mvshift, pred_x, pred_y, ref_pic, orig_pic,
 * get_ref_line, height, blocksize_y, blocksize_x, blocksize_x4, mcost,
 * min_mcost, best_x, best_y.  It is invoked without a trailing semicolon.
 */
#define SEARCH_ONE_PIXEL_HELPER                                              \
  if (absm(cand_x - center_x) <= search_range &&                             \
      absm(cand_y - center_y) <= search_range)                               \
  {                                                                          \
    mcost = MV_COST(lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y); \
    mcost = simplified_partial_SAD_calculate(ref_pic, orig_pic,              \
                                             get_ref_line, height,           \
                                             blocksize_y, blocksize_x,       \
                                             blocksize_x4, mcost, min_mcost, \
                                             cand_x, cand_y);                \
    if (mcost < min_mcost)                                                   \
    {                                                                        \
      best_x    = cand_x;                                                    \
      best_y    = cand_y;                                                    \
      min_mcost = mcost;                                                     \
    }                                                                        \
  }

/*!
************************************************************************
* \brief
*    Initialise the tuning thresholds of the fast motion estimation.
*
*    The values are compared against motion costs; changing them trades
*    rate-distortion performance against search speed.
************************************************************************
*/
void simplified_init_FME()
{
  // Integer-pel search: below this cost (shifted by block type) the search
  // does a small local refinement and terminates.
  ConvergeThreshold                = 1000;

  // Integer-pel search: the symmetrical-cross step is entered when the cost
  // exceeds these limits (threshold 1 applies to block type 1 only).
  SymmetricalCrossSearchThreshold1 =  800;
  SymmetricalCrossSearchThreshold2 = 7000;

  // NOTE(review): presumably used by the sub-pel search, which lies outside
  // the visible part of this file -- confirm.
  SubPelThreshold1                 = 1000;
  SubPelThreshold3                 =  400;
}

/*!
************************************************************************
* \brief
*    Allocate the working buffers of the fast motion estimation
*
* \return
*    Sum of the sizes reported by get_mem3Dint() and get_mem2D().
*    The simplified_flag_intra array is not included in this sum.
************************************************************************
*/
int simplified_get_mem_FME()
{
  int total = 0;

  // (width >> 4) + 1 zero-initialised entries
  // NOTE(review): presumably one flag per 16-pixel column -- confirm.
  simplified_flag_intra = calloc((img->width>>4)+1, sizeof(byte));
  if (simplified_flag_intra == NULL)
  {
    no_mem_exit("simplified_get_mem_FME: simplified_flag_intra");
  }

  // Per-list cost arrays: 9 planes at a resolution of one entry per 4x4 block
  total += get_mem3Dint(&simplified_fastme_l0_cost, 9, img->height/4, img->width/4);
  total += get_mem3Dint(&simplified_fastme_l1_cost, 9, img->height/4, img->width/4);

  // 7x7 search-state map
  total += get_mem2D(&simplified_SearchState, 7, 7);

  return total;
}

/*!
************************************************************************
* \brief
*    Release the buffers obtained by simplified_get_mem_FME()
************************************************************************
*/
void simplified_free_mem_FME()
{
  // Flag array obtained directly from calloc()
  free (simplified_flag_intra);

  // 7x7 search-state map
  free_mem2D(simplified_SearchState);

  // Cost arrays; 9 matches the first dimension used at allocation
  free_mem3Dint(simplified_fastme_l1_cost, 9);
  free_mem3Dint(simplified_fastme_l0_cost, 9);
}

/*!
************************************************************************
* \brief
*    Accumulate the SAD of one candidate block on top of a starting cost,
*    stopping early once the running cost can no longer beat the best one
*
* \param ref_pic       reference picture samples, handed to get_ref_line
* \param orig_pic      rows of the original block (orig_pic[y] is row y)
* \param get_ref_line  accessor returning the reference row for a position
* \param height        picture height handed to get_ref_line
* \param blocksize_y   number of rows to compare
* \param blocksize_x   block width handed to get_ref_line
* \param blocksize_x4  number of groups of four samples per row
* \param mcost         starting cost to which the SAD is added
* \param min_mcost     best cost so far; used for early termination
* \param cand_x        x position of the candidate in the reference
* \param cand_y        y position of the candidate in the reference
*
* \return
*    Starting cost plus the SAD of all processed rows. If the running
*    cost reaches min_mcost after a row, the remaining rows are skipped
*    and the value returned is >= min_mcost.
************************************************************************
*/
int simplified_partial_SAD_calculate(pel_t*   ref_pic,
                                     pel_t**  orig_pic,
                                     pel_t* (*get_ref_line)(int, pel_t*, int, int, int, int),
                                     int      height,
                                     int      blocksize_y,
                                     int      blocksize_x,
                                     int      blocksize_x4,
                                     int      mcost,
                                     int      min_mcost,
                                     int      cand_x,
                                     int      cand_y)
{
  unsigned short row, quad;

  for (row = 0; row < blocksize_y; row++)
  {
    pel_t *ref_line  = get_ref_line (blocksize_x, ref_pic, cand_y+row, cand_x, height, img->width);
    pel_t *orig_line = orig_pic[row];

    // Four samples per iteration; byte_abs maps a difference to its magnitude
    for (quad = 0; quad < blocksize_x4; quad++, orig_line += 4, ref_line += 4)
    {
      mcost += byte_abs[ orig_line[0] - ref_line[0] ];
      mcost += byte_abs[ orig_line[1] - ref_line[1] ];
      mcost += byte_abs[ orig_line[2] - ref_line[2] ];
      mcost += byte_abs[ orig_line[3] - ref_line[3] ];
    }

    // The cost is checked once per row, not per sample
    if (mcost >= min_mcost)
    {
      break;
    }
  }

  return mcost;
}

/*!
************************************************************************
* \brief
*    Add the SATD of a sub-pel candidate to a starting motion cost
*
*    The block is processed in 4x4 sub-blocks. For each sub-block the
*    difference between the original samples and the reference samples is
*    collected; reference samples are read from the upsampled plane at a
*    stride of 4, starting at four times the pixel offset plus the
*    candidate vector.
*
* \param pic_pix_x    not used by this function
* \param pic_pix_y    not used by this function
* \param blocksize_x  block width in samples
* \param blocksize_y  block height in samples
* \param cand_mv_x    candidate x position in the upsampled reference
* \param cand_mv_y    candidate y position in the upsampled reference
* \param ref_picture  reference picture; imgY_ups or imgY_ups_w is read
* \param orig_pic     rows of the original block
* \param Mvmcost      starting cost to which the distortion is added
* \param min_mcost    best cost so far; used for early termination when
*                     useABT is zero
* \param useABT       zero: SATD is added per 4x4 sub-block;
*                     non-zero: all differences are gathered and a single
*                     find_SATD() call is made at the end
* \param blocktype    block type; handed to find_SATD and used to lay out
*                     the gathered differences
*
* \return
*    Starting cost plus distortion. Without ABT the search stops as soon
*    as the running cost reaches min_mcost, so the result may cover only
*    part of the block.
************************************************************************
*/
int simplified_add_up_SAD_quarter_pel(int   pic_pix_x,
                                      int                     pic_pix_y,
                                      int                     blocksize_x,
                                      int                     blocksize_y,
                                      int                     cand_mv_x,
                                      int                     cand_mv_y,
                                      StorablePicture        *ref_picture,
                                      pel_t**                 orig_pic,
                                      int                     Mvmcost,
                                      int                     min_mcost,
                                      int                     useABT,
                                      int                     blocktype)
{
  int diff[16];                 // differences of the current 4x4 sub-block
  int c_diff[MB_PIXELS];        // gathered differences for the ABT path
  int mcost      = Mvmcost;
  int ypels      = (128 - ((blocktype == 3)<<6));
  int img_width  = (ref_picture->size_x + (IMG_PAD_SIZE<<1) - 1)<<2;
  int img_height = (ref_picture->size_y + (IMG_PAD_SIZE<<1) - 1)<<2;
  int stop       = 0;
  int y0, x0, row, col;

  //===== Use weighted Reference for ME ====
  pel_t **ref_pic;
  int    apply_weights = ( (active_pps->weighted_pred_flag &&
    (img->type == P_SLICE || img->type == SP_SLICE)) ||
    (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );

  ref_pic = (apply_weights && input->UseWeightedReferenceME) ? ref_picture->imgY_ups_w
                                                             : ref_picture->imgY_ups;

  for (y0 = 0; y0 < blocksize_y && !stop; y0 += 4)
  {
    int y_offset = (y0>7)*ypels;
    int ry0      = (y0<<2) + cand_mv_y;

    for (x0 = 0; x0 < blocksize_x; x0 += 4)
    {
      int  rx0 = (x0<<2) + cand_mv_x;
      int *d   = diff;

      // Collect the 4x4 difference block, one row at a time
      for (row = 0; row < 4; row++)
      {
        pel_t *orig_line = orig_pic [y0 + row];
        pel_t *ref_line  = get_line (ref_pic, ry0 + (row<<2), rx0, img_height, img_width);

        for (col = 0; col < 4; col++)
        {
          *d++ = orig_line[x0 + col] - ref_line[col<<2];
        }
      }

      if (!useABT)
      {
        mcost += SATD (diff, input->hadamard);
        if (mcost >= min_mcost)
        {
          // Leave both loops: this candidate cannot beat the best cost
          stop = 1;
          break;
        }
      }
      else
      { // copy diff to c_diff for ABT SATD calculation
        int base = (x0&0x7) +  (x0>7) * 64 + y_offset;

        for (row = 0; row < BLOCK_SIZE; row++)
        {
          memcpy(&(c_diff[base + (((y0 + row)&0x7)<<3)]), &diff[row*BLOCK_SIZE], BLOCK_SIZE*sizeof(int));
        }
      }
    }
  }

  // Adaptive Block Transform
  if(useABT)
  {
    mcost += find_SATD (c_diff, blocktype);
  }

  return mcost;
}

/*!
************************************************************************
* \brief
*    Fast integer pixel block motion estimation
************************************************************************
*/
int                                     //  ==> minimum motion cost after search
simplified_FastIntegerPelBlockMotionSearch (
                                            pel_t   **orig_pic,      // <--  not used
                                            short     ref,           // <--  reference frame (0... or -1 (backward))
                                            int       list,          // <--  reference picture list
                                            int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                                            int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                                            int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                                            short     pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
                                            short     pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
                                            short*    mv_x,          //  --> motion vector (x) - in pel units
                                            short*    mv_y,          //  --> motion vector (y) - in pel units
                                            int       search_range,  // <--  1-d search range in pel units                         
                                            int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                                            int       lambda_factor) // <--  lagrangian parameter for determining motion cost
{
  short mvshift       = 2;
  int   blocksize_y   = input->blc_size[blocktype][1];
  int   blocksize_x   = input->blc_size[blocktype][0];
  int   blocksize_x4  = blocksize_x >> 2;
  int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;
  int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;
  int   center_x      = pic_pix_x + *mv_x;
  int   center_y      = pic_pix_y + *mv_y;
  int   best_x        = 0, best_y = 0;
  int   search_step, iYMinNow, iXMinNow;
  int   cand_x, cand_y, mcost;
  
  unsigned short        i, m; 
  pel_t                *ref_pic;
  pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
  
  short list_offset   = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
    img->current_mb_nr%2 ? 4 : 2 : 0;
  int   height        = ((img->MbaffFrameFlag)&&(img->mb_data[img->current_mb_nr].mb_field))?
    (img->height+1)>>1 : img->height;
  
  //===== Use weighted Reference for ME ====
  int  apply_weights = ( (active_pps->weighted_pred_flag  && 
    (img->type == P_SLICE || img->type == SP_SLICE)) ||
    (active_pps->weighted_bipred_idc && (img->type == B_SLICE)) );  
  
  if (apply_weights && input->UseWeightedReferenceME)
  {
    ref_pic       = listX[list+list_offset][ref]->imgY_11_w;
  }
  else
  {
    ref_pic       = listX[list+list_offset][ref]->imgY_11;
  }
  
  //===== set function for getting reference picture lines =====
  if ((center_x > search_range) && (center_x < img->width-1-search_range-blocksize_x) &&
    (center_y > search_range) && (center_y < height-1-search_range-blocksize_y))
  {
    get_ref_line = FastLineX;
  }
  else
  {
    get_ref_line = UMVLineX;
  }
  
  // Check the center median predictor
  cand_x = center_x ;
  cand_y = center_y ;
  mcost  = MV_COST    (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
  mcost  = simplified_partial_SAD_calculate (ref_pic, orig_pic, get_ref_line,
    height, blocksize_y, blocksize_x,
    blocksize_x4, mcost, min_mcost, cand_x, cand_y);
  
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x    = cand_x;
    best_y    = cand_y;
  }
  
  iXMinNow = best_x;
  iYMinNow = best_y;
  if ((0 != pred_mv_x) || (0 != pred_mv_y))
  {
    cand_x = pic_pix_x;
    cand_y = pic_pix_y;
    SEARCH_ONE_PIXEL_HELPER
  } 
  
  // If the min_mcost is small enough, do a local search then terminate
  // This is good for stationary or quasi-stationary areas
  if (min_mcost < (ConvergeThreshold>>block_type_shift_factor[blocktype]))
  {
    for (m = 0; m < 4; m++)
    {   
      cand_x = iXMinNow + Diamond_X[m];
      cand_y = iYMinNow + Diamond_Y[m];
      SEARCH_ONE_PIXEL_HELPER
    }
    *mv_x = best_x - pic_pix_x;
    *mv_y = best_y - pic_pix_y; 
    return min_mcost;
  }
  
  // Small local search
  for (m = 0; m < 4; m++)
  {   
    cand_x = iXMinNow + Diamond_X[m];
    cand_y = iYMinNow + Diamond_Y[m];
    SEARCH_ONE_PIXEL_HELPER
  }
  
  // First_step: Symmetrical-cross search
  // If distortion is large, use large shapes. Otherwise, compact shapes are faster
  if ( (blocktype == 1 && 
    min_mcost > (SymmetricalCrossSearchThreshold1>>block_type_shift_factor[blocktype])) ||
    (min_mcost > (SymmetricalCrossSearchThreshold2>>block_type_shift_factor[blocktype])) )
  {
    iXMinNow = best_x;
    iYMinNow = best_y;
    
    for(i = 1; i <= search_range/2; i++)
    {
      search_step = (i<<1) - 1;
      cand_x = iXMinNow + search_step;
      cand_y = iYMinNow;
      SEARCH_ONE_PIXEL_HELPER
        
        cand_x = iXMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
        
        cand_x = iXMinNow;
      cand_y = iYMinNow + search_step;
      SEARCH_ONE_PIXEL_HELPER
        
        cand_y = iYMinNow - search_step;
      SEARCH_ONE_PIXEL_HELPER
    }
    
    // Hexagon Search
    iXMinNow = best_x;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -