fast_me.c

来自「H264编码器,比较3步搜索法和全搜索等的信噪比,平均搜索点数,搜索时间等」· C语言代码 · 共 733 行 · 第 1/2 页
733 行
	
	int   img_width     = listX[list][ref]->size_x;
	int   img_height    = listX[list][ref]->size_y;
	int   best_x      = center_x;       // position with minimum motion cost
	int   best_y      = center_y;
	int   best_step      = 0;       // position with minimum motion cost
	int   best_pos      = 0;
	
	int   distx[3][9]={{0,-4,0,4,-4,4,-4,0,4},{0,-2,0,2,-2,2,-2,0,2},{0,-1,0,1,-1,1,-1,0,1}};
	int   disty[3][9]={{0,-4,-4,-4,0,0,4,4,4},{0,-2,0,2,-2,2,-2,0,2},{0,-1,0,1,-1,1,-1,0,1}};
	
	
	pel_t *orig_line, *ref_line;
	pel_t *(*get_ref_line)(int, pel_t*, int, int, int, int);
	
	if ((center_x > search_range) && (center_x < img->width -1-search_range-blocksize_x) &&
		(center_y > search_range) && (center_y < img->height-1-search_range-blocksize_y)   )
	{
		get_ref_line = FastLineX;
	}
	else
	{
		get_ref_line = UMVLineX;
	}
	
	
	for(step=0;step<3;step++)
	{
		for(pos=(step>0)? 1:0;pos<9;pos++)
		{
			
			
			cand_x=center_x+distx[step][pos];
			cand_y=center_y+disty[step][pos];
			
			
			for (y=0; y<blocksize_y; y++)
			{
				ref_line  = get_ref_line (blocksize_x, ref_pic, cand_y+y, cand_x, img_height, img_width);
				orig_line = orig_pic [y];
				
				for (x=0; x<blocksize_x>>2; x++)
				{
					mcost += byte_abs[( *orig_line++ - *ref_line++ )];
					mcost += byte_abs[( *orig_line++ - *ref_line++ )];
					mcost += byte_abs[( *orig_line++ - *ref_line++ )];
					mcost += byte_abs[( *orig_line++ - *ref_line++ )];
				}
				
				if (mcost > min_mcost)
				{
					break;
				}
			}
			if (mcost < min_mcost)
			{
				best_x  = cand_x;
				best_y  = cand_y;
				min_mcost = mcost;
				best_step=step;
				best_pos=pos;
				flag=1;
				
			}
			
		}
		
		if(flag==1)
		{
			center_x+=distx[best_step][best_pos];
			center_y+=disty[best_step][best_pos];
			flag=0;
		}
		
		if(min_mcost<THRESH)
			break;
		
	}
    *mv_x=best_x-pic_pix_x;
    *mv_y=best_y-pic_pix_y;
	
	
	return min_mcost;
				  }				  
  /*!
 ************************************************************************
 * \brief
 * Functions for fast fractional pel motion estimation.
 * 1. int AddUpSADQuarter() returns the SATD-based cost of a fractional-pel MV
 * 2. int FastSubPelBlockMotionSearch() performs the fast fractional-pel ME
 * \authors: Zhibo Chen
 *           Dept.of EE, Tsinghua Univ.
 * \date   : 2003.4
 ************************************************************************
 */
int AddUpSADQuarter(int pic_pix_x,int pic_pix_y,int blocksize_x,int blocksize_y,
                    int cand_mv_x,int cand_mv_y, StorablePicture *ref_picture, pel_t**   orig_pic, 
                    int Mvmcost, int min_mcost,int useABT)
{
  // Cost of one candidate motion vector, accumulated per 4x4 sub-block.
  //
  // pic_pix_x/y   : block position; multiplied by 4 before cand_mv_x/y is added
  // blocksize_x/y : block extent, walked in steps of 4
  // cand_mv_x/y   : candidate vector added to the scaled block position
  // ref_picture   : supplies imgY_ups, size_x and size_y for PelY_14
  // orig_pic      : rows of the original block
  // Mvmcost       : starting value of the returned cost
  // min_mcost     : early-exit bound (only checked when useABT == 0)
  // useABT        : 0 -> add SATD per 4x4 block; non-zero -> only buffer the differences
  //
  // Returns Mvmcost plus the SATD of every 4x4 sub-block processed. When
  // useABT is non-zero nothing is added, so Mvmcost is returned unchanged.
  pel_t **ref_pic   = ref_picture->imgY_ups;
  int   img_width   = ref_picture->size_x;
  int   img_height  = ref_picture->size_y;
  int   curr_diff[MB_BLOCK_SIZE][MB_BLOCK_SIZE]; // filled in the ABT branch, not read again in this function
  int   diff[16];
  int   cost = Mvmcost;
  int   blk_y, blk_x, row, col;

  for (blk_y = 0; blk_y < blocksize_y; blk_y += 4)
  {
    int ref_y4 = ((pic_pix_y + blk_y) << 2) + cand_mv_y;

    for (blk_x = 0; blk_x < blocksize_x; blk_x += 4)
    {
      int ref_x4 = ((pic_pix_x + blk_x) << 2) + cand_mv_x;

      // Gather the 4x4 difference block in row-major order; neighbouring
      // samples are 4 apart in the coordinates handed to PelY_14.
      for (row = 0; row < 4; row++)
      {
        pel_t *src = orig_pic[blk_y + row];

        for (col = 0; col < 4; col++)
        {
          diff[4 * row + col] = src[blk_x + col]
                              - PelY_14 (ref_pic, ref_y4 + 4 * row, ref_x4 + 4 * col, img_height, img_width);
        }
      }

      if (useABT)
      {
        // ABT path: keep the differences at their position inside the block.
        for (row = 0; row < 4; row++)
        {
          for (col = 0; col < 4; col++)
          {
            curr_diff[blk_y + row][blk_x + col] = diff[4 * row + col];
          }
        }
      }
      else
      {
        cost += SATD (diff, input->hadamard);
        if (cost > min_mcost)
        {
          return cost;   // already worse than the best known cost
        }
      }
    }
  }

  return cost;
}


/*!
 ************************************************************************
 * \brief
 *    Two-step sub-pel refinement of a motion vector.
 *
 *    The incoming vector is scaled by 4, then two cross-shaped candidate
 *    patterns are tested around it: offsets of 3 in step 0 and offsets of
 *    1 in step 1. After each step the vector moves to the best candidate
 *    if one beat min_mcost. The cost of a candidate is MV_COST plus the
 *    sum of byte_abs[] over all sample differences; evaluation of a
 *    candidate stops after the first 4x4 sub-block that pushes its cost
 *    above the current minimum.
 *
 * \note
 *    Assigns PelY_14 (declared outside this function) to FastPelY_14 or
 *    UMVPelY_14 depending on the starting position.
 * \note
 *    search_pos2, search_pos4 and useABT are accepted but not used.
 ************************************************************************
 */
int                                                   //  ==> minimum motion cost after search
FastSubPelBlockMotionSearch (pel_t**   orig_pic,      // <--  original pixel values for the AxB block
                             int       ref,           // <--  reference picture index into listX[list]
                             int       list,          // <--  reference list index into listX
                             int       pic_pix_x,     // <--  absolute x-coordinate of regarded AxB block
                             int       pic_pix_y,     // <--  absolute y-coordinate of regarded AxB block
                             int       blocktype,     // <--  block type (1-16x16 ... 7-4x4)
                             int       pred_mv_x,     // <--  motion vector predictor (x) in sub-pel units
                             int       pred_mv_y,     // <--  motion vector predictor (y) in sub-pel units
                             int*      mv_x,          // <--> in: search center (x) in pel units / out: motion vector (x) in quarter-pel units
                             int*      mv_y,          // <--> in: search center (y) in pel units / out: motion vector (y) in quarter-pel units
                             int       search_pos2,   // <--  unused here (search positions for    half-pel search)
                             int       search_pos4,   // <--  unused here (search positions for quarter-pel search)
                             int       min_mcost,     // <--  minimum motion cost (cost for center or huge value)
                             double    lambda,        // <--  lagrangian parameter for determining motion cost
                             int useABT)              // <--  unused here
{
  // Candidate offsets in quarter-pel units, one row per refinement step.
  // Entry 0 is the centre itself.
  static const int distx[2][5] = {{0, 0,-3, 0, 3}, {0, 0,-1, 0, 1}};
  static const int disty[2][5] = {{0,-3, 0, 3, 0}, {0,-1, 0, 1, 0}};

  int   step, pos, mcost, abort_search;
  int   y0, x0, row, col, ry0, ry, rx0;
  int   cand_mv_x, cand_mv_y;
  int   max_pos_x4, max_pos_y4;
  int   lambda_factor   = LAMBDA_FACTOR (lambda);
  int   mv_shift        = 0;
  pel_t *orig_line;
  StorablePicture *ref_picture = listX[list][ref];
  pel_t **ref_pic       = ref_picture->imgY_ups;   // upsampled reference picture
  int   blocksize_x     = input->blc_size[blocktype][0];
  int   blocksize_y     = input->blc_size[blocktype][1];
  int   pic4_pix_x      = pic_pix_x * 4;
  int   pic4_pix_y      = pic_pix_y * 4;
  int   img_width       = ref_picture->size_x;
  int   img_height      = ref_picture->size_y;

  max_pos_x4      = (ref_picture->size_x - blocksize_x + 1) * 4;
  max_pos_y4      = (ref_picture->size_y - blocksize_y + 1) * 4;

  // Scale the vector to quarter-pel units. Multiplication is used instead
  // of "<<= 2" because left-shifting a negative int is undefined behaviour.
  *mv_x *= 4;
  *mv_y *= 4;

  // NOTE(review): the accessor is chosen once, from the starting centre and
  // with a margin of 2, although candidates can end up 4 quarter-pel away
  // (3 in step 0 plus 1 in step 1). Presumably the upsampled picture is
  // padded enough for FastPelY_14 -- confirm.
  if ((pic4_pix_x + *mv_x > 1) && (pic4_pix_x + *mv_x < max_pos_x4 - 2) &&
      (pic4_pix_y + *mv_y > 1) && (pic4_pix_y + *mv_y < max_pos_y4 - 2)   )
  {
    PelY_14 = FastPelY_14;
  }
  else
  {
    PelY_14 = UMVPelY_14;
  }

  for (step = 0; step < 2; step++)
  {
    int best_pos = -1;   // best candidate of this step, -1 while none beat min_mcost

    // Step 1 starts at entry 1: its centre was already evaluated in step 0.
    for (pos = (step > 0) ? 1 : 0; pos < 5; pos++)
    {
      cand_mv_x = *mv_x + distx[step][pos];    // quarter-pel units
      cand_mv_y = *mv_y + disty[step][pos];    // quarter-pel units

      //----- set motion vector cost -----
      mcost = MV_COST (lambda_factor, mv_shift, cand_mv_x, cand_mv_y, pred_mv_x, pred_mv_y);

      //----- add up the absolute differences, one 4x4 sub-block at a time -----
      for (y0 = 0, abort_search = 0; y0 < blocksize_y && !abort_search; y0 += 4)
      {
        ry0 = (pic_pix_y + y0) * 4 + cand_mv_y;

        for (x0 = 0; x0 < blocksize_x; x0 += 4)
        {
          rx0 = (pic_pix_x + x0) * 4 + cand_mv_x;

          for (row = 0; row < 4; row++)
          {
            orig_line = orig_pic[y0 + row];
            ry        = ry0 + 4 * row;

            for (col = 0; col < 4; col++)
            {
              mcost += byte_abs[orig_line[x0 + col]
                                - PelY_14 (ref_pic, ry, rx0 + 4 * col, img_height, img_width)];
            }
          }

          if (mcost > min_mcost)
          {
            abort_search = 1;   // this candidate cannot win any more
            break;
          }
        }
      }

      if (mcost < min_mcost)
      {
        min_mcost = mcost;
        best_pos  = pos;
      }
    }

    // Re-centre on the best candidate before the finer step.
    if (best_pos >= 0)
    {
      *mv_x += distx[step][best_pos];
      *mv_y += disty[step][best_pos];
    }
  }

  return min_mcost;
}

/*!
************************************************************************
* \brief
* Functions for SAD prediction of intra block cases.
* 1. void decide_intrabk_SAD() determines the block coding type (intra/inter)
*    of neighbouring blocks
 * 2. void skip_intrabk_SAD() sets the SAD to zero if the neighbouring block
 *    coding type is intra
  * \date   : 2003.4
 ************************************************************************
 */
void   decide_intrabk_SAD()
{
  // Derives flag_intra_SAD from the flag_intra[] entries around the current
  // position. Nothing is changed when img->type is 0.
  int col;

  if (img->type == 0)
  {
    return;
  }

  // Index into flag_intra[] (presumably the macroblock column, 16 pixels
  // per entry -- confirm against the allocation of flag_intra).
  col = (img->pix_x) >> 4;

  if (img->pix_x == 0)
  {
    // Left picture edge: cleared at the origin, own entry otherwise.
    flag_intra_SAD = (img->pix_y == 0) ? 0 : flag_intra[col];
  }
  else if (img->pix_y == 0)
  {
    // Top edge, not the first column: only the entry to the left is consulted.
    flag_intra_SAD = flag_intra[col - 1];
  }
  else
  {
    // Interior: set if the own entry, the left or the right entry is set.
    flag_intra_SAD = (flag_intra[col] || flag_intra[col - 1] || flag_intra[col + 1]);
  }
}

void skip_intrabk_SAD(int best_mode, int ref_max)
{
  // Records in flag_intra[] whether best_mode is 9 or 10 (presumably the
  // intra modes -- confirm) and, for such a mode in a picture whose type is
  // not 0, zeroes the stored minimum costs of the current 4x4-block area.
  //
  // best_mode : mode chosen for the current position
  // ref_max   : number of reference indices whose entries are cleared
  const int mode_is_9_or_10 = (best_mode == 9 || best_mode == 10);
  int blk_x, blk_y, dx, dy, mode, ref;

  if (img->number > 0)
  {
    flag_intra[(img->pix_x)>>4] = mode_is_9_or_10;
  }

  if (img->type == 0 || !mode_is_9_or_10)
  {
    return;
  }

  blk_x = img->pix_x >> 2;
  blk_y = img->pix_y >> 2;

  // Clear entry [0] for every 4x4 block of the 4x4-block area, every
  // reference index below ref_max and the indices 1..7 of the fourth dimension.
  for (dx = 0; dx < 4; dx++)
  {
    for (dy = 0; dy < 4; dy++)
    {
      for (mode = 1; mode < 8; mode++)
      {
        for (ref = 0; ref < ref_max; ref++)
        {
          all_mincost[blk_x + dx][blk_y + dy][ref][mode][0] = 0;
        }
      }
    }
  }
}
fast_me.c - 源码说明

本页面展示了「H264编码器,比较3步搜索法和全搜索等的信噪比,平均搜索点数,搜索时间等」中的 fast_me.c 源码文件，采用 C 语言编写，共 733 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与H264相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?