mv-search.c

来自「包含了从MPEG4的视频解码到H.264的视频编码部分的源代码」· C语言 代码 · 共 1,913 行 · 第 1/5 页

C
1,913
字号
  {
    range_partly_outside = 1;
  }


  //===== determine position of (0,0)-vector =====
  if (!input->rdopt)
  {
    ref_x = img->pix_x - offset_x;
    ref_y = pix_y - offset_y;

    for (pos = 0; pos < max_pos; pos++)
    {
      if (ref_x == spiral_search_x[pos] &&
          ref_y == spiral_search_y[pos])
      {
        pos_00[refindex] = pos;
        break;
      }
    }
  }


  //===== loop over search range (spiral search): get blockwise SAD =====
  for (pos = 0; pos < max_pos; pos++)
  {
    abs_y = offset_y + spiral_search_y[pos];
    abs_x = offset_x + spiral_search_x[pos];

    if (range_partly_outside)
    {
      if (abs_y >= 0 && abs_y <= max_height &&
          abs_x >= 0 && abs_x <= max_width    )
      {
        PelYline_11 = FastLine16Y_11;
      }
      else
      {
        PelYline_11 = UMVLine16Y_11;
      }
    }

    orgptr = orig_blocks;
    bindex = 0;
    for (blky = 0; blky < 4; blky++)
    {
      LineSadBlk0 = LineSadBlk1 = LineSadBlk2 = LineSadBlk3 = 0;
      for (y = 0; y < 4; y++)
      {
        refptr = PelYline_11 (ref_pic, abs_y++, abs_x);

        LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk0 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk1 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk2 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
        LineSadBlk3 += byte_abs [*refptr++ - *orgptr++];
      }
      block_sad[bindex++][pos] = LineSadBlk0;
      block_sad[bindex++][pos] = LineSadBlk1;
      block_sad[bindex++][pos] = LineSadBlk2;
      block_sad[bindex++][pos] = LineSadBlk3;
    }
  }


  //===== combine SAD's for larger block types =====
  SetupLargerBlocks (refindex, max_pos);


  //===== set flag marking that search setup have been done =====
  search_setup_done[refindex] = 1;
}*/
/*!
 ***********************************************************************
 * \brief
 *    Setup of the fast full-pel search for one reference frame.
 *    For every position of the spiral search the SADs of the sixteen
 *    4x4 luma blocks of the current macroblock are computed and stored
 *    in BlockSAD[refindex][7]; afterwards SetupLargerBlocks() is called
 *    and search_setup_done[refindex] is set.
 *
 * \param ref
 *    reference frame parameter (0... or -1 (backward))
 *
 * \note
 *    - The 4x4 SAD kernel is MSVC inline assembly using MMX and SSE2
 *      instructions (PSADBW on xmm registers); it addresses the samples
 *      as bytes with a row stride of 16 in orig_blocks.
 *    - Search positions are clamped to [-16, img->width] x
 *      [-16, img->height] before ref_pic is indexed, and 16 rows of
 *      16 samples are read starting at the clamped position.
 *      NOTE(review): this presumes ref_pic is addressable over that
 *      whole range (i.e. padded by 16 samples on every side) -- confirm
 *      against the allocation of Refbuf11.
 *    - pos_00[refindex] is only written when the (0,0) vector is one of
 *      the max_pos search positions; otherwise it keeps its old value.
 ***********************************************************************
 */
void
SetupFastFullPelSearch (int   ref)  // <--  reference frame parameter (0... or -1 (backward))
{
  int     pmv[2];
  pel_t   orig_blocks[256], *orgptr = orig_blocks;
  pel_t   *refptr1, *refptr2, *refptr3, *refptr4;   // row pointers handed to the asm kernel
  pel_t   *refptr[16];                              // the 16 reference rows of one search position
  int     offset_x, offset_y, x, y, ref_x, ref_y, pos, abs_x, abs_y, bindex, blky;
  int     i, j;
  _int64  LineSadBlk[4], *lsadptr = LineSadBlk;     // 64-bit slots: the kernel stores with MOVLPD

  int     refframe      = (ref>=0 ? ref             : 0);
  int     refindex      = (ref>=0 ? ref             : img->buf_cycle);
  pel_t** ref_pic       = (img->type==B_IMG ? Refbuf11[ref+1] : Refbuf11[ref]);
  int**   ref_array     = ((img->type!=B_IMG )? refFrArr : ref>=0 ? fw_refFrArr : bw_refFrArr);
  int***  mv_array      = ((img->type!=B_IMG )? tmp_mv   : ref>=0 ? tmp_fwMV    : tmp_bwMV);
  int**   block_sad     = BlockSAD[refindex][7];
  int     search_range  = max_search_range[refindex];
  int     max_pos       = (2*search_range+1) * (2*search_range+1);
  byte**  imgY_orig     = imgY_org;
  int     pix_y         = img->pix_y;


  //===== get search center: predictor of 16x16 block =====
  SetMotionVectorPredictor (pmv, ref_array, mv_array, refframe, 0, 0, 16, 16);

  search_center_x[refindex] = pmv[0] / 4;
  search_center_y[refindex] = pmv[1] / 4;
  if (!input->rdopt)
  {
    //--- correct center so that (0,0) vector is inside ---
    search_center_x[refindex] = max(-search_range, min(search_range, search_center_x[refindex]));
    search_center_y[refindex] = max(-search_range, min(search_range, search_center_y[refindex]));
  }
  search_center_x[refindex] += img->pix_x;
  search_center_y[refindex] += pix_y;
  offset_x = search_center_x[refindex];
  offset_y = search_center_y[refindex];


  //===== copy original block for fast access =====
  for   (y = pix_y; y < pix_y+16; y++)
    for (x = img->pix_x; x < img->pix_x+16; x++)
      *orgptr++ = imgY_orig [y][x];


  //===== determine position of (0,0)-vector =====
  // (done regardless of input->rdopt; without the clamping above the
  //  (0,0) vector may be outside the search window and is then not found)
  ref_x = img->pix_x - offset_x;
  ref_y = pix_y - offset_y;

  for (pos = 0; pos < max_pos; pos++)
  {
    if (ref_x == spiral_search_x[pos] &&
        ref_y == spiral_search_y[pos])
    {
      pos_00[refindex] = pos;
      break;
    }
  }


  //===== loop over search range (spiral search): get blockwise SAD =====
  for (pos = 0; pos < max_pos; pos++)
  {
    //--- absolute position of the candidate block, clamped (see \note) ---
    abs_x = max(-16, min(img->width,  offset_x + spiral_search_x[pos]));
    abs_y = max(-16, min(img->height, offset_y + spiral_search_y[pos]));

    //--- row pointers of the 16 reference lines at this position ---
    for (j = 0; j < 16; j++)
    {
      refptr[j] = &(ref_pic[abs_y + j][abs_x]);
    }

    i      = 0;
    orgptr = orig_blocks;
    bindex = 0;

    //--- one iteration handles one row of four 4x4 blocks (4 image lines) ---
    for (blky = 0; blky < 4; blky++)
    {
      refptr1 = refptr[i++];
      refptr2 = refptr[i++];
      refptr3 = refptr[i++];
      refptr4 = refptr[i++];

      _asm
      {
        // eax..edx: the four reference lines, esi: original samples
        // (16 bytes per line), edi: LineSadBlk[0..3]
        mov eax,refptr1
        mov ebx,refptr2
        mov ecx,refptr3
        mov edx,refptr4
        mov esi,orgptr
        mov edi,lsadptr
        // xmm7 = 0; used below to isolate the two PSADBW partial sums
        // (mm7, loaded further down, is a different register)
        PXOR xmm7,xmm7

        // ---- 4x4 block 0 (columns 0..3) ----
        // gather 4 lines x 4 bytes of reference (xmm0) and original (xmm2)
        movd mm0,[eax]
        movd mm1,[ebx]
        movd mm2,[ecx]
        movd mm3,[edx]
        movd mm4,[esi]
        movd mm5,[esi+16]
        movd mm6,[esi+32]
        movd mm7,[esi+48]
        PUNPCKLDQ mm0,mm1
        PUNPCKLDQ mm2,mm3
        PUNPCKLDQ mm4,mm5
        PUNPCKLDQ mm6,mm7
        MOVQ2DQ xmm0,mm0
        MOVQ2DQ xmm1,mm2
        UNPCKLPD xmm0,xmm1
        MOVQ2DQ xmm2,mm4
        MOVQ2DQ xmm3,mm6
        UNPCKLPD xmm2,xmm3
        // two partial SADs (lines 0-1 in the low, lines 2-3 in the high qword)
        PSADBW xmm0,xmm2
        // add high partial sum to low one and store the low 64 bits
        MOVAPD xmm1,xmm0
        UNPCKLPD xmm0,xmm7
        UNPCKHPD xmm1,xmm7
        PADDD xmm0,xmm1
        MOVLPD [edi],xmm0

        // ---- 4x4 block 1 (columns 4..7) ----
        movd mm0,[eax+4]
        movd mm1,[ebx+4]
        movd mm2,[ecx+4]
        movd mm3,[edx+4]
        movd mm4,[esi+4]
        movd mm5,[esi+20]
        movd mm6,[esi+36]
        movd mm7,[esi+52]
        PUNPCKLDQ mm0,mm1
        PUNPCKLDQ mm2,mm3
        PUNPCKLDQ mm4,mm5
        PUNPCKLDQ mm6,mm7
        MOVQ2DQ xmm0,mm0
        MOVQ2DQ xmm1,mm2
        UNPCKLPD xmm0,xmm1
        MOVQ2DQ xmm2,mm4
        MOVQ2DQ xmm3,mm6
        UNPCKLPD xmm2,xmm3
        PSADBW xmm0,xmm2
        MOVAPD xmm1,xmm0
        UNPCKLPD xmm0,xmm7
        UNPCKHPD xmm1,xmm7
        PADDD xmm0,xmm1
        MOVLPD [edi+8],xmm0

        // ---- 4x4 block 2 (columns 8..11) ----
        movd mm0,[eax+8]
        movd mm1,[ebx+8]
        movd mm2,[ecx+8]
        movd mm3,[edx+8]
        movd mm4,[esi+8]
        movd mm5,[esi+24]
        movd mm6,[esi+40]
        movd mm7,[esi+56]
        PUNPCKLDQ mm0,mm1
        PUNPCKLDQ mm2,mm3
        PUNPCKLDQ mm4,mm5
        PUNPCKLDQ mm6,mm7
        MOVQ2DQ xmm0,mm0
        MOVQ2DQ xmm1,mm2
        UNPCKLPD xmm0,xmm1
        MOVQ2DQ xmm2,mm4
        MOVQ2DQ xmm3,mm6
        UNPCKLPD xmm2,xmm3
        PSADBW xmm0,xmm2
        MOVAPD xmm1,xmm0
        UNPCKLPD xmm0,xmm7
        UNPCKHPD xmm1,xmm7
        PADDD xmm0,xmm1
        MOVLPD [edi+16],xmm0

        // ---- 4x4 block 3 (columns 12..15) ----
        movd mm0,[eax+12]
        movd mm1,[ebx+12]
        movd mm2,[ecx+12]
        movd mm3,[edx+12]
        movd mm4,[esi+12]
        movd mm5,[esi+28]
        movd mm6,[esi+44]
        movd mm7,[esi+60]
        PUNPCKLDQ mm0,mm1
        PUNPCKLDQ mm2,mm3
        PUNPCKLDQ mm4,mm5
        PUNPCKLDQ mm6,mm7
        MOVQ2DQ xmm0,mm0
        MOVQ2DQ xmm1,mm2
        UNPCKLPD xmm0,xmm1
        MOVQ2DQ xmm2,mm4
        MOVQ2DQ xmm3,mm6
        UNPCKLPD xmm2,xmm3
        PSADBW xmm0,xmm2
        MOVAPD xmm1,xmm0
        UNPCKLPD xmm0,xmm7
        UNPCKHPD xmm1,xmm7
        PADDD xmm0,xmm1
        MOVLPD [edi+24],xmm0

        // MMX registers were used: clear the MMX state again
        emms
      }

      //--- advance to the next four lines of the original block ---
      orgptr += 64;

      //--- a 4x4 SAD is at most 16*255, so narrowing to int is lossless ---
      block_sad[bindex++][pos] = (int) LineSadBlk[0];
      block_sad[bindex++][pos] = (int) LineSadBlk[1];
      block_sad[bindex++][pos] = (int) LineSadBlk[2];
      block_sad[bindex++][pos] = (int) LineSadBlk[3];
    }
  }


  //===== combine SAD's for larger block types =====
  SetupLargerBlocks (refindex, max_pos);


  //===== set flag marking that search setup has been done =====
  search_setup_done[refindex] = 1;
}

#endif // _FAST_FULL_ME_


/*!
 ***********************************************************************
 * \brief
 *    setting the motion vector predictor
 ***********************************************************************
 */
void
SetMotionVectorPredictor (int  pmv[2],
                          int  **refFrArr,
                          int  ***tmp_mv,
                          int  ref_frame,
                          int  mb_x,
                          int  mb_y,
                          int  blockshape_x,
                          int  blockshape_y)
{
  int pic_block_x          = img->block_x + (mb_x>>2);
  int pic_block_y          = img->block_y + (mb_y>>2);
  int mb_nr                = img->current_mb_nr;
  int mb_width             = img->width/16;
  int mb_available_up      = (img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width  ].slice_nr);
  int mb_available_left    = (img->mb_x == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-1         ].slice_nr);
  int mb_available_upleft  = (img->mb_x == 0 ||
                              img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width-1].slice_nr);
  int mb_available_upright = (img->mb_x >= mb_width-1 ||
                              img->mb_y == 0          ) ? 0 : (img->mb_data[mb_nr].slice_nr == img->mb_data[mb_nr-mb_width+1].slice_nr);
  int block_available_up, block_available_left, block_available_upright, block_available_upleft;
  int mv_a, mv_b, mv_c, mv_d, pred_vec=0;
  int mvPredType, rFrameL, rFrameU, rFrameUR;
  int hv;
  Macroblock  *currMB   = &img->mb_data[img->current_mb_nr];
  if(input->InterlaceCodingOption >= MB_CODING && mb_adaptive && img->field_mode)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?