⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 motion_est_core_2_optimized.c

📁 基于Linux的ffmepg decoder
💻 C
📖 第 1 页 / 共 5 页
字号:
	pCodec->ME_command_queue0[23] = (3<<29) | (d_type << 24) | (Diamond_search_limit << 16) | ThEES;		// dsize is always small

	pCodec->ME_command_queue0[28] = (2<<29) | ((pMB->mv16x_1&0x7f)<<19) | ((pMB->mv16y_1&0x7f)<<12) | (pCodec->Raddr + (pMB->mv16y_1 < 0 ? 0 : (pMB->mv16y_1 >> 1)*16));
	pCodec->ME_command_queue0[31] = (1<<28) | (2<<29) | ((prevMB->mv16x_0 & 0x7f) << 19) | ((prevMB->mv16y_0 & 0x7f) << 12) 
							| pCodec->Raddr + (prevMB->mv16y_0 < 0 ? 0 : (prevMB->mv16y_0 >> 1)*16);

	pCodec->ME_command_queue0[40] = (1<<28) | (2<<29) | ((prevMB->mv16x_1 & 0x7f) << 19) | ((prevMB->mv16y_1 & 0x7f) << 12) 
							| pCodec->Raddr + (prevMB->mv16y_1 < 0 ? 0 : (prevMB->mv16y_1 >> 1)*16);
	pCodec->ME_command_queue0[46] = (2<<29) | ((pMB->mv16x_3&0x7f)<<19) | ((pMB->mv16y_3&0x7f)<<12) | (pCodec->Raddr23 + (pMB->mv16y_3 < -16 ? ((-16>>1) * 16) : (pMB->mv16y_3 >> 1)*16));
	pCodec->ME_command_queue0[49] = (1<<28) | (2<<29) | ((prevMB->mv16x_2 & 0x7f) << 19) | ((prevMB->mv16y_2 & 0x7f) << 12) 
							| pCodec->Raddr23 + (prevMB->mv16y_2 < -16 ? ((-16>>1) * 16) : (prevMB->mv16y_2 >> 1)*16);

	pCodec->ME_command_queue0[58] = (1<<28) | (2<<29) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12) 
							| pCodec->Raddr23 + (prevMB->mv16y_3 < -16 ? ((-16>>1) * 16) : (prevMB->mv16y_3 >> 1)*16);

   // what's the bit 11 is doing ??? there's no bit 11 for MECTL control register
   // after asking James and Albert, there is no bit 11, we can remove it after the code is stable
	X = current1->rounding_type << 2 | (1 << 11) | 1;			//	1<< 4 => fcode = 1
	pCodec->ME_COMMAND = X;

/*	end setup for next ME	*/

	return 0;  
}

int32_t
MotionEstimation_4MV(MACROBLOCK *const pMB,
				 MACROBLOCK *const pMB_mc,
				 uint32_t x, uint32_t y,
				 MBParam * const pParam,
				 Encoder *pEnc,
				 int32_t counter)
{   
    FTMCP100_CODEC *pCodec=(FTMCP100_CODEC *)pEnc->pCodec;
	uint32_t iWcount;
	MACROBLOCK *prevMB, *pMB_tmp;
	VECTOR pmv[4];
	int32_t index;
	int32_t  X, d_type;
	int32_t MOTION_ACTIVITY;
	int32_t tmp1, tmp2, tmp3, tmp4;
	volatile MDMA *pmdma = MDMA1;
	DECLARE_MP4_PTR
	
    FRAMEINFO * const current1=pEnc->current1;
    FRAMEINFO * const reference=pEnc->reference;
	unsigned int XDIM=pEnc->mEncParam.u32FrameWidth;
	
	// to get the 'current' and 'not current' half of DMA double command buffer
	unsigned int *pDMA_cur,*pDMA_next; 
	pDMA_cur=pCodec->DMA_COMMAND_local+(pCodec->even_odd_1^1)*DMA_COMMAND_QUEUE_STRIDE;
	pDMA_next=pCodec->DMA_COMMAND_local+(pCodec->even_odd_1)*DMA_COMMAND_QUEUE_STRIDE;

    // since we have synchronized the DMA commands entries 28th~39th (or 76th~87th)
	// to MC done , so checking DMA done is equivalent to checking MC done
	POLL_MARKER_S
	while((pmdma->Status & 0x1) == 0) {}
	POLL_MARKER_E

	SET_HOFFSET(((counter-1)&0x3)*16)
	
	SET_MECADDR(CUR_Y0+pCodec->even_odd_1*384)

    SET_MEIADDR((pCodec->triple_buffer_selector)*384 + INTER_Y0)
	
    // for optimization, we invoke ME go here instead after MC go
    SET_MECTL(pCodec->ME_COMMAND)							//		ME GO  ************************
	
	iWcount = pParam->mb_width;
	// MB_mode = 1 means inter mode, 0 means intra mode
	if (pCodec->MB_mode) {
		index = (MBTransQuantInter(pParam, current1,pCodec)) | (pCodec->MVZ << 16) | (pMB_mc->mode<<6);
	} else {
      if (pParam->resyn==0)
      { 
		pMB_tmp = pMB_mc;
		pCodec->acdc_status = 0;
		if ((y==0) || ((y==1) && (x==0))) {
			pCodec->acdc_status = 6;
			pMB_tmp = pMB_tmp - 1;
			if (pMB_tmp->mode != MODE_INTRA)	//	check Left
				pCodec->acdc_status |= 1;
		} else if (x==1) {
			pCodec->acdc_status = 5;
			pMB_tmp = pMB_tmp - (XDIM/16);		//	check Top
			if (pMB_tmp->mode != MODE_INTRA)
				pCodec->acdc_status |= 2;
		} else {
			pMB_tmp = pMB_tmp - 1;
			if (pMB_tmp->mode != MODE_INTRA)	//	check Left
				pCodec->acdc_status |= 1;
			pMB_tmp = pMB_tmp - (XDIM/16);		//	check Top-Left
			if (pMB_tmp->mode != MODE_INTRA)
				pCodec->acdc_status |= 4;
			pMB_tmp += 1;
			if (pMB_tmp->mode != MODE_INTRA)	//	check Top
				pCodec->acdc_status |= 2;
		}
	  }
	  else
	  {
		pCodec->acdc_status = 6;
		pMB_tmp = pMB_mc - 1;
		if ((pMB_tmp->mode != MODE_INTRA) | (x==1))
			pCodec->acdc_status |= 1;
      }
		index = MBTransQuantIntra_p(pParam, current1,pCodec);
	}
    
	SET_MCCTL(index)								//		MC GO

    // begin to execute the DMA commands prepared in advance (from 0th to 0+44(0x2C)-1=43th)
    DMA_MOVE((pCodec->even_odd_1^1)*DMA_COMMAND_QUEUE_STRIDE, 0x4B0002C)
	
	// to prepare the 'not current' DMA buffer command
    pDMA_next[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t) REF_Y + ((counter+2)&0x3)*16);
	pDMA_next[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t) REF_U + ((counter+2)&0x3)*8);
	pDMA_next[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t) REF_V + ((counter+2)&0x3)*8);

	pDMA_next[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0 + (pCodec->even_odd_I)*0x80);
	pDMA_next[40] = pDMA_cur[36];

    if(x==pEnc->mbParam.mb_width-1) { pDMA_next[36] = (uint32_t) (pCodec->pred_value_phy);	} else {pDMA_next[36] = (uint32_t) (pCodec->pred_value_phy + (x+1)*32);	}    
	  
    // make it a chain
    pDMA_next[39] = 0x04A42010; // make it group 2, to sync to MC done

	{
	  unsigned int offset=((pCodec->triple_buffer_selector+2)%3)*384;
      //pDMA_next[25] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_Y0);
	  //pDMA_next[29] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_U0);
	  //pDMA_next[33] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_V0);
	  pDMA_next[25] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(offset + INTER_Y0);
	  pDMA_next[29] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(offset + INTER_U0);
	  pDMA_next[33] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(offset + INTER_V0);
	    
	  // well, originally, we want to make the DMA transfer of reconstructed YUV image to be moved to 
	  // system memory with no synchronization. But we found that one problem occurs due to the fact that
	  // ME engine will copy the current MB to the another bank 0 current MB buffer once MC is done , but 
	  // sometimes when quantizer is 1 and MC period is long and CPU is running fast enough, the program
	  // will invoke the ME go or MC go before the operation of copying current MB to another bank 0 current
	  // MB buffer is done. This will cause problem and make some patterns fail in FPGA platform.
	  // There's no way to synchronize this copying operation at this point. Unless we add delay mechanism
	  // after MC go.
	  // Anyway, in order to avoid such condition, so we make reconstructed V component to sync to MC done
	  // in order to let the copying operation has enough time to complete before ME go or MC go.
	  // make it group 0, to normal execution
	  //pDMA_next[27] = 0x04B00040;
	  //pDMA_next[31] = 0x04B00010;
	  //pDMA_next[35] = 0x04B00010;
	  // make it group 2, make the V component sync to MC done
	  pDMA_next[27] = 0x04B00040;
	  pDMA_next[31] = 0x04B02010; // to make U component sync to MC done in order to let hardware has enough time to move the current MB to bank 0
	  pDMA_next[35] = 0x04B02010;// to make V component sync to MC done in order to let hardware has enough time to move the current MB to bank 0
    }
	
    pCodec->even_odd_I ^= 1;
	

	X = current1->rounding_type << 2 | (1 << 11) | 1;			//	1<< 4 => fcode = 1
	
	pCodec->ME_COMMAND = X;

	prevMB = &reference->mbs[counter];
	index = pCodec->Raddr;
    
    get_pmvdata0_4MV(current1->mbs, iWcount, x+1, counter, pmv, pParam);

	MOTION_ACTIVITY=MAX(abs(pmv[1].x)+abs(pmv[1].y),abs(pmv[3].x)+abs(pmv[3].y));
	d_type = (MOTION_ACTIVITY>L1);

/*************			check ME_DONE		*************/
	POLL_ME_DONE_MARKER_START
	do {
	  READ_CPSTS(X)
	} while(!(X&0x01));
	POLL_ME_DONE_MARKER_END
	
    #ifdef DUMP_PMV_RESULT
    if(1) {
      int index_array[20]={ 19,20,21,22, 28,29,30,31, 37,38,39,40, 46,47,48,49, 55,56,57,58 };
      int c;  
      int x_result[20],y_result[20],radd_result[20];
      int32_t sad_value;
      int32_t XX;
      static int mv_buffer_selector=0;
      mv_buffer_selector^=1;
      for(c=0;c<20;c++)
        {
          sad_value=pCodec->ME_command_queue0[index_array[c]];
          if(sad_value & (1<<26))
            { // SRC==1
              int src_index=(sad_value>>19)&0x7f;
              XX = (pCodec->ME_command_queue0[src_index+(mv_buffer_selector)*4]>>12);
              x_result[c]=(XX<<18)>>25;
			  y_result[c]=(XX<<25)>>25;
            }
          else
            { // SRC==0
              XX = (pCodec->ME_command_queue0[index_array[c]]>>12);
              x_result[c]=(XX<<18)>>25;
              y_result[c]=(XX<<25)>>25;
            }
          radd_result[c]=sad_value&0x0fff;
        }
    
      for(c=0;c<20;c+=4)
        {
          #ifdef DUMP_WITH_RADDR
          fprintf(pmv_result_file,"\n");
          fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c]&0x07f,y_result[c]&0x07f,radd_result[c]);
          fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f,radd_result[c+1]);
          fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f,radd_result[c+2]);
          fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f,radd_result[c+3]);          
          #else
          fprintf(pmv_result_file,"\n");
          fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x\n",x_result[c]&0x07f,y_result[c]&0x07f);
          fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f);
          fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f);
          fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f);
          #endif
          
          #if 0
          fprintf(pmv_result_file,"\n");
          fprintf(pmv_result_file,"Left candidate : (RADDR:0x%04x)\n",radd_result[c]);
          fprintf(pmv_result_file,"Top candidate : (RADDR:0x%04x)\n",radd_result[c+1]);
          fprintf(pmv_result_file,"Top-Right candidate : (RADDR:0x%04x)\n",radd_result[c+2]);
          fprintf(pmv_result_file,"Previous MB candidate : (RADDR:0x%04x)\n",radd_result[c+3]);
          #endif
        }
        #if 0
        fprintf(pmv_result_file,"\nFour motion vectors are :\n");
        XX = (pCodec->ME_command_queue0[0+(pCodec->even_odd_1)*4]>>12);
        fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
  	    XX = (pCodec->ME_command_queue0[1+(pCodec->even_odd_1)*4]>>12);
        fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
        XX = (pCodec->ME_command_queue0[2+(pCodec->even_odd_1)*4]>>12);
        fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
        XX = (pCodec->ME_command_queue0[3+(pCodec->even_odd_1)*4]>>12);
        fprintf(pmv_result_file,"0x%04x,0x%04x",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
        fprintf(pmv_result_file,"\n");
        #endif
        
      }
    #endif	
	
//#ifdef DUMP_ME_RESULT
	//READ_MIN_SADMV(pMB->sad16)
	//READ_CURDEV(mode_decision)
//#endif

	if (X & 0x80) {
		pCodec->MB_mode = 0;
		pMB->mode = MODE_INTRA;
		pMB->mv16x_0 = 0;
		pMB->mv16y_0 = 0;
		pMB->mv16x_1 = 0;
		pMB->mv16y_1 = 0;
		pMB->mv16x_2 = 0;
		pMB->mv16y_2 = 0;
		pMB->mv16x_3 = 0;
		pMB->mv16y_3 = 0;
	} else {
		pCodec->MB_mode = 1;
		if (X & 64) {
			pMB->mode = MODE_INTER;
			X = (pCodec->ME_command_queue0[3+(pCodec->even_odd_1)*4]);
			X = X >> 12;
			pMB->mv16x_0 = (X<<18)>>25;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -