📄 motion_est_core_2_optimized.c
字号:
//---------------------------------------------------------------------------
// Restore the normal settings in the primary part of the DMA double command
// buffers: some of the initial DMA command settings in the primary part of
// the DMA command double buffers were modified inside FrameCodeP() for moving
// the reference image, so those temporary settings must be put back to
// their normal values.
//---------------------------------------------------------------------------
// We disabled this command chain in order to move another reference image
// right before MotionEstimation_block0_1MV(), so we enable the command
// chain again here.
pDMA1[11] = 0x4A50000 | 48;
// Point the reference-image entries in the primary part of the DMA command
// buffer at the right addresses for DMA double command buffering.
// Because we use DMA double buffering, the increment for the Y block of the reference image is 512 bytes instead of 256 bytes.
pDMA1[0] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) + 768 | 0x07); // add offset 256*3=768
// since we use DMA double buffer, so the increment for U block of reference image is 128 bytes instead of 64 bytes
pDMA1[4] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) + 192 | 0x05); // add offset 64*3=192
// since we use DMA double buffer, so the increment for V block of reference image is 128 bytes instead of 64 bytes
pDMA1[8] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) + 192 | 0x05); // add offset 64*3=192
//---------------------------------------------------------------------------
// Prepare the primary part of the DMA double command buffer for
// MotionEstimation_1MV() here: the RTL simulation shows that the time spent
// polling for ME done is otherwise quite wasteful for 1MV.
//---------------------------------------------------------------------------
pDMA1[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_Y + 48);
pDMA1[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_U + 24);
pDMA1[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_V + 24);
pDMA1[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0 + 0x80);
pDMA1[40] = pDMA2[36];
pDMA1[36] = (uint32_t) (pCodec->pred_value_phy + 32);
// make it a chain
pDMA1[39] = 0x04A42010;
//pDMA1[27] = 0x04A42010; // make it group 2, to sync to MC done
// make it group ID 1, disable all these DMA commands
pDMA1[27] = 0x04B01040;
pDMA1[31] = 0x04B01010;
pDMA1[35] = 0x04B01010;
POLL_ME_DONE_MARKER_START
// CHECK ME IS DONE **************
do {
READ_CPSTS(X)
} while(!(X&0x01));
POLL_ME_DONE_MARKER_END
#ifdef DUMP_PMV_RESULT
if(1) {
int index_array[4]={ 19,20,21,22 };
int c;
int x_result[4],y_result[4],radd_result[4];
int32_t sad_value;
int32_t XX;
for(c=0;c<4;c++)
{
sad_value=pCodec->ME_command_queue0[index_array[c]];
if(sad_value & (1<<26))
{ // SRC==1
int src_index=(sad_value>>19)&0x7f;
XX = (pCodec->ME_command_queue0[src_index+(0)*4]>>12);
x_result[c]=(XX<<18)>>25;
y_result[c]=(XX<<25)>>25;
}
else
{ // SRC==0
XX = (pCodec->ME_command_queue0[index_array[c]]>>12);
x_result[c]=(XX<<18)>>25;
y_result[c]=(XX<<25)>>25;
}
radd_result[c]=sad_value&0x0fff;
}
for(c=0;c<1;c++)
{
#ifdef DUMP_WITH_RADDR
fprintf(pmv_result_file,"\n");
fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c]&0x07f,y_result[c]&0x07f,radd_result[c]);
fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f,radd_result[c+1]);
fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f,radd_result[c+2]);
fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f,radd_result[c+3]);
#else
fprintf(pmv_result_file,"\n");
fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x\n",x_result[c]&0x07f,y_result[c]&0x07f);
fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f);
fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f);
fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f);
#endif
}
}
#endif
d_type = (X & 0x80); // check intra/inter mode
X = (pCodec->ME_command_queue0[3]>>12);
pmv[1].x = pmv[0].x = pmvsx = (X<<18)>>25;
pmv[1].y = pmv[0].y = pmvsy = (X<<25)>>25;
if (d_type) {
pCodec->MB_mode = 0;
pMB->mode = MODE_INTRA;
pMB->mv16x_3 = 0;
pMB->mv16y_3 = 0;
// This looks like the bug that caused the pattern bus_19 (CIF) to fail, so we add these two lines.
pmvsx = 0;
pmvsy = 0;
} else {
pCodec->MB_mode = 1;
pMB->mode = MODE_INTER;
pMB->mv16x_3 = pmvsx;
pMB->mv16y_3 = pmvsy;
}
pCodec->MVZ = (X == 0);
#ifdef DUMP_PMV_RESULT
switch(pMB->mode)
{
case MODE_INTRA:
fprintf(pmv_result_file," Mode is Intra mode\n\n");
break;
case MODE_INTER:
fprintf(pmv_result_file," (1MV) MVD are : 0x%04x, 0x%04x\n",((pCodec->ME_command_queue0[11]>>16)&0x0ffff),(pCodec->ME_command_queue0[11]&0x0ffff));
fprintf(pmv_result_file," Mode is Inter 1MV mode\n\n");
break;
}
#endif
//#ifdef DUMP_ME_RESULT
//READ_MIN_SADMV(pMB->sad16)
//#endif
/* setup for next ME */
prevMB = &reference->mbs[1];
// This looks like the bug that caused the pattern bus_19 (CIF) to fail, so we modified this line.
MOTION_ACTIVITY=abs(pmvsx)+abs(pmvsy);
d_type=(MOTION_ACTIVITY > L1); // if (MOTION_ACTIVITY > L1) => Large Diamond else Small Diamond
// This looks like the bug that caused the pattern bus_19 (CIF) to fail, so we modified this line.
pCodec->ME_command_queue0[19] = (2<<29) | ((pmvsx & 0x7f) << 19) | ((pmvsy & 0x7f) << 12) | pCodec->Raddr + (pmvsy < 0 ? 0 : (pmvsy >> 1)*16);
pCodec->ME_command_queue0[22] = (1<<28) | (2<<29) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12)
| pCodec->Raddr + (prevMB->mv16y_3 < 0 ? 0 : (prevMB->mv16y_3 >> 1)*16);
pCodec->ME_command_queue0[23] = (3<<29) | (d_type << 24) | (Diamond_search_limit << 16) | ThEES; // dsize is always small
X = current1->rounding_type << 2 | 1;
pCodec->ME_COMMAND = X;
/* end setup for next ME */
return 0;
}
int32_t
MotionEstimation_1MV(MACROBLOCK *const pMB,
MACROBLOCK *const pMB_mc,
uint32_t x, uint32_t y,
MBParam * const pParam,
Encoder *pEnc,
int32_t counter)
{
FTMCP100_CODEC *pCodec=(FTMCP100_CODEC *)pEnc->pCodec;
uint32_t iWcount;
MACROBLOCK *prevMB, *pMB_tmp;
VECTOR pmv[4];
int32_t index;
int32_t X, Z, d_type;
int32_t MOTION_ACTIVITY;
int32_t tmp1, tmp2, tmp3;
volatile MDMA *pmdma = MDMA1;
DECLARE_MP4_PTR
FRAMEINFO * const current1=pEnc->current1;
FRAMEINFO * const reference=pEnc->reference;
unsigned int XDIM=pEnc->mEncParam.u32FrameWidth;
// to get the 'current' and 'not current' half of DMA double command buffer
unsigned int *pDMA_cur,*pDMA_next;
pDMA_cur=pCodec->DMA_COMMAND_local+(pCodec->even_odd_1^1)*DMA_COMMAND_QUEUE_STRIDE;
pDMA_next=pCodec->DMA_COMMAND_local+(pCodec->even_odd_1)*DMA_COMMAND_QUEUE_STRIDE;
// The PMV-done check below must stay: if it is removed, the patterns
// 'table_qcif_21', 'table_qcif_19', 'table_qcif_24' and 'table_qcif_5'
// fail.
// The reason may be that the ME or MC engine is started before the ME copy
// operation (which copies the current MB to another current-MB buffer in
// bank 0) has finished. So the PMV_DONE check was added back to act as a delay.
// check PMV done
POLL_PMV_DONE_MARKER_START
do {
READ_CPSTS(X)
} while(!(X&0x08));
POLL_PMV_DONE_MARKER_END
// Since DMA command entries 36 through 43 have been synchronized to MC done,
// checking DMA done is equivalent to checking MC done.
POLL_MARKER_S
while((pmdma->Status & 0x1) == 0) { }
POLL_MARKER_E
SET_HOFFSET(((counter-1)&0x3)*16)
SET_MECADDR(CUR_Y0+pCodec->even_odd_1*384)
SET_MEIADDR((pCodec->triple_buffer_selector)*384 + INTER_Y0)
iWcount = pParam->mb_width;
if (pCodec->MB_mode) {
index = (MBTransQuantInter(pParam, current1,pCodec)) | (pCodec->MVZ << 16) | (pMB_mc->mode<<6);
} else {
if (pParam->resyn==0)
{
pMB_tmp = pMB_mc;
pCodec->acdc_status = 0;
if ((y==0) || ((y==1) && (x==0))) {
pCodec->acdc_status = 6;
pMB_tmp = pMB_tmp - 1;
if (pMB_tmp->mode != MODE_INTRA) // check Left
pCodec->acdc_status |= 1;
} else if (x==1) {
pCodec->acdc_status = 5;
pMB_tmp = pMB_tmp - (XDIM/16); // check Top
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 2;
} else {
pMB_tmp = pMB_tmp - 1;
if (pMB_tmp->mode != MODE_INTRA) // check Left
pCodec->acdc_status |= 1;
pMB_tmp = pMB_tmp - (XDIM/16); // check Top-Left
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 4;
pMB_tmp += 1;
if (pMB_tmp->mode != MODE_INTRA) // check Top
pCodec->acdc_status |= 2;
}
}
else
{
pCodec->acdc_status = 6;
pMB_tmp = pMB_mc - 1;
if ((pMB_tmp->mode != MODE_INTRA) | (x==1))
pCodec->acdc_status |= 1;
}
index = MBTransQuantIntra_p(pParam, current1,pCodec);
}
SET_MCCTL(index) // MC GO
SET_MECTL(pCodec->ME_COMMAND) // ME GO
// begin to execute the DMA commands prepared in advance (from 0th to 0+44(0x2C)-1=43th)
DMA_MOVE((pCodec->even_odd_1^1)*DMA_COMMAND_QUEUE_STRIDE, 0x4B0002C)
// to prepare the 'not current' DMA buffer command
pDMA_next[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_Y + ((counter+2)&0x3)*16);
pDMA_next[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_U + ((counter+2)&0x3)*8);
pDMA_next[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_V + ((counter+2)&0x3)*8);
pDMA_next[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0 + (pCodec->even_odd_I)*0x80);
pDMA_next[40] = pDMA_cur[36];
if(x==pEnc->mbParam.mb_width-1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -