📄 umc_h264_me.cpp
字号:
#ifdef NEW_INTERPOLATE
Interpolate(pRef, pitchPixels, pInterpBuf, 16, iXType, iYType, size16x16, skip_vec.mvy, m_CurMB_Y*16);
#else
ippiInterpolateLuma_H264(pRef, pitchPixels*sizeof(PixType), pInterpBuf, 16*sizeof(PixType), iXType, iYType, size16x16, m_PicParamSet.bit_depth_luma);
#endif
Ipp32s coeffs_cost = 0;
Ipp32u uMBQP = getLumaQP(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
Ipp32s chromaQP = getChromaQP(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.chroma_qp_index_offset, m_SeqParamSet.bit_depth_chroma);
bool transform_bypass = m_SeqParamSet.qpprime_y_zero_transform_bypass_flag && uMBQP == 0;
// code block
for (Ipp32s uBlock = 0; uBlock < 16; uBlock ++) {
PixType* pPredBuf = pInterpBuf + xoff[uBlock] + yoff[uBlock]*16;
Diff4x4(pPredBuf, pSrcPlane + offset, pitchPixels*sizeof(PixType), pDiffBuf);
if(!transform_bypass)
ippiTransformQuantResidual_H264(pDiffBuf, pTransformResult,(Ipp32s)uMBQP,&iNumCoeffs,0, enc_single_scan[curr_slice->m_is_cur_mb_field], &iLastCoeff);
else {
for (Ipp32s i = 0; i < 16; i++)
pTransformResult[i] = pDiffBuf[i];
}
coeffs_cost += CalculateCoeffsCost(pTransformResult, 16, dec_single_scan[curr_slice->m_is_cur_mb_field]);
if (coeffs_cost >= 6)
return false;
offset += m_EncBlockOffsetInc[curr_slice->m_is_cur_mb_field][uBlock];
}
if (m_PicParamSet.chroma_format_idc != 0) {
offset = pMBOffset->uChromaOffset[m_is_cur_pic_afrm][curr_slice->m_is_cur_mb_field];
H264MotionVector chroma_skip_vec = skip_vec;
Ipp32s f_mvy_add = 0;
if (!curr_slice->m_is_cur_mb_bottom_field && pFields0[0])
f_mvy_add += - 2;
else
if (curr_slice->m_is_cur_mb_bottom_field && !pFields0[0])
f_mvy_add += 2;
chroma_skip_vec.mvy += f_mvy_add;
CoeffsType* pQBuf = (CoeffsType*) (pTransformResult + 16);
CoeffsType* pDCBuf = (CoeffsType*) (pQBuf + 16); // Used for both luma and chroma DC blocks
Ipp16s* pMassDiffBuf = (Ipp16s*) (pDCBuf + 16);
Ipp32s start_block = 16, last_block = 20;
pSrcPlane = m_pCurrentFrame->m_pUPlane;
pRef = pRefPicList0[0]->m_pUPlane + offset + curr_slice->m_InitialOffset[pFields0[0]];
for (Ipp32s i = 0; i < 2 ; i++) {
pRef += SubpelChromaMVAdjust(&chroma_skip_vec, pitchPixels, iXType, iYType, m_PicParamSet.chroma_format_idc);
#ifdef NEW_INTERPOLATE
Interpolate_Cr(pRef, pitchPixels, pInterpBuf, 16, iXType, iYType, size16x16, chroma_skip_vec.mvy, m_CurMB_Y*16);
#else
ippiInterpolateChroma_H264(pRef, pitchPixels*sizeof(PixType), pInterpBuf, 16*sizeof(PixType), iXType, iYType, size8x8, m_SeqParamSet.bit_depth_chroma);
#endif
ippiSumsDiff8x8Blocks4x4(pSrcPlane + offset, pitchPixels, pInterpBuf, 16, pDCBuf, pMassDiffBuf);
// 2x2 forward transform
if (!transform_bypass)
ippiTransformQuantChromaDC_H264(pDCBuf, pQBuf, chromaQP,&iNumCoeffs,(curr_slice->m_slice_type == INTRASLICE),1);
else
for (Ipp32s i = 0; i < 2; i++)
for (Ipp32s j = 0; j < 2; j++)
pDCBuf[i*2+j] = pMassDiffBuf[i*32 + j*16];
if (pDCBuf[0] || pDCBuf[1] || pDCBuf[2] || pDCBuf[3])
return false;
coeffs_cost = 0;
for (Ipp32s uBlock = start_block; uBlock < last_block; uBlock ++) {
Ipp16s* pTempDiffBuf = pMassDiffBuf+(uBlock - last_block + 4)*16;
if (!transform_bypass)
ippiTransformQuantResidual_H264(pTempDiffBuf, pTransformResult, chromaQP, &iNumCoeffs, 0, enc_single_scan[curr_slice->m_is_cur_mb_field], &iLastCoeff);
else
for (Ipp32s i = 0; i < 16; i++)
pTransformResult[i] = pTempDiffBuf[i];
coeffs_cost += CalculateCoeffsCost(pTransformResult, 15, &dec_single_scan[curr_slice->m_is_cur_mb_field][1]);
if (coeffs_cost >= 7)
return false;
offset += m_EncBlockOffsetInc[curr_slice->m_is_cur_mb_field][uBlock];
}
start_block = 20;
last_block = 24;
pSrcPlane = m_pCurrentFrame->m_pVPlane;
pRef = pRefPicList0[0]->m_pVPlane + offset + curr_slice->m_InitialOffset[pFields0[0]];
}
}
*puMBSAD = 0;
fill_n(curr_slice->m_cur_mb.MVs[LIST_0]->MotionVectors, 16, skip_vec);
fill_n(curr_slice->m_cur_mb.MVs[LIST_1]->MotionVectors, 16, null_mv);
fill_n(curr_slice->m_cur_mb.RefIdxs[LIST_0]->RefIdxs, 16, (T_RefIdx)0);
fill_n(curr_slice->m_cur_mb.RefIdxs[LIST_1]->RefIdxs, 16, (T_RefIdx)-1);
curr_slice->m_cur_mb.LocalMacroblockInfo->cbp_luma = 0x000000;
curr_slice->m_cur_mb.LocalMacroblockInfo->cbp_chroma = 0xffffffff;
curr_slice->m_cur_mb.LocalMacroblockInfo->cbp = 0;
curr_slice->m_cur_mb.GlobalMacroblockInfo->mbtype = MBTYPE_SKIPPED;
// cur_mb.GlobalMacroblockInfo->sbtype[0] = MBTYPE_SKIPPED;
return true;
}
////////////////////////////////////////////////////////////////////////////////
// CMEOneMB
//
// Main function to drive motion estimation for one macroblock.
//
// Parameters:
//   curr_slice - per-thread slice state. Its m_cur_mb descriptor is updated
//                in place (mbtype, reference indices, and, on the direct-mode
//                early exit, cbp fields and motion vectors).
//   uMB        - macroblock number; its low bit selects the reference picture
//                list returned by GetRefPicList, and the full value is
//                forwarded to the worker/split routines.
//   puMBSAD    - out: cost of the mode that was selected for the macroblock.
//
// Flow (when H264_NEW_ME is not defined):
//   B slice: evaluate direct mode (optionally exiting early), run the integer
//            search over every LIST_0 and LIST_1 reference, then let
//            CMESplitOneMB_B_Slice pick the final partitioning.
//   P slice: try the skip shortcut (before the search when P_EARLY_EXIT is
//            defined, otherwise after the first reference has been searched),
//            search every LIST_0 reference, then call CMESplitOneMB_P_Slice.
// When H264_NEW_ME is defined the work is delegated to ME_B / ME_P.
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType,CoeffsType>::CMEOneMB(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
                                                   Ipp32u uMB,
                                                   Ipp32u *puMBSAD
                                                   )
{
    Ipp32s is_cur_mb_field = curr_slice->m_is_cur_mb_field;
    // Reference pictures and their per-reference prediction flags for both lists.
    H264EncoderFrame<PixType> **pRefPicList0 = GetRefPicList(curr_slice, LIST_0,is_cur_mb_field,uMB&1)->m_RefPicList;
    H264EncoderFrame<PixType> **pRefPicList1 = GetRefPicList(curr_slice, LIST_1,is_cur_mb_field,uMB&1)->m_RefPicList;
    Ipp8s *pFields0 = GetRefPicList(curr_slice, LIST_0,is_cur_mb_field,uMB&1)->m_Prediction;
    Ipp8s *pFields1 = GetRefPicList(curr_slice, LIST_1,is_cur_mb_field,uMB&1)->m_Prediction;
    const Ipp32u ME_MAX_SAD = INT_MAX >> 5; // Scaled down to allow sums of MAX without overflow.
    Ipp32s iQP = getLumaQP51(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    Ipp16s* pRDQM = glob_RDQM[iQP];
    Ipp32s iSearchHor, iSearchVer;
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;

    // Clear the reference indices of both lists, four bytes at a time.
    // NOTE(review): this writes 16 bytes through an Ipp32u* alias; it assumes
    // the RefIdxs array is exactly 16 bytes and suitably aligned - confirm
    // against the T_RefIdx definition.
    Ipp32u *pRefIndexTmp = (Ipp32u *)cur_mb.RefIdxs[LIST_1]->RefIdxs;
    // pRefIndexTmp is 4-byte aligned for a MB, mb_width=(uSubBlockWidth>>2)
    pRefIndexTmp[0] = 0;
    pRefIndexTmp[1] = 0;
    pRefIndexTmp[2] = 0;
    pRefIndexTmp[3] = 0;
    pRefIndexTmp = (Ipp32u *)cur_mb.RefIdxs[LIST_0]->RefIdxs;
    // pRefIndexTmp is 4-byte aligned for a MB, mb_width=(uSubBlockWidth>>2)
    pRefIndexTmp[0] = 0;
    pRefIndexTmp[1] = 0;
    pRefIndexTmp[2] = 0;
    pRefIndexTmp[3] = 0;

    // Per-block search results for the previous (LIST_0) and future (LIST_1)
    // references. Every entry starts at the worst possible SAD with a null
    // vector so that any real candidate replaces it.
    ME_Info prev_me_info[41];
    ME_Info futr_me_info[41];
    // TODO ADB should throw down ref_idx to Estimate_Predictor function
    for (Ipp32s block = 0; block < 41; block++)
    {
        prev_me_info[block].sad = ME_MAX_SAD;
        prev_me_info[block].ref_idx = 0;
        prev_me_info[block].mv = null_mv;
        prev_me_info[block].predicted_mv = null_mv;
        futr_me_info[block].sad = ME_MAX_SAD;
        futr_me_info[block].ref_idx = 0;
        futr_me_info[block].mv = null_mv;
        futr_me_info[block].predicted_mv = null_mv;
    }

    // Search range comes from the encoder configuration.
    iSearchHor = m_info.me_search_x;
    iSearchVer = m_info.me_search_y;

    if (BPREDSLICE == curr_slice->m_slice_type)
    {
#ifdef H264_NEW_ME
        *puMBSAD = ME_B(curr_slice);
#else
        Ipp32s k;
        Ipp32u SADFuture, SADPrev, sad_direct[4], SADDirectTotal, SADTemp;
        H264MacroblockRefIdxs ref_idxs_direct[2];
        H264MacroblockMVs mvs_direct[2];

#ifndef BFRAME_NO_DIRECT_MODE
        // Evaluate direct mode first; it yields one SAD per 8x8 block plus the
        // reference indices and motion vectors direct mode would use.
        CDirectBOneMB_worker(curr_slice,
                             uMB,
                             sad_direct,
                             ref_idxs_direct,
                             mvs_direct);

        // Sum up the 8x8 Distortions to get the distortion of the whole 16x16
        SADDirectTotal = sad_direct[0] + sad_direct[1] + sad_direct[2] + sad_direct[3];

        // Bias the SAD so that Direct B mode is chosen more often
        // (clamped at zero so the unsigned subtraction cannot wrap).
        if (SADDirectTotal >= (Ipp32u)(10 + BITS_COST(1, pRDQM)))
            SADDirectTotal -= 10 + BITS_COST(1, pRDQM);
        else
            SADDirectTotal = 0;

#ifdef B_EARLY_EXIT
        // skip forward and backward motion search if 16x16 Direct Mode is good enough.
        if (SADDirectTotal < m_DirectBSkipMEThres[iQP])
        {
            Ipp32u cbr_direct;

            // declare empty if SAD is below threshold
#ifndef NO_EMPTY_THRESH
            if (SADDirectTotal < m_EmptyThreshold[iQP])
            {
                // mark all blocks as empty, excluding chroma
                cbr_direct = 0xff0000;
            }
            else
#endif
                cbr_direct = 0xffffff;

            cur_mb.GlobalMacroblockInfo->mbtype = MBTYPE_DIRECT;
            cur_mb.LocalMacroblockInfo->cbp_luma = cbr_direct & 0xffff;
            // Both possible cbr_direct values have bits 16..23 set, so this
            // always evaluates to 0xffffffff here.
            cur_mb.LocalMacroblockInfo->cbp_chroma = ((cbr_direct & 0xff0000) == 0xff0000)? 0xffffffff: 0;
            pSetMB8x8TSPackFlag(cur_mb.GlobalMacroblockInfo, m_SeqParamSet.direct_8x8_inference_flag && m_info.transform_8x8_mode_flag);

            // populate the mvs and ref_idxs arrays
            *cur_mb.RefIdxs[LIST_0] = ref_idxs_direct[LIST_0];
            *cur_mb.RefIdxs[LIST_1] = ref_idxs_direct[LIST_1];
            *cur_mb.MVs[LIST_0] = mvs_direct[LIST_0];
            *cur_mb.MVs[LIST_1] = mvs_direct[LIST_1];

            // Direct mode accepted: report its cost and skip the searches.
            *puMBSAD = SADDirectTotal;
            return;
        }
#endif // B_EARLY_EXIT
#endif // #ifndef BFRAME_NO_DIRECT_MODE

        // NOTE(review): SADFuture and SADPrev are assigned only here and are
        // read only by the debug printf below, so that printf always reports
        // ME_MAX_SAD for them.
        SADFuture = SADPrev = SADTemp = *puMBSAD = ME_MAX_SAD;

        // Start Integer Search
        // Previous reference
        // Initial MB_type
        cur_mb.GlobalMacroblockInfo->mbtype = MBTYPE_FORWARD;
        VM_ASSERT(curr_slice->m_NumRefsInL0List && curr_slice->m_NumRefsInL1List);
        for (k = 0; k < curr_slice->m_NumRefsInL0List; k++)
        {
            CMEOneMB_worker(curr_slice, uMB, k, pRefPicList0[k], pFields0[k], iSearchHor, iSearchVer,
                            true, prev_me_info);
        }

        // Future reference
        // Initial MB_type
        cur_mb.GlobalMacroblockInfo->mbtype = MBTYPE_BACKWARD;
        for (k = 0; k < curr_slice->m_NumRefsInL1List; k++)
        {
            CMEOneMB_worker(curr_slice, uMB, k, pRefPicList1[k], pFields1[k],iSearchHor, iSearchVer,
                            true, futr_me_info);
        }

        // NOTE(review): BestSADs / BestSADs_Future are not declared in this
        // function; verify they still exist before enabling either macro.
#if defined BFRAME_FORCE_FUTURE_REFERENCE
        for (Ipp32s i = 0; i < 41; i++)
            BestSADs[i] = ME_MAX_SAD;
#endif
#if defined BFRAME_FORCE_PREVIOUS_REFERENCE
        for (Ipp32s i = 0; i < 41; i++)
            BestSADs_Future[i] = ME_MAX_SAD;
#endif
        // end integer search

        // Start Subpel Search
        // Split the B slice MB
        CMESplitOneMB_B_Slice(curr_slice, uMB, &SADTemp, ref_idxs_direct, mvs_direct,
                              sad_direct, prev_me_info, futr_me_info);
        // end subpel search

#if defined _DEBUG && defined BFRAME_PRINT_MVS
        // Fixed: the future vector used to print mvy twice instead of mvx, mvy.
        printf("MB %2d Previous: %3d,%3d %4d Future: %3d,%3d %4d\n", uMB,
               prev_me_info[0].mv.mvx, prev_me_info[0].mv.mvy, SADPrev,
               futr_me_info[0].mv.mvx, futr_me_info[0].mv.mvy, SADFuture);
#endif // BFRAME_PRINT_MVS

        *puMBSAD = SADTemp;
#endif
    } else { // P Slice
#ifdef H264_NEW_ME
        *puMBSAD = ME_P(curr_slice);
#else
        cur_mb.GlobalMacroblockInfo->mbtype = MBTYPE_INTER;

#ifdef P_EARLY_EXIT
        // Test the skip shortcut with the predicted vector before any search.
        H264MotionVector PredictedMV, mv_delta;
        Calc_One_MV_Predictor(curr_slice, curr_slice->m_CurMBAddr, 0, false, 4, 4, &PredictedMV, &mv_delta, false);
        if (IsCouldSkip(curr_slice, puMBSAD, PredictedMV))
            return;
#endif

        VM_ASSERT(curr_slice->m_NumRefsInL0List);
        for (Ipp32s k = 0; k < curr_slice->m_NumRefsInL0List; k++) {
            CMEOneMB_worker(curr_slice, uMB, k, pRefPicList0[k],pFields0[k], iSearchHor, iSearchVer, false, prev_me_info);
            if (k == 0) {
#ifndef P_EARLY_EXIT
                // Without P_EARLY_EXIT the skip test uses the 16x16 vector
                // found for the first reference.
                if (IsCouldSkip(curr_slice, puMBSAD, prev_me_info[SB_16x16].mv))
                    return;
#endif
            }
        }

        CMESplitOneMB_P_Slice(curr_slice, uMB, puMBSAD, prev_me_info);
#endif
    }
} // CMEOneMB
////////////////////////////////////////////////////////////////////////////////
// CMEOneMB_finder
////////////////////////////////////////////////////////////////////////////////
// NOTE(review): MVS_TABLE_SIZE is not referenced in the visible part of this
// file; presumably the capacity of a motion-vector candidate table - confirm
// at its use sites.
#define MVS_TABLE_SIZE 32
// Sentinel "worst possible" SAD: INT_MAX shifted right by 5 bits. Same value
// as the local constant of the same name inside CMEOneMB, whose comment gives
// the rationale (scaled down so sums of MAX do not overflow). MEVector's
// constructor uses it to initialize its SAD slots.
const Ipp32u ME_MAX_SAD = INT_MAX>>5;
template <class PixType> class MEVector
{
public:
Ipp32s x, y;
Ipp32u sads[4],s_sads; // 16 bits????? Is 16u sufficient?
MEVector()
{
x = y = 0;
sads[0] = sads[1] = sads[2] = sads[3] = ME_MAX_SAD;
};
void CalcOneMBSAD(const PixType *pRef, const PixType* pCur, Ipp32s pitchPixels, Ipp32s rx, Ipp32s ry);
};
template <class PixType> class Base_Search
{
public:
Base_Search(const H264MotionVector &mvp,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -