📄 umc_h264_me.cpp
字号:
if (uBestSAD16x16e < 0) {
uBestSAD16x16e = 0;
break;
}
#endif
for (Ipp32s i = 1; i < mv_num; i ++) {
sad_16x16 = MVConstraint((mvs_pred[i].mvx << SUB_PEL_SHIFT) - PredictedMV.mvx, (mvs_pred[i].mvy << SUB_PEL_SHIFT) - PredictedMV.mvy, pRDQM);
// This accounts for the factor that perfectly predicted MBs can lead to Skip Blocks in P Slices
//f if ((((mvs_pred[i].mvx << SUB_PEL_SHIFT) - PredictedMV.mvx) == 0) && (((mvs_pred[i].mvy << SUB_PEL_SHIFT) - PredictedMV.mvy) == 0) && !bBSlice)
//f sad_16x16 -= BITS_COST(5, glob_RDQM[iQP]);
if (sad_16x16 <= uBestSAD16x16e) {
sad_16x16_full = sad_16x16;
#ifdef NEW_INTERPOLATE
PixType *const pInterpBuf = curr_slice->m_pMBEncodeBuffer;
const PixType *pPrev1 = MVADJUST(pPrev, pitchPixels, mvs_pred[i].mvx, mvs_pred[i].mvy);
Interpolate(pPrev1, pitchPixels, pInterpBuf, 16, 0, 0, size16x16, mvs_pred[i].mvy << SUB_PEL_SHIFT, m_CurMB_Y*16, 0);
Ipp32s SAD;
ippiSAD16x16(pCurrent, pitch*sizeof(PixType), pInterpBuf, 16*sizeof(PixType), &SAD, 0);
sad_16x16_full += SAD;
#else
const PixType* pPrevSb = MVADJUST(pPrev, pitchPixels, mvs_pred[i].mvx, mvs_pred[i].mvy);
//sad_16x16_full += SAD8x8( pCurrent, pitchPixels*sizeof(PixType), pPrevSb, pitchPixels*sizeof(PixType));
//if( sad_16x16_full > uBestSAD16x16e ) continue;
//sad_16x16_full += SAD8x8( pCurrent+8, pitchPixels*sizeof(PixType), pPrevSb+8, pitchPixels*sizeof(PixType));
//if( sad_16x16_full > uBestSAD16x16e ) continue;
//sad_16x16_full += SAD8x8( pCurrent+(pitchPixels<<3), pitchPixels*sizeof(PixType), pPrevSb+(pitchPixels<<3), pitchPixels*sizeof(PixType));
//if( sad_16x16_full > uBestSAD16x16e ) continue;
//sad_16x16_full += SAD8x8( pCurrent+(pitchPixels<<3)+8, pitchPixels*sizeof(PixType), pPrevSb+(pitchPixels<<3)+8, pitchPixels*sizeof(PixType));
//if( sad_16x16_full > uBestSAD16x16e ) continue;
sad_16x16_full += SAD16x16(pCurrent, pitchPixels*sizeof(PixType), pPrevSb, pitchPixels*sizeof(PixType));;
#endif
if (sad_16x16_full < uBestSAD16x16e) {
uBestSAD16x16e = sad_16x16_full;
uBestSAD16x16NoRD = sad_16x16_full - sad_16x16;
BestMV16x16 = mvs_pred[i];
#if defined BESTOF5_EARLY_EXIT
// Consider the best of five mv is good enough if the effective SAD is less than the given threshold.
if (sad_16x16_full <= m_BestOf5EarlyExitThres[iQP])
break;
#endif
}
}
}
is_done = false; // No "Early exit" breaks taken
} while (0);
return is_done;
} // FindBestInitialMV
// Record of four signed 8-bit values, used as the element type of the
// durl8x8[] and durl4x4[] tables.
// NOTE(review): d/u/r/l presumably abbreviate down/up/right/left -- confirm
// against the code that consumes these tables.
typedef struct {
    Ipp8s d;
    Ipp8s u;
    Ipp8s r;
    Ipp8s l;
} durl_T;
// durl_T data for the 8x8 block size: four rows, one per table index.
// In every row d + u == 8 and r + l == 8; d/u switch after two rows while
// r/l alternate from row to row.
static const durl_T durl8x8[] = {
    // d  u  r  l
    {  8, 0, 8, 0 },    // 0
    {  8, 0, 0, 8 },    // 1
    {  0, 8, 8, 0 },    // 2
    {  0, 8, 0, 8 }     // 3
};
// durl_T data for the 4x4 block size: sixteen rows, one per table index.
// In every row d + u == 12 and r + l == 12, all values being multiples of 4.
// d/u stay fixed within each group of four rows; r/l cycle inside the group.
static const durl_T durl4x4[] = {
    //  d   u   r   l
    // rows 0..3: d = 12, u = 0
    {  12,  0, 12,  0 },
    {  12,  0,  8,  4 },
    {  12,  0,  4,  8 },
    {  12,  0,  0, 12 },
    // rows 4..7: d = 8, u = 4
    {   8,  4, 12,  0 },
    {   8,  4,  8,  4 },
    {   8,  4,  4,  8 },
    {   8,  4,  0, 12 },
    // rows 8..11: d = 4, u = 8
    {   4,  8, 12,  0 },
    {   4,  8,  8,  4 },
    {   4,  8,  4,  8 },
    {   4,  8,  0, 12 },
    // rows 12..15: d = 0, u = 12
    {   0, 12, 12,  0 },
    {   0, 12,  8,  4 },
    {   0, 12,  4,  8 },
    {   0, 12,  0, 12 },
}; // durl4x4[]
// Sub-pel search structure: one candidate position of the sub-pel search.
//
//   spMVx, spMVy       : x/y components of the sub-pel vector, relative to
//                        the integer-pel position.
//   spMVxoff, spMVyoff : x/y component offsets relative to the sub-pel plane
//                        whose origin is in the lower-right corner.
//   spInterpFnx        : horizontal index into the interpolation function
//                        pointer table.
//   spInterpFny        : vertical index into the interpolation function
//                        pointer table.
//
// (spInterpFnx,spInterpFny) pairs around the integer-pel location X:
//
//   1,1  2,1  3,1  0,1  1,1  2,1  3,1
//   1,2  2,2  3,2  0,2  1,2  2,2  3,2
//   1,3  2,3  3,3  0,3  1,3  2,3  3,3
//   1,0  2,0  3,0   X   1,0  2,0  3,0
//   1,1  2,1  3,1  0,1  1,1  2,1  3,1
//   1,2  2,2  3,2  0,2  1,2  2,2  3,2
//   1,3  2,3  3,3  0,3  1,3  2,3  3,3
typedef struct {
    Ipp8s  spMVx;
    Ipp8s  spMVy;
    Ipp8s  spMVxoff;
    Ipp8s  spMVyoff;
    Ipp32u spInterpFnx;
    Ipp32u spInterpFny;
} subpel_T;
// Sub-pel candidate list: each of the 48 non-zero offsets with both
// components in [-3, +3] appears exactly once.
//
// Field order per entry:
//   {spMVx, spMVy, spMVxoff, spMVyoff, spInterpFnx, spInterpFny}
//
// Every entry satisfies, for each axis:
//   spInterpFn = spMV & 3               (-3 -> 1, -2 -> 2, -1 -> 3)
//   spMVoff    = (spMV < 0) ? -1 : 0
//
// The (-3,-3) entry previously read "{-3, -3 -1, -1, 1, 1}": the missing
// comma folded "-3 -1" into -4, shifted the remaining initializers by one
// field and left spInterpFny implicitly zero.
static const subpel_T spList[48] = {
    // Offsets of +/-2 on one or both axes.
    {-2,  0, -1,  0, 2, 0}, {+2,  0,  0,  0, 2, 0},
    { 0, -2,  0, -1, 0, 2}, { 0, +2,  0,  0, 0, 2},
    {-2, -2, -1, -1, 2, 2}, {-2, +2, -1,  0, 2, 2},
    {+2, -2,  0, -1, 2, 2}, {+2, +2,  0,  0, 2, 2},
    // Odd x offsets with y == 0.
    {-3,  0, -1,  0, 1, 0}, {+1,  0,  0,  0, 1, 0},
    {-1,  0, -1,  0, 3, 0}, {+3,  0,  0,  0, 3, 0},
    // Odd x offsets with y == +/-2.
    {-3, -2, -1, -1, 1, 2}, {+1, -2,  0, -1, 1, 2}, {-3, +2, -1,  0, 1, 2}, {+1, +2,  0,  0, 1, 2},
    {-1, -2, -1, -1, 3, 2}, {+3, -2,  0, -1, 3, 2}, {-1, +2, -1,  0, 3, 2}, {+3, +2,  0,  0, 3, 2},
    // Non-zero x offsets with y in {-3, +1}.
    {-3, -3, -1, -1, 1, 1}, {+1, -3,  0, -1, 1, 1}, {-3, +1, -1,  0, 1, 1}, {+1, +1,  0,  0, 1, 1},
    {-2, -3, -1, -1, 2, 1}, {+2, -3,  0, -1, 2, 1}, {-2, +1, -1,  0, 2, 1}, {+2, +1,  0,  0, 2, 1},
    {-1, -3, -1, -1, 3, 1}, {+3, -3,  0, -1, 3, 1}, {-1, +1, -1,  0, 3, 1}, {+3, +1,  0,  0, 3, 1},
    // Non-zero x offsets with y in {-1, +3}.
    {-3, -1, -1, -1, 1, 3}, {+1, -1,  0, -1, 1, 3}, {-3, +3, -1,  0, 1, 3}, {+1, +3,  0,  0, 1, 3},
    {-2, -1, -1, -1, 2, 3}, {+2, -1,  0, -1, 2, 3}, {-2, +3, -1,  0, 2, 3}, {+2, +3,  0,  0, 2, 3},
    {-1, -1, -1, -1, 3, 3}, {+3, -1,  0, -1, 3, 3}, {-1, +3, -1,  0, 3, 3}, {+3, +3,  0,  0, 3, 3},
    // Odd y offsets with x == 0.
    { 0, -3,  0, -1, 0, 1}, { 0, +1,  0,  0, 0, 1},
    { 0, -1,  0, -1, 0, 3}, { 0, +3,  0,  0, 0, 3},
};
// Sub-pel candidate list organised as four successive steps.
//
// Field order per entry:
//   {spMVx, spMVy, spMVxoff, spMVyoff, spInterpFnx, spInterpFny}
//
// Layout (see the per-entry index formulas on the right):
//   entries  0..1  : step 1, two candidates
//   entries  2..7  : step 2, index = s1*2 + 2 (+1)
//   entries  8..25 : step 3, index = s1*6 + s2*2 + 8 (+1)
//   entries 26..79 : step 4, index = s1*18 + s2*6 + s3*2 + 26 (+1)
// where each s is 0, 1 or 2. In the tags, C maps to 0, L/U to 1 and R/D to 2
// for the earlier steps; the last letter names the candidate itself.
// NOTE(review): the tags presumably mean Center/Left/Right/Up/Down and the
// s values the outcome of each earlier step -- confirm against the search
// loop that indexes this table.
//
// Every entry satisfies, for each axis:
//   spInterpFn = spMV & 3               (-3 -> 1, -2 -> 2, -1 -> 3)
//   spMVoff    = (spMV < 0) ? -1 : 0
//
// Entry 52 (LULU) previously read "{-3, -3 -1, -1, 1, 1}": the missing comma
// folded "-3 -1" into -4, shifted the remaining initializers by one field and
// left spInterpFny implicitly zero.
static const subpel_T spConjList[80] = {
    {-2,  0, -1,  0, 2, 0}, // Step 1 L     //  0
    {+2,  0,  0,  0, 2, 0}, // Step 1 R     //  1
    // ----
    { 0, -2,  0, -1, 0, 2}, // Step 2 CU    //  2 = 0*2 + 2
    { 0, +2,  0,  0, 0, 2}, // Step 2 CD    //  3 = 0*2 + 2 + 1
    {-2, -2, -1, -1, 2, 2}, // Step 2 LU    //  4 = 1*2 + 2
    {-2, +2, -1,  0, 2, 2}, // Step 2 LD    //  5 = 1*2 + 2 + 1
    {+2, -2,  0, -1, 2, 2}, // Step 2 RU    //  6 = 2*2 + 2
    {+2, +2,  0,  0, 2, 2}, // Step 2 RD    //  7 = 2*2 + 2 + 1
    // ----
    {-1,  0, -1,  0, 3, 0}, // Step 3 CCL   //  8 = (0*6) + (0*2) + 8
    {+1,  0,  0,  0, 1, 0}, // Step 3 CCR   //  9 = (0*6) + (0*2) + 8 + 1
    {-1, -2, -1, -1, 3, 2}, // Step 3 CUL   // 10 = (0*6) + (1*2) + 8
    {+1, -2,  0, -1, 1, 2}, // Step 3 CUR   // 11 = (0*6) + (1*2) + 8 + 1
    {-1, +2, -1,  0, 3, 2}, // Step 3 CDL   // 12 = (0*6) + (2*2) + 8
    {+1, +2,  0,  0, 1, 2}, // Step 3 CDR   // 13 = (0*6) + (2*2) + 8 + 1
    {-3,  0, -1,  0, 1, 0}, // Step 3 LCL   // 14 = (1*6) + (0*2) + 8
    {-1,  0, -1,  0, 3, 0}, // Step 3 LCR   // 15 = (1*6) + (0*2) + 8 + 1
    {-3, -2, -1, -1, 1, 2}, // Step 3 LUL   // 16 = (1*6) + (1*2) + 8
    {-1, -2, -1, -1, 3, 2}, // Step 3 LUR   // 17 = (1*6) + (1*2) + 8 + 1
    {-3, +2, -1,  0, 1, 2}, // Step 3 LDL   // 18 = (1*6) + (2*2) + 8
    {-1, +2, -1,  0, 3, 2}, // Step 3 LDR   // 19 = (1*6) + (2*2) + 8 + 1
    {+1,  0,  0,  0, 1, 0}, // Step 3 RCL   // 20 = (2*6) + (0*2) + 8
    {+3,  0,  0,  0, 3, 0}, // Step 3 RCR   // 21 = (2*6) + (0*2) + 8 + 1
    {+1, -2,  0, -1, 1, 2}, // Step 3 RUL   // 22 = (2*6) + (1*2) + 8
    {+3, -2,  0, -1, 3, 2}, // Step 3 RUR   // 23 = (2*6) + (1*2) + 8 + 1
    {+1, +2,  0,  0, 1, 2}, // Step 3 RDL   // 24 = (2*6) + (2*2) + 8
    {+3, +2,  0,  0, 3, 2}, // Step 3 RDR   // 25 = (2*6) + (2*2) + 8 + 1
    // ----
    { 0, -1,  0, -1, 0, 3}, // Step 4 CCCU  // 26 = (0*18) + (0*6) + (0*2) + 26
    { 0, +1,  0,  0, 0, 1}, // Step 4 CCCD  // 27 = (0*18) + (0*6) + (0*2) + 26 + 1
    {-1, -1, -1, -1, 3, 3}, // Step 4 CCLU  // 28 = (0*18) + (0*6) + (1*2) + 26
    {-1, +1, -1,  0, 3, 1}, // Step 4 CCLD  // 29 = (0*18) + (0*6) + (1*2) + 26 + 1
    {+1, -1,  0, -1, 1, 3}, // Step 4 CCRU  // 30 = (0*18) + (0*6) + (2*2) + 26
    {+1, +1,  0,  0, 1, 1}, // Step 4 CCRD  // 31 = (0*18) + (0*6) + (2*2) + 26 + 1
    { 0, -3,  0, -1, 0, 1}, // Step 4 CUCU  // 32 = (0*18) + (1*6) + (0*2) + 26
    { 0, -1,  0, -1, 0, 3}, // Step 4 CUCD  // 33 = (0*18) + (1*6) + (0*2) + 26 + 1
    {-1, -3, -1, -1, 3, 1}, // Step 4 CULU  // 34 = (0*18) + (1*6) + (1*2) + 26
    {-1, -1, -1, -1, 3, 3}, // Step 4 CULD  // 35 = (0*18) + (1*6) + (1*2) + 26 + 1
    {+1, -3,  0, -1, 1, 1}, // Step 4 CURU  // 36 = (0*18) + (1*6) + (2*2) + 26
    {+1, -1,  0, -1, 1, 3}, // Step 4 CURD  // 37 = (0*18) + (1*6) + (2*2) + 26 + 1
    { 0, +1,  0,  0, 0, 1}, // Step 4 CDCU  // 38 = (0*18) + (2*6) + (0*2) + 26
    { 0, +3,  0,  0, 0, 3}, // Step 4 CDCD  // 39 = (0*18) + (2*6) + (0*2) + 26 + 1
    {-1, +1, -1,  0, 3, 1}, // Step 4 CDLU  // 40 = (0*18) + (2*6) + (1*2) + 26
    {-1, +3, -1,  0, 3, 3}, // Step 4 CDLD  // 41 = (0*18) + (2*6) + (1*2) + 26 + 1
    {+1, +1,  0,  0, 1, 1}, // Step 4 CDRU  // 42 = (0*18) + (2*6) + (2*2) + 26
    {+1, +3,  0,  0, 1, 3}, // Step 4 CDRD  // 43 = (0*18) + (2*6) + (2*2) + 26 + 1
    {-2, -1, -1, -1, 2, 3}, // Step 4 LCCU  // 44 = (1*18) + (0*6) + (0*2) + 26
    {-2, +1, -1,  0, 2, 1}, // Step 4 LCCD  // 45 = (1*18) + (0*6) + (0*2) + 26 + 1
    {-3, -1, -1, -1, 1, 3}, // Step 4 LCLU  // 46 = (1*18) + (0*6) + (1*2) + 26
    {-3, +1, -1,  0, 1, 1}, // Step 4 LCLD  // 47 = (1*18) + (0*6) + (1*2) + 26 + 1
    {-1, -1, -1, -1, 3, 3}, // Step 4 LCRU  // 48 = (1*18) + (0*6) + (2*2) + 26
    {-1, +1, -1,  0, 3, 1}, // Step 4 LCRD  // 49 = (1*18) + (0*6) + (2*2) + 26 + 1
    {-2, -3, -1, -1, 2, 1}, // Step 4 LUCU  // 50 = (1*18) + (1*6) + (0*2) + 26
    {-2, -1, -1, -1, 2, 3}, // Step 4 LUCD  // 51 = (1*18) + (1*6) + (0*2) + 26 + 1
    {-3, -3, -1, -1, 1, 1}, // Step 4 LULU  // 52 = (1*18) + (1*6) + (1*2) + 26
    {-3, -1, -1, -1, 1, 3}, // Step 4 LULD  // 53 = (1*18) + (1*6) + (1*2) + 26 + 1
    {-1, -3, -1, -1, 3, 1}, // Step 4 LURU  // 54 = (1*18) + (1*6) + (2*2) + 26
    {-1, -1, -1, -1, 3, 3}, // Step 4 LURD  // 55 = (1*18) + (1*6) + (2*2) + 26 + 1
    {-2, +1, -1,  0, 2, 1}, // Step 4 LDCU  // 56 = (1*18) + (2*6) + (0*2) + 26
    {-2, +3, -1,  0, 2, 3}, // Step 4 LDCD  // 57 = (1*18) + (2*6) + (0*2) + 26 + 1
    {-3, +1, -1,  0, 1, 1}, // Step 4 LDLU  // 58 = (1*18) + (2*6) + (1*2) + 26
    {-3, +3, -1,  0, 1, 3}, // Step 4 LDLD  // 59 = (1*18) + (2*6) + (1*2) + 26 + 1
    {-1, +1, -1,  0, 3, 1}, // Step 4 LDRU  // 60 = (1*18) + (2*6) + (2*2) + 26
    {-1, +3, -1,  0, 3, 3}, // Step 4 LDRD  // 61 = (1*18) + (2*6) + (2*2) + 26 + 1
    {+2, -1,  0, -1, 2, 3}, // Step 4 RCCU  // 62 = (2*18) + (0*6) + (0*2) + 26
    {+2, +1,  0,  0, 2, 1}, // Step 4 RCCD  // 63 = (2*18) + (0*6) + (0*2) + 26 + 1
    {+1, -1,  0, -1, 1, 3}, // Step 4 RCLU  // 64 = (2*18) + (0*6) + (1*2) + 26
    {+1, +1,  0,  0, 1, 1}, // Step 4 RCLD  // 65 = (2*18) + (0*6) + (1*2) + 26 + 1
    {+3, -1,  0, -1, 3, 3}, // Step 4 RCRU  // 66 = (2*18) + (0*6) + (2*2) + 26
    {+3, +1,  0,  0, 3, 1}, // Step 4 RCRD  // 67 = (2*18) + (0*6) + (2*2) + 26 + 1
    {+2, -3,  0, -1, 2, 1}, // Step 4 RUCU  // 68 = (2*18) + (1*6) + (0*2) + 26
    {+2, -1,  0, -1, 2, 3}, // Step 4 RUCD  // 69 = (2*18) + (1*6) + (0*2) + 26 + 1
    {+1, -3,  0, -1, 1, 1}, // Step 4 RULU  // 70 = (2*18) + (1*6) + (1*2) + 26
    {+1, -1,  0, -1, 1, 3}, // Step 4 RULD  // 71 = (2*18) + (1*6) + (1*2) + 26 + 1
    {+3, -3,  0, -1, 3, 1}, // Step 4 RURU  // 72 = (2*18) + (1*6) + (2*2) + 26
    {+3, -1,  0, -1, 3, 3}, // Step 4 RURD  // 73 = (2*18) + (1*6) + (2*2) + 26 + 1
    {+2, +1,  0,  0, 2, 1}, // Step 4 RDCU  // 74 = (2*18) + (2*6) + (0*2) + 26
    {+2, +3,  0,  0, 2, 3}, // Step 4 RDCD  // 75 = (2*18) + (2*6) + (0*2) + 26 + 1
    {+1, +1,  0,  0, 1, 1}, // Step 4 RDLU  // 76 = (2*18) + (2*6) + (1*2) + 26
    {+1, +3,  0,  0, 1, 3}, // Step 4 RDLD  // 77 = (2*18) + (2*6) + (1*2) + 26 + 1
    {+3, +1,  0,  0, 3, 1}, // Step 4 RDRU  // 78 = (2*18) + (2*6) + (2*2) + 26
    {+3, +3,  0,  0, 3, 3}, // Step 4 RDRD  // 79 = (2*18) + (2*6) + (2*2) + 26 + 1
};
// Sub-block index constants, listed in ascending order of value.
// NOTE(review): the gaps between consecutive *_TL values (16, 8, 8, 4)
// suggest each one is the first index of a run of same-sized sub-blocks --
// confirm against the code that uses these indices.
#define SB_4x4_TL    0
#define SB_4x8_TL   16
#define SB_8x4_TL   24
#define SB_8x8_TL   32
#define SB_8x16_L   36
#define SB_8x16_R   37
#define SB_16x8_T   38
#define SB_16x8_B   39
#define SB_16x16    40
// For each of the five larger partitions, the 8x8 block numbers (0..3) it
// contains, padded with -1 up to five entries per row.
// NOTE(review): -1 presumably acts as an end-of-list marker, and the row
// index presumably equals the partition's sub-block constant minus 36
// (36..40) -- confirm against the code that walks this table.
static const Ipp8s BlockList8x8[5][5] =
{
    { 0,  2, -1, -1, -1 },  // 2 8x16 Blocks - 36
    { 1,  3, -1, -1, -1 },
    { 0,  1, -1, -1, -1 },  // 2 16x8 Blocks - 38
    { 2,  3, -1, -1, -1 },
    { 0,  1,  2,  3, -1 }   // 1 16x16 Block - 40
};
template <class PixType, class CoeffsType>
bool H264CoreEncoder<PixType,CoeffsType>::IsCouldSkip(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32u *puMBSAD, H264MotionVector &cmpMV)
{
// NO 422 support!!!!!
/*if (m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_A].mbtype != MBTYPE_SKIPPED
&& m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_B].mbtype != MBTYPE_SKIPPED
&& m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_C].mbtype != MBTYPE_SKIPPED
&& m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_D].mbtype != MBTYPE_SKIPPED)
return false;*/
H264MotionVector skip_vec;
Skip_MV_Predicted(curr_slice, curr_slice->m_CurMBAddr, &skip_vec);
if ((abs(cmpMV.mvx - skip_vec.mvx) > 1) || (abs(cmpMV.mvy - skip_vec.mvy) > 1))
return false;
H264EncoderFrame<PixType> **pRefPicList0 = GetRefPicList(curr_slice, LIST_0, curr_slice->m_is_cur_mb_field, curr_slice->m_CurMBAddr&1)->m_RefPicList;
Ipp8s *pFields0 = GetRefPicList(curr_slice, LIST_0, curr_slice->m_is_cur_mb_field, curr_slice->m_CurMBAddr&1)->m_Prediction;
PixType *const pInterpBuf = curr_slice->m_pMBEncodeBuffer;
Ipp16s* pDiffBuf = (Ipp16s*) (curr_slice->m_pMBEncodeBuffer + 512);
CoeffsType *pTransformResult = (CoeffsType*)(pDiffBuf + 16);
T_EncodeMBOffsets *pMBOffset = &m_pMBOffsets[curr_slice->m_CurMBAddr];
Ipp32s pitchPixels = m_pCurrentFrame->pitchPixels()<<curr_slice->m_is_cur_mb_field;
PixType* pSrcPlane = m_pCurrentFrame->m_pYPlane;
Ipp32u offset = m_pMBOffsets[curr_slice->m_CurMBAddr].uLumaOffset[m_is_cur_pic_afrm][curr_slice->m_is_cur_mb_field];
Ipp32s iNumCoeffs, iLastCoeff, iXType, iYType;
PixType *pRef = pRefPicList0[0]->m_pYPlane + offset + curr_slice->m_InitialOffset[pFields0[0]];
pRef += SubpelMVAdjust(&skip_vec, pitchPixels, iXType, iYType);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -