📄 umc_h264_me.cpp

📁 audio-video-codecs.rar语音编解码器
💻 CPP
📖 第 1 页 / 共 5 页
字号:
        if (uBestSAD16x16e < 0) {
            uBestSAD16x16e = 0;
            break;
        }
#endif
        for (Ipp32s i = 1; i < mv_num; i ++) {
            sad_16x16 = MVConstraint((mvs_pred[i].mvx << SUB_PEL_SHIFT) - PredictedMV.mvx, (mvs_pred[i].mvy << SUB_PEL_SHIFT) - PredictedMV.mvy, pRDQM);
            // This accounts for the factor that perfectly predicted MBs can lead to Skip Blocks in P Slices
//f            if ((((mvs_pred[i].mvx << SUB_PEL_SHIFT) - PredictedMV.mvx) == 0) && (((mvs_pred[i].mvy << SUB_PEL_SHIFT) - PredictedMV.mvy) == 0) && !bBSlice)
//f                sad_16x16 -= BITS_COST(5, glob_RDQM[iQP]);
            if (sad_16x16 <= uBestSAD16x16e) {
                sad_16x16_full = sad_16x16;
#ifdef NEW_INTERPOLATE
                PixType *const pInterpBuf = curr_slice->m_pMBEncodeBuffer;
                const PixType *pPrev1 = MVADJUST(pPrev, pitchPixels, mvs_pred[i].mvx, mvs_pred[i].mvy);
                Interpolate(pPrev1, pitchPixels, pInterpBuf, 16, 0, 0, size16x16, mvs_pred[i].mvy << SUB_PEL_SHIFT, m_CurMB_Y*16, 0);
                Ipp32s SAD;
                ippiSAD16x16(pCurrent, pitch*sizeof(PixType), pInterpBuf, 16*sizeof(PixType), &SAD, 0);
                sad_16x16_full += SAD;
#else
                const PixType* pPrevSb = MVADJUST(pPrev, pitchPixels, mvs_pred[i].mvx, mvs_pred[i].mvy);
                //sad_16x16_full += SAD8x8( pCurrent, pitchPixels*sizeof(PixType), pPrevSb, pitchPixels*sizeof(PixType));
                //if( sad_16x16_full > uBestSAD16x16e ) continue;
                //sad_16x16_full += SAD8x8( pCurrent+8, pitchPixels*sizeof(PixType), pPrevSb+8, pitchPixels*sizeof(PixType));
                //if( sad_16x16_full > uBestSAD16x16e ) continue;
                //sad_16x16_full += SAD8x8( pCurrent+(pitchPixels<<3), pitchPixels*sizeof(PixType), pPrevSb+(pitchPixels<<3), pitchPixels*sizeof(PixType));
                //if( sad_16x16_full > uBestSAD16x16e ) continue;
                //sad_16x16_full += SAD8x8( pCurrent+(pitchPixels<<3)+8, pitchPixels*sizeof(PixType), pPrevSb+(pitchPixels<<3)+8, pitchPixels*sizeof(PixType));
                //if( sad_16x16_full > uBestSAD16x16e ) continue;
                sad_16x16_full += SAD16x16(pCurrent, pitchPixels*sizeof(PixType), pPrevSb, pitchPixels*sizeof(PixType));;
#endif
                if (sad_16x16_full < uBestSAD16x16e) {
                    uBestSAD16x16e = sad_16x16_full;
                    uBestSAD16x16NoRD = sad_16x16_full - sad_16x16;
                    BestMV16x16 = mvs_pred[i];
#if defined BESTOF5_EARLY_EXIT
                    // Consider the best of five mv is good enough if the effective SAD is less than the given threshold.
                    if (sad_16x16_full <= m_BestOf5EarlyExitThres[iQP])
                        break;
#endif
                }
            }
        }
        is_done = false;  // No "Early exit" breaks taken
    } while (0);

    return is_done;
}   // FindBestInitialMV

// Four signed 8-bit values per block, named d, u, r and l.
// NOTE(review): presumably the down/up/right/left distances (in pixels) from a
// sub-block to the edges of its 16x16 macroblock -- confirm against the code
// that reads durl8x8[] / durl4x4[].
// Member order is significant: the tables of this type are initialized
// positionally as {d, u, r, l}.
typedef struct {
    Ipp8s d;
    Ipp8s u;
    Ipp8s r;
    Ipp8s l;
} durl_T;

// durl_T data for the 8x8 block size: four entries, one per 8x8 block.
// In every entry d + u == 8 and r + l == 8.
// NOTE(review): the entry order looks like raster order of the 8x8 blocks
// inside a macroblock (top-left, top-right, bottom-left, bottom-right) --
// confirm against the indexing code.
static const durl_T durl8x8[] = {
    //  d   u   r   l
    {   8,  0,  8,  0 },    // 0
    {   8,  0,  0,  8 },    // 1
    {   0,  8,  8,  0 },    // 2
    {   0,  8,  0,  8 }     // 3
};

// durl_T data for the 4x4 block size: sixteen entries, one per 4x4 block.
// In every entry d + u == 12 and r + l == 12.  Within each group of four
// entries d/u stay fixed while r/l step by 4.
// NOTE(review): the entry order looks like raster order of the 4x4 blocks
// inside a macroblock -- confirm against the indexing code.
static const durl_T durl4x4[] = {
    //   d    u    r    l
    // entries 0..3
    {   12,   0,  12,   0 },
    {   12,   0,   8,   4 },
    {   12,   0,   4,   8 },
    {   12,   0,   0,  12 },
    // entries 4..7
    {    8,   4,  12,   0 },
    {    8,   4,   8,   4 },
    {    8,   4,   4,   8 },
    {    8,   4,   0,  12 },
    // entries 8..11
    {    4,   8,  12,   0 },
    {    4,   8,   8,   4 },
    {    4,   8,   4,   8 },
    {    4,   8,   0,  12 },
    // entries 12..15
    {    0,  12,  12,   0 },
    {    0,  12,   8,   4 },
    {    0,  12,   4,   8 },
    {    0,  12,   0,  12 },
};  // durl4x4[]

// Sub-pel search structure
//  spInterpFnx: horiz index to interpolation function pointer table
//  spInterpFny: vert index to interpolation function pointer table
//  Values of spInterpFnx,spInterpFny at each sub-pel position around the integer pel:

//   1,1  2,1  3,1  0,1  1,1  2,1  3,1
//   1,2  2,2  3,2  0,2  1,2  2,2  3,2
//   1,3  2,3  3,3  0,3  1,3  2,3  3,3
//   1,0  2,0  3,0   X   1,0  2,0  3,0      X is the integer pel location
//   1,1  2,1  3,1  0,1  1,1  2,1  3,1
//   1,2  2,2  3,2  0,2  1,2  2,2  3,2
//   1,3  2,3  3,3  0,3  1,3  2,3  3,3

//  spMVx: x-component of sub-pel vector relative to integer position
//  spMVy: y-component of sub-pel vector relative to integer position
//  spMVxoff: x-component offset relative to sub-pel plane with origin in the lower-right corner
//  spMVyoff: y-component offset relative to sub-pel plane with origin in the lower-right corner

// One candidate position of the sub-pel search.
// Member order is significant: spList[] and spConjList[] are initialized
// positionally as {spMVx, spMVy, spMVxoff, spMVyoff, spInterpFnx, spInterpFny}.
typedef struct {
    Ipp8s  spMVx;        // x-component of the sub-pel vector relative to the integer position
    Ipp8s  spMVy;        // y-component of the sub-pel vector relative to the integer position
    Ipp8s  spMVxoff;     // x offset relative to the sub-pel plane (0 or -1 in the tables)
    Ipp8s  spMVyoff;     // y offset relative to the sub-pel plane (0 or -1 in the tables)
    Ipp32u spInterpFnx;  // horizontal index into the interpolation function pointer table
    Ipp32u spInterpFny;  // vertical index into the interpolation function pointer table
} subpel_T;

// Sub-pel search candidates: all 48 non-zero offsets with both components in
// the range -3..+3 (the integer position {0, 0} itself is not listed).
//
// Field order: {spMVx, spMVy, spMVxoff, spMVyoff, spInterpFnx, spInterpFny}.
// Every entry satisfies, independently for x and y:
//   spMV?off    == -1 when spMV? is negative, otherwise 0
//   spInterpFn? == spMV? wrapped into 0..3 (-3 -> 1, -2 -> 2, -1 -> 3)
//
// The first 8 entries have only even components (-2, 0, +2); the remaining 40
// have at least one odd component.
// NOTE(review): presumably half-pel candidates first, then quarter-pel ones --
// confirm against the search loop that walks this table.
//
// Each entry must list all six values: an omitted comma silently merges two
// values into one expression and shifts the rest of the entry.
static const subpel_T spList[48] = {
    // Entries 0..7: components in {-2, 0, +2}
    {-2,  0, -1,  0, 2, 0}, {+2,  0,  0,  0, 2, 0},
    { 0, -2,  0, -1, 0, 2}, { 0, +2,  0,  0, 0, 2},
    {-2, -2, -1, -1, 2, 2}, {-2, +2, -1,  0, 2, 2},
    {+2, -2,  0, -1, 2, 2}, {+2, +2,  0,  0, 2, 2},

    // Entries 8..11: odd x, y == 0
    {-3,  0, -1,  0, 1, 0}, {+1,  0,  0,  0, 1, 0},
    {-1,  0, -1,  0, 3, 0}, {+3,  0,  0,  0, 3, 0},

    // Entries 12..19: odd x, y == +/-2
    {-3, -2, -1, -1, 1, 2}, {+1, -2,  0, -1, 1, 2}, {-3, +2, -1,  0, 1, 2}, {+1, +2,  0,  0, 1, 2},
    {-1, -2, -1, -1, 3, 2}, {+3, -2,  0, -1, 3, 2}, {-1, +2, -1,  0, 3, 2}, {+3, +2,  0,  0, 3, 2},

    // Entries 20..31: spInterpFny == 1 (y == -3 or +1), x != 0
    {-3, -3, -1, -1, 1, 1}, {+1, -3,  0, -1, 1, 1}, {-3, +1, -1,  0, 1, 1}, {+1, +1,  0,  0, 1, 1},
    {-2, -3, -1, -1, 2, 1}, {+2, -3,  0, -1, 2, 1}, {-2, +1, -1,  0, 2, 1}, {+2, +1,  0,  0, 2, 1},
    {-1, -3, -1, -1, 3, 1}, {+3, -3,  0, -1, 3, 1}, {-1, +1, -1,  0, 3, 1}, {+3, +1,  0,  0, 3, 1},

    // Entries 32..43: spInterpFny == 3 (y == -1 or +3), x != 0
    {-3, -1, -1, -1, 1, 3}, {+1, -1,  0, -1, 1, 3}, {-3, +3, -1,  0, 1, 3}, {+1, +3,  0,  0, 1, 3},
    {-2, -1, -1, -1, 2, 3}, {+2, -1,  0, -1, 2, 3}, {-2, +3, -1,  0, 2, 3}, {+2, +3,  0,  0, 2, 3},
    {-1, -1, -1, -1, 3, 3}, {+3, -1,  0, -1, 3, 3}, {-1, +3, -1,  0, 3, 3}, {+3, +3,  0,  0, 3, 3},

    // Entries 44..47: x == 0, odd y
    { 0, -3,  0, -1, 0, 1}, { 0, +1,  0,  0, 0, 1},
    { 0, -1,  0, -1, 0, 3}, { 0, +3,  0,  0, 0, 3},
};

// Candidate table for the four-step sub-pel search.
//
// Field order: {spMVx, spMVy, spMVxoff, spMVyoff, spInterpFnx, spInterpFny}.
// Every entry satisfies, independently for x and y:
//   spMV?off    == -1 when spMV? is negative, otherwise 0
//   spInterpFn? == spMV? wrapped into 0..3 (-3 -> 1, -2 -> 2, -1 -> 3)
//
// Each step contributes a pair of candidates; the pair's base index is built
// from the outcomes of the previous steps, as spelled out in the per-entry
// comments (each outcome digit is 0, 1 or 2):
//   Step 1 (x +/-2):  base 0
//   Step 2 (y +/-2):  base = s1*2 + 2
//   Step 3 (x +/-1):  base = s1*6 + s2*2 + 8
//   Step 4 (y +/-1):  base = s1*18 + s2*6 + s3*2 + 26
// The letters in the per-entry tags (C/L/R, C/U/D) name the outcome of each
// earlier step followed by the direction of the candidate itself.
// NOTE(review): presumably 0 = centre kept, 1 = first candidate of the pair
// won, 2 = second candidate won -- confirm against the search code.
//
// Each entry must list all six values: an omitted comma silently merges two
// values into one expression and shifts the rest of the entry.
static const subpel_T spConjList[80] = {

    {-2,  0, -1,  0, 2, 0}, // Step 1 L  // 0
    {+2,  0,  0,  0, 2, 0}, // Step 1 R  // 1
// ----
    { 0, -2,  0, -1, 0, 2}, // Step 2 CU  // 2 = 0*2 + 2
    { 0, +2,  0,  0, 0, 2}, // Step 2 CD  // 3 = 0*2 + 2 + 1

    {-2, -2, -1, -1, 2, 2}, // Step 2 LU  // 4 = 1*2 + 2
    {-2, +2, -1,  0, 2, 2}, // Step 2 LD  // 5 = 1*2 + 2 + 1

    {+2, -2,  0, -1, 2, 2}, // Step 2 RU  // 6 = 2*2 + 2
    {+2, +2,  0,  0, 2, 2}, // Step 2 RD  // 7 = 2*2 + 2 + 1
// ----
    {-1,  0, -1,  0, 3, 0}, // Step 3 CCL // 8 = (0*6) + (0*2) + 8
    {+1,  0,  0,  0, 1, 0}, // Step 3 CCR // 9 = (0*6) + (0*2) + 8 + 1

    {-1, -2, -1, -1, 3, 2}, // Step 3 CUL // 10 = (0*6) + (1*2) + 8
    {+1, -2,  0, -1, 1, 2}, // Step 3 CUR // 11 = (0*6) + (1*2) + 8 + 1

    {-1, +2, -1,  0, 3, 2}, // Step 3 CDL // 12 = (0*6) + (2*2) + 8
    {+1, +2,  0,  0, 1, 2}, // Step 3 CDR // 13 = (0*6) + (2*2) + 8 + 1

    {-3,  0, -1,  0, 1, 0}, // Step 3 LCL // 14 = (1*6) + (0*2) + 8
    {-1,  0, -1,  0, 3, 0}, // Step 3 LCR // 15 = (1*6) + (0*2) + 8 + 1

    {-3, -2, -1, -1, 1, 2}, // Step 3 LUL // 16 = (1*6) + (1*2) + 8
    {-1, -2, -1, -1, 3, 2}, // Step 3 LUR // 17 = (1*6) + (1*2) + 8 + 1

    {-3, +2, -1,  0, 1, 2}, // Step 3 LDL // 18 = (1*6) + (2*2) + 8
    {-1, +2, -1,  0, 3, 2}, // Step 3 LDR // 19 = (1*6) + (2*2) + 8 + 1

    {+1,  0,  0,  0, 1, 0}, // Step 3 RCL // 20 = (2*6) + (0*2) + 8
    {+3,  0,  0,  0, 3, 0}, // Step 3 RCR // 21 = (2*6) + (0*2) + 8 + 1

    {+1, -2,  0, -1, 1, 2}, // Step 3 RUL // 22 = (2*6) + (1*2) + 8
    {+3, -2,  0, -1, 3, 2}, // Step 3 RUR // 23 = (2*6) + (1*2) + 8 + 1

    {+1, +2,  0,  0, 1, 2}, // Step 3 RDL // 24 = (2*6) + (2*2) + 8
    {+3, +2,  0,  0, 3, 2}, // Step 3 RDR // 25 = (2*6) + (2*2) + 8 + 1
// ----
    { 0, -1,  0, -1, 0, 3}, // Step 4 CCCU // 26 = (0*18) + (0*6) + (0*2) + 26
    { 0, +1,  0,  0, 0, 1}, // Step 4 CCCD // 27 = (0*18) + (0*6) + (0*2) + 26 + 1

    {-1, -1, -1, -1, 3, 3}, // Step 4 CCLU // 28 = (0*18) + (0*6) + (1*2) + 26
    {-1, +1, -1,  0, 3, 1}, // Step 4 CCLD // 29 = (0*18) + (0*6) + (1*2) + 26 + 1

    {+1, -1,  0, -1, 1, 3}, // Step 4 CCRU // 30 = (0*18) + (0*6) + (2*2) + 26
    {+1, +1,  0,  0, 1, 1}, // Step 4 CCRD // 31 = (0*18) + (0*6) + (2*2) + 26 + 1

    { 0, -3,  0, -1, 0, 1}, // Step 4 CUCU // 32 = (0*18) + (1*6) + (0*2) + 26
    { 0, -1,  0, -1, 0, 3}, // Step 4 CUCD // 33 = (0*18) + (1*6) + (0*2) + 26 + 1

    {-1, -3, -1, -1, 3, 1}, // Step 4 CULU // 34 = (0*18) + (1*6) + (1*2) + 26
    {-1, -1, -1, -1, 3, 3}, // Step 4 CULD // 35 = (0*18) + (1*6) + (1*2) + 26 + 1

    {+1, -3,  0, -1, 1, 1}, // Step 4 CURU // 36 = (0*18) + (1*6) + (2*2) + 26
    {+1, -1,  0, -1, 1, 3}, // Step 4 CURD // 37 = (0*18) + (1*6) + (2*2) + 26 + 1

    { 0, +1,  0,  0, 0, 1}, // Step 4 CDCU // 38 = (0*18) + (2*6) + (0*2) + 26
    { 0, +3,  0,  0, 0, 3}, // Step 4 CDCD // 39 = (0*18) + (2*6) + (0*2) + 26 + 1

    {-1, +1, -1,  0, 3, 1}, // Step 4 CDLU // 40 = (0*18) + (2*6) + (1*2) + 26
    {-1, +3, -1,  0, 3, 3}, // Step 4 CDLD // 41 = (0*18) + (2*6) + (1*2) + 26 + 1

    {+1, +1,  0,  0, 1, 1}, // Step 4 CDRU // 42 = (0*18) + (2*6) + (2*2) + 26
    {+1, +3,  0,  0, 1, 3}, // Step 4 CDRD // 43 = (0*18) + (2*6) + (2*2) + 26 + 1

    {-2, -1, -1, -1, 2, 3}, // Step 4 LCCU // 44 = (1*18) + (0*6) + (0*2) + 26
    {-2, +1, -1,  0, 2, 1}, // Step 4 LCCD // 45 = (1*18) + (0*6) + (0*2) + 26 + 1

    {-3, -1, -1, -1, 1, 3}, // Step 4 LCLU // 46 = (1*18) + (0*6) + (1*2) + 26
    {-3, +1, -1,  0, 1, 1}, // Step 4 LCLD // 47 = (1*18) + (0*6) + (1*2) + 26 + 1

    {-1, -1, -1, -1, 3, 3}, // Step 4 LCRU // 48 = (1*18) + (0*6) + (2*2) + 26
    {-1, +1, -1,  0, 3, 1}, // Step 4 LCRD // 49 = (1*18) + (0*6) + (2*2) + 26 + 1

    {-2, -3, -1, -1, 2, 1}, // Step 4 LUCU // 50 = (1*18) + (1*6) + (0*2) + 26
    {-2, -1, -1, -1, 2, 3}, // Step 4 LUCD // 51 = (1*18) + (1*6) + (0*2) + 26 + 1

    {-3, -3, -1, -1, 1, 1}, // Step 4 LULU // 52 = (1*18) + (1*6) + (1*2) + 26
    {-3, -1, -1, -1, 1, 3}, // Step 4 LULD // 53 = (1*18) + (1*6) + (1*2) + 26 + 1

    {-1, -3, -1, -1, 3, 1}, // Step 4 LURU // 54 = (1*18) + (1*6) + (2*2) + 26
    {-1, -1, -1, -1, 3, 3}, // Step 4 LURD // 55 = (1*18) + (1*6) + (2*2) + 26 + 1

    {-2, +1, -1,  0, 2, 1}, // Step 4 LDCU // 56 = (1*18) + (2*6) + (0*2) + 26
    {-2, +3, -1,  0, 2, 3}, // Step 4 LDCD // 57 = (1*18) + (2*6) + (0*2) + 26 + 1

    {-3, +1, -1,  0, 1, 1}, // Step 4 LDLU // 58 = (1*18) + (2*6) + (1*2) + 26
    {-3, +3, -1,  0, 1, 3}, // Step 4 LDLD // 59 = (1*18) + (2*6) + (1*2) + 26 + 1

    {-1, +1, -1,  0, 3, 1}, // Step 4 LDRU // 60 = (1*18) + (2*6) + (2*2) + 26
    {-1, +3, -1,  0, 3, 3}, // Step 4 LDRD // 61 = (1*18) + (2*6) + (2*2) + 26 + 1

    {+2, -1,  0, -1, 2, 3}, // Step 4 RCCU // 62 = (2*18) + (0*6) + (0*2) + 26
    {+2, +1,  0,  0, 2, 1}, // Step 4 RCCD // 63 = (2*18) + (0*6) + (0*2) + 26 + 1

    {+1, -1,  0, -1, 1, 3}, // Step 4 RCLU // 64 = (2*18) + (0*6) + (1*2) + 26
    {+1, +1,  0,  0, 1, 1}, // Step 4 RCLD // 65 = (2*18) + (0*6) + (1*2) + 26 + 1

    {+3, -1,  0, -1, 3, 3}, // Step 4 RCRU // 66 = (2*18) + (0*6) + (2*2) + 26
    {+3, +1,  0,  0, 3, 1}, // Step 4 RCRD // 67 = (2*18) + (0*6) + (2*2) + 26 + 1

    {+2, -3,  0, -1, 2, 1}, // Step 4 RUCU // 68 = (2*18) + (1*6) + (0*2) + 26
    {+2, -1,  0, -1, 2, 3}, // Step 4 RUCD // 69 = (2*18) + (1*6) + (0*2) + 26 + 1

    {+1, -3,  0, -1, 1, 1}, // Step 4 RULU // 70 = (2*18) + (1*6) + (1*2) + 26
    {+1, -1,  0, -1, 1, 3}, // Step 4 RULD // 71 = (2*18) + (1*6) + (1*2) + 26 + 1

    {+3, -3,  0, -1, 3, 1}, // Step 4 RURU // 72 = (2*18) + (1*6) + (2*2) + 26
    {+3, -1,  0, -1, 3, 3}, // Step 4 RURD // 73 = (2*18) + (1*6) + (2*2) + 26 + 1

    {+2, +1,  0,  0, 2, 1}, // Step 4 RDCU // 74 = (2*18) + (2*6) + (0*2) + 26
    {+2, +3,  0,  0, 2, 3}, // Step 4 RDCD // 75 = (2*18) + (2*6) + (0*2) + 26 + 1

    {+1, +1,  0,  0, 1, 1}, // Step 4 RDLU // 76 = (2*18) + (2*6) + (1*2) + 26
    {+1, +3,  0,  0, 1, 3}, // Step 4 RDLD // 77 = (2*18) + (2*6) + (1*2) + 26 + 1

    {+3, +1,  0,  0, 3, 1}, // Step 4 RDRU // 78 = (2*18) + (2*6) + (2*2) + 26
    {+3, +3,  0,  0, 3, 3}, // Step 4 RDRD // 79 = (2*18) + (2*6) + (2*2) + 26 + 1
};

// Sub-block index constants, listed in ascending order.
//
// The *_TL names are the first index of a run of equally sized sub-blocks; the
// gaps between successive bases are 16 (4x4), 8 (4x8), 8 (8x4) and 4 (8x8).
// The 8x16 (left/right), 16x8 (top/bottom) and 16x16 partitions each get a
// single index, 36..40.
// NOTE(review): the suffix TL presumably means "top-left" -- confirm.

#define SB_4x4_TL   0
#define SB_4x8_TL   16
#define SB_8x4_TL   24
#define SB_8x8_TL   32
#define SB_8x16_L   36
#define SB_8x16_R   37
#define SB_16x8_T   38
#define SB_16x8_B   39
#define SB_16x16    40

// For each of the five large partitions (sub-block indices 36..40), the list
// of 8x8 block numbers it contains.  Unused trailing slots hold -1.
// NOTE(review): presumably indexed by (sub-block index - SB_8x16_L) and
// scanned until the first -1 -- confirm against the code that reads it.
static const Ipp8s BlockList8x8[5][5] = {
    // 2 8x16 Blocks - 36
    { 0,  2, -1, -1, -1 },
    { 1,  3, -1, -1, -1 },
    // 2 16x8 Blocks - 38
    { 0,  1, -1, -1, -1 },
    { 2,  3, -1, -1, -1 },
    // 1 16x16 Block - 40
    { 0,  1,  2,  3, -1 }
};

template <class PixType, class CoeffsType>
bool H264CoreEncoder<PixType,CoeffsType>::IsCouldSkip(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32u *puMBSAD, H264MotionVector &cmpMV)
{
    // NO 422 support!!!!!

    /*if (m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_A].mbtype != MBTYPE_SKIPPED
        && m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_B].mbtype != MBTYPE_SKIPPED
        && m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_C].mbtype != MBTYPE_SKIPPED
        && m_pCurrentFrame->m_mbinfo.mbs[curr_slice->m_cur_mb.CurrentMacroblockNeighbours.mb_D].mbtype != MBTYPE_SKIPPED)
        return false;*/

    H264MotionVector skip_vec;
    Skip_MV_Predicted(curr_slice, curr_slice->m_CurMBAddr, &skip_vec);
    if ((abs(cmpMV.mvx - skip_vec.mvx) > 1) || (abs(cmpMV.mvy - skip_vec.mvy) > 1))
        return false;

    H264EncoderFrame<PixType> **pRefPicList0 = GetRefPicList(curr_slice, LIST_0, curr_slice->m_is_cur_mb_field, curr_slice->m_CurMBAddr&1)->m_RefPicList;
    Ipp8s *pFields0 = GetRefPicList(curr_slice, LIST_0, curr_slice->m_is_cur_mb_field, curr_slice->m_CurMBAddr&1)->m_Prediction;
    PixType *const pInterpBuf = curr_slice->m_pMBEncodeBuffer;
    Ipp16s* pDiffBuf    = (Ipp16s*) (curr_slice->m_pMBEncodeBuffer + 512);
    CoeffsType *pTransformResult = (CoeffsType*)(pDiffBuf + 16);
    T_EncodeMBOffsets *pMBOffset = &m_pMBOffsets[curr_slice->m_CurMBAddr];
    Ipp32s pitchPixels = m_pCurrentFrame->pitchPixels()<<curr_slice->m_is_cur_mb_field;
    PixType* pSrcPlane = m_pCurrentFrame->m_pYPlane;
    Ipp32u offset = m_pMBOffsets[curr_slice->m_CurMBAddr].uLumaOffset[m_is_cur_pic_afrm][curr_slice->m_is_cur_mb_field];
    Ipp32s   iNumCoeffs, iLastCoeff, iXType, iYType;

    PixType *pRef = pRefPicList0[0]->m_pYPlane + offset + curr_slice->m_InitialOffset[pFields0[0]];
    pRef += SubpelMVAdjust(&skip_vec, pitchPixels, iXType, iYType);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -