📄 umc_h264_me.cpp

📁 这是 PCA 下基于 IPP 库的示例代码。从网上下载 IPP 库并设置好相关参数后，即可编译该代码。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    {-3, -3  -1, -1, 1, 1}, // Step 4 LULU // 52 = (1*18) + (1*6) + (1*2) + 26    {-3, -1, -1, -1, 1, 3}, // Step 4 LULD // 53 = (1*18) + (1*6) + (1*2) + 26 + 1    {-1, -3, -1, -1, 3, 1}, // Step 4 LURU // 54 = (1*18) + (1*6) + (2*2) + 26    {-1, -1, -1, -1, 3, 3}, // Step 4 LURD // 55 = (1*18) + (1*6) + (2*2) + 26 + 1    {-2, +1, -1,  0, 2, 1}, // Step 4 LDCU // 56 = (1*18) + (2*6) + (0*2) + 26    {-2, +3, -1,  0, 2, 3}, // Step 4 LDCD // 57 = (1*18) + (2*6) + (0*2) + 26 + 1    {-3, +1, -1,  0, 1, 1}, // Step 4 LDLU // 58 = (1*18) + (2*6) + (1*2) + 26    {-3, +3, -1,  0, 1, 3}, // Step 4 LDLD // 59 = (1*18) + (2*6) + (1*2) + 26 + 1    {-1, +1, -1,  0, 3, 1}, // Step 4 LDRU // 60 = (1*18) + (2*6) + (2*2) + 26    {-1, +3, -1,  0, 3, 3}, // Step 4 LDRD // 61 = (1*18) + (2*6) + (2*2) + 26 + 1    {+2, -1,  0, -1, 2, 3}, // Step 4 RCCU // 62 = (2*18) + (0*6) + (0*2) + 26    {+2, +1,  0,  0, 2, 1}, // Step 4 RCCD // 63 = (2*18) + (0*6) + (0*2) + 26 + 1    {+1, -1,  0, -1, 1, 3}, // Step 4 RCLU // 64 = (2*18) + (0*6) + (1*2) + 26    {+1, +1,  0,  0, 1, 1}, // Step 4 RCLD // 65 = (2*18) + (0*6) + (1*2) + 26 + 1    {+3, -1,  0, -1, 3, 3}, // Step 4 RCRU // 66 = (2*18) + (0*6) + (2*2) + 26    {+3, +1,  0,  0, 3, 1}, // Step 4 RCRD // 67 = (2*18) + (0*6) + (2*2) + 26 + 1    {+2, -3,  0, -1, 2, 1}, // Step 4 RUCU // 68 = (2*18) + (1*6) + (0*2) + 26    {+2, -1,  0, -1, 2, 3}, // Step 4 RUCD // 69 = (2*18) + (1*6) + (0*2) + 26 + 1    {+1, -3,  0, -1, 1, 1}, // Step 4 RULU // 70 = (2*18) + (1*6) + (1*2) + 26    {+1, -1,  0, -1, 1, 3}, // Step 4 RULD // 71 = (2*18) + (1*6) + (1*2) + 26 + 1    {+3, -3,  0, -1, 3, 1}, // Step 4 RURU // 72 = (2*18) + (1*6) + (2*2) + 26    {+3, -1,  0, -1, 3, 3}, // Step 4 RURD // 73 = (2*18) + (1*6) + (2*2) + 26 + 1    {+2, +1,  0,  0, 2, 1}, // Step 4 RDCU // 74 = (2*18) + (2*6) + (0*2) + 26    {+2, +3,  0,  0, 2, 3}, // Step 4 RDCD // 75 = (2*18) + (2*6) + (0*2) + 26 + 1    {+1, +1,  0,  0, 1, 1}, // Step 4 RDLU // 76 = (2*18) + (2*6) + (1*2) + 26  
  {+1, +3,  0,  0, 1, 3}, // Step 4 RDLD // 77 = (2*18) + (2*6) + (1*2) + 26 + 1
    {+3, +1,  0,  0, 3, 1}, // Step 4 RDRU // 78 = (2*18) + (2*6) + (2*2) + 26
    {+3, +3,  0,  0, 3, 3}, // Step 4 RDRD // 79 = (2*18) + (2*6) + (2*2) + 26 + 1
};

// Defines for the various subblock constants.
// NOTE(review): judging by the names and by the "- 36" / "- 38" / "- 40"
// annotations on BlockList8x8 below, these look like starting indices into
// per-macroblock tables laid out as 4x4, 4x8, 8x4, 8x8, 8x16, 16x8, 16x16
// partitions, with _TL/_L/_R/_T/_B presumably meaning top-left / left /
// right / top / bottom -- confirm against the code that uses them.
#define SB_16x16 40
#define SB_16x8_B 39
#define SB_16x8_T 38
#define SB_8x16_R 37
#define SB_8x16_L 36
#define SB_8x8_TL 32
#define SB_8x4_TL 24
#define SB_4x8_TL 16
#define SB_4x4_TL 0

// Five rows of up to four 8x8 block numbers each. The row annotations tie
// rows 0..4 to the values 36..40, i.e. SB_8x16_L, SB_8x16_R, SB_16x8_T,
// SB_16x8_B and SB_16x16 in that order.
// NOTE(review): -1 presumably marks an unused slot / end of list -- verify
// against the loop that walks this table.
static const Ipp8s BlockList8x8[5][5] =
    {
        {0, 2, -1, -1, -1}, // 2 8x16 Blocks - 36
        {1, 3, -1, -1, -1},
        {0, 1, -1, -1, -1}, // 2 16x8 Blocks - 38
        {2, 3, -1, -1, -1},
        {0, 1, 2, 3, -1}    // 1 16x16 Block - 40
    };

////////////////////////////////////////////////////////////////////////////////
// SubpelMVAdjust
//
// Local inline function which uses the input motion vector to adjust the
// reference pointer to the correct full pel position for interpolation.
// It sets the iXType and iYType vars for selection of the specific
// interpolation type function to use and returns the full pel offset to
// be added to the reference pointer.
Optimized to avoid computation when not// required.//////////////////////////////////////////////////////////////////////////////////inline static Ipp32s SubpelMVAdjust(    const T_ECORE_MV *pMV,    Ipp32u uPitch,    Ipp32s& iXType,    Ipp32s& iYType){    Ipp32s iXOffset;    Ipp32s iYOffset;    Ipp32s iPelOffset = 0;    // convert 1/4 pel vector to pel vector and interpolation type    iXType = 0;     // init to no interpolation required    iYType = 0;    if (pMV->iMVx)    {        iXOffset = pMV->iMVx / SubPelFactor;        iXType = pMV->iMVx - iXOffset*SubPelFactor;        if (iXType < 0)        {            iXOffset -= 1;            iXType += SubPelFactor;        }        iPelOffset += iXOffset;    }    if (pMV->iMVy)    {        iYOffset = pMV->iMVy / SubPelFactor;        iYType = pMV->iMVy - iYOffset*SubPelFactor;        if (iYType < 0)        {            iYOffset -= 1;            iYType += SubPelFactor;        }        iPelOffset += iYOffset*uPitch;    }    return iPelOffset;}   // SubpelMVAdjust////////////////////////////////////////////////////////////////////////////////// CMEOneMB//// Main function to drive motion estimation for one macroblock.////////////////////////////////////////////////////////////////////////////////void  H264VideoEncoder::CMEOneMB(    Ipp32u uMB,    Ipp32u *puMBSAD){    Ipp32s uQP  = m_pCurrentFrame->pMBData[uMB].uMBQP;    Ipp32s uMVOffset = m_pCurrentFrame->pMBOffsets[uMB].uFirstBlockIndex;    T_RefIdx *pRefIdxL0 = &m_pCurrentFrame->pRefIdxL0[uMVOffset];    T_RefIdx *pRefIdxL1 = &m_pCurrentFrame->pRefIdxL1[uMVOffset];    T_ECORE_MV *pMVL0 = &m_pCurrentFrame->pMVL0[uMVOffset];    T_ECORE_MV *pMVL1 = &m_pCurrentFrame->pMVL1[uMVOffset];    T_ECORE_MV NullMV = {0, 0};    T_RefIdx bestRefIdxs[41];    T_RefIdx bestRefIdxs_Future[41];    T_ECORE_MV BestMVs[41];    Ipp32u  BestSADs[41];    T_ECORE_MV BestMVs_Future[41];    Ipp32u  BestSADs_Future[41];    T_ECORE_MV PredictedMV[41];    T_ECORE_MV PredictedMV_Future[41];    
H264EncoderFrame **pRefPicList0 = m_pCurrentFrame->GetRefPicList(0, LIST_0)->m_RefPicList;    H264EncoderFrame **pRefPicList1 = m_pCurrentFrame->GetRefPicList(0, LIST_1)->m_RefPicList;    const Ipp32s ME_MAX_SAD = INT_MAX >> 5; // Scaled down to allow sums of MAX without overflow.    Ipp32s  iSearchHor, iSearchVer;    for (int block = 0; block < 41; block++)    {        BestSADs[block] = ME_MAX_SAD;        BestSADs_Future[block] = ME_MAX_SAD;        PredictedMV[block] = NullMV;        PredictedMV_Future[block] = NullMV;        BestMVs[block] = NullMV;        BestMVs_Future[block] = NullMV;        bestRefIdxs[block] = 0;        bestRefIdxs_Future[block] = 0;    }    if (bRDQMInitialized == false)    {        Ipp16s *pRDQM;        Ipp16s mvc;        Ipp32u uQP;        Ipp32s bits;        for (uQP = 0; uQP < 52; ++uQP) {            pRDQM = RDQM[uQP];            for (bits = 0; bits < 39; ++bits) {                mvc = (Ipp16s)((rd_quant[uQP] * bits)>>3);                *pRDQM++ = mvc; // + (mvc >> 3);    // TBD: VSI - Tune this...            
}   // bits        }   // uQP        bRDQMInitialized = true;    }/*    switch (m_info.me_type) {    case ME_Type_Full:  // aka "Very High" in spec        iSearchHor = m_info.me_seek_radius;        iSearchVer = m_info.me_seek_radius;        break;    case ME_Type_High:        iSearchHor = m_info.me_seek_radius>>2;        iSearchVer = m_info.me_seek_radius>>3;        break;    case ME_Type_Medium:        iSearchHor = 4;        iSearchVer = 2;        break;    case ME_Type_Low:        iSearchHor = iSearchVer = 2;        break;    case ME_Type_None:        // Force the null vector to be returned        iSearchHor = iSearchVer = 0;        break;    default:        break;    }   // switch me_type*/    iSearchHor = m_info.me_search_x;    iSearchVer = m_info.me_search_y;    if (BPREDSLICE == m_info.current_slice_type)    {        Ipp32u SADFuture, SADPrev, SADDirect[4], SADDirectTotal, SADTemp;        T_RefIdx ref_idx_direct[32]; // for LIST_0. Ref idxs for LIST_1 is always 0.        T_ECORE_MV MVDir[32];   // MVs used (0-15 Prev, 16-31 Future)        // Not a real loop        // Permits "break" to bail early without a goto...        
do {#ifndef BFRAME_NO_DIRECT_MODE            Ipp32u uCBPDirect;            CDirectBOneMB_worker(uMB, SADDirect,                                 ref_idx_direct,                                 MVDir);            // Sum up the 8x8 Distortions to get the distortion of the whole 16x16            SADDirectTotal = SADDirect[0] + SADDirect[1] + SADDirect[2] + SADDirect[3];            // declare empty if SAD is below threshold#ifndef NO_EMPTY_THRESH            if (SADDirectTotal < EmptyThreshold[m_pCurrentFrame->pMBData[uMB].uMBQP])            {                // mark all blocks as empty, excluding chroma                uCBPDirect = 0xff0000;            }            else#endif                uCBPDirect = 0xffffff;            // Bias the SAD so that Direct B mode is chosen more often            if (SADDirectTotal >= (Ipp32u)(10 + (rd_quant[uQP])))                SADDirectTotal -= 10 + (rd_quant[uQP]);            else                SADDirectTotal = 0;#else            SADDirectTotal = ME_MAX_SAD;#endif            SADFuture = SADPrev = SADTemp = *puMBSAD = ME_MAX_SAD;#ifdef B_EARLY_EXIT            // skip forward and backward motion search if 16x16 Direct Mode is good enough.            if (SADDirectTotal < DirectBSkipMEThres[uQP])            {                m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_DIRECT;                m_pCurrentFrame->pMBData[uMB].uCBP4x4 = 0xffffff;                // If multiple reference frames are implemented this will                // need to change, because the actual reference indexs will                // need to be recorded, even in the Intra Case, and it will                // to be signalled separately.                
// populate the MV array                for (int row = 0; row < 4; ++row) {                    for (int col = 0; col < 4; ++col) {                        pRefIdxL0[row*uWidthIn4x4Blocks + col] = ref_idx_direct[row*4 + col];                        pRefIdxL1[row*uWidthIn4x4Blocks + col] = ref_idx_direct[16 + row*4 + col];                        pMVL0[row*uWidthIn4x4Blocks + col] = MVDir[row*4 + col];                        pMVL1[row*uWidthIn4x4Blocks + col] = MVDir[row*4 + col + 16];                    }   // for col                }   // for row                SADTemp = SADDirectTotal;   // Set up to break out of loop                break;                      // Bail on the rest of ME            }#endif // B_EARLY_EXIT            // Start Integer Search            // Previous reference            // Initial MB_type            m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_FORWARD;            VM_ASSERT(m_NumRefsInL0List && m_NumRefsInL1List);            int k;            for (k = 0; k < m_NumRefsInL0List; k++)            {                CMEOneMB_worker(uMB, k, pRefPicList0[k], iSearchHor, iSearchVer,                    true, bestRefIdxs, BestMVs, BestSADs, PredictedMV);            }            // Future reference            // Initial MB_type            m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_BACKWARD;            for (k = 0; k < m_NumRefsInL1List; k++)            {                CMEOneMB_worker(uMB, k, pRefPicList1[k], iSearchHor, iSearchVer,                    true, bestRefIdxs_Future, BestMVs_Future, BestSADs_Future, PredictedMV_Future);            }            // end integer search            // Start Subpel Search            // Split the B slice MB            CMESplitOneMB_B_Slice(uMB, &SADTemp, MVDir, SADDirect, ref_idx_direct, bestRefIdxs,                                  BestMVs, BestSADs, PredictedMV, bestRefIdxs_Future,                                  BestMVs_Future, BestSADs_Future, PredictedMV_Future);            // end subpel search#if defined 
_DEBUG && defined BFRAME_PRINT_MVS            printf("MB %2d Previous: %3d,%3d %4d   Future: %3d,%3d %4d\n", uMB,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -