📄 umc_h264_me.cpp
字号:
{-3, -3 -1, -1, 1, 1}, // Step 4 LULU // 52 = (1*18) + (1*6) + (1*2) + 26 {-3, -1, -1, -1, 1, 3}, // Step 4 LULD // 53 = (1*18) + (1*6) + (1*2) + 26 + 1 {-1, -3, -1, -1, 3, 1}, // Step 4 LURU // 54 = (1*18) + (1*6) + (2*2) + 26 {-1, -1, -1, -1, 3, 3}, // Step 4 LURD // 55 = (1*18) + (1*6) + (2*2) + 26 + 1 {-2, +1, -1, 0, 2, 1}, // Step 4 LDCU // 56 = (1*18) + (2*6) + (0*2) + 26 {-2, +3, -1, 0, 2, 3}, // Step 4 LDCD // 57 = (1*18) + (2*6) + (0*2) + 26 + 1 {-3, +1, -1, 0, 1, 1}, // Step 4 LDLU // 58 = (1*18) + (2*6) + (1*2) + 26 {-3, +3, -1, 0, 1, 3}, // Step 4 LDLD // 59 = (1*18) + (2*6) + (1*2) + 26 + 1 {-1, +1, -1, 0, 3, 1}, // Step 4 LDRU // 60 = (1*18) + (2*6) + (2*2) + 26 {-1, +3, -1, 0, 3, 3}, // Step 4 LDRD // 61 = (1*18) + (2*6) + (2*2) + 26 + 1 {+2, -1, 0, -1, 2, 3}, // Step 4 RCCU // 62 = (2*18) + (0*6) + (0*2) + 26 {+2, +1, 0, 0, 2, 1}, // Step 4 RCCD // 63 = (2*18) + (0*6) + (0*2) + 26 + 1 {+1, -1, 0, -1, 1, 3}, // Step 4 RCLU // 64 = (2*18) + (0*6) + (1*2) + 26 {+1, +1, 0, 0, 1, 1}, // Step 4 RCLD // 65 = (2*18) + (0*6) + (1*2) + 26 + 1 {+3, -1, 0, -1, 3, 3}, // Step 4 RCRU // 66 = (2*18) + (0*6) + (2*2) + 26 {+3, +1, 0, 0, 3, 1}, // Step 4 RCRD // 67 = (2*18) + (0*6) + (2*2) + 26 + 1 {+2, -3, 0, -1, 2, 1}, // Step 4 RUCU // 68 = (2*18) + (1*6) + (0*2) + 26 {+2, -1, 0, -1, 2, 3}, // Step 4 RUCD // 69 = (2*18) + (1*6) + (0*2) + 26 + 1 {+1, -3, 0, -1, 1, 1}, // Step 4 RULU // 70 = (2*18) + (1*6) + (1*2) + 26 {+1, -1, 0, -1, 1, 3}, // Step 4 RULD // 71 = (2*18) + (1*6) + (1*2) + 26 + 1 {+3, -3, 0, -1, 3, 1}, // Step 4 RURU // 72 = (2*18) + (1*6) + (2*2) + 26 {+3, -1, 0, -1, 3, 3}, // Step 4 RURD // 73 = (2*18) + (1*6) + (2*2) + 26 + 1 {+2, +1, 0, 0, 2, 1}, // Step 4 RDCU // 74 = (2*18) + (2*6) + (0*2) + 26 {+2, +3, 0, 0, 2, 3}, // Step 4 RDCD // 75 = (2*18) + (2*6) + (0*2) + 26 + 1 {+1, +1, 0, 0, 1, 1}, // Step 4 RDLU // 76 = (2*18) + (2*6) + (1*2) + 26 {+1, +3, 0, 0, 1, 3}, // Step 4 RDLD // 77 = (2*18) + (2*6) + (1*2) + 26 + 1 {+3, +1, 0, 0, 3, 1}, // Step 4 RDRU // 78 = (2*18) + (2*6) + (2*2) + 26 {+3, +3, 0, 0, 3, 3}, // Step 4 RDRD // 79 = (2*18) + (2*6) + (2*2) + 26 + 1};// Defines for the various subblock constants#define SB_16x16 40#define SB_16x8_B 39#define SB_16x8_T 38#define SB_8x16_R 37#define SB_8x16_L 36#define SB_8x8_TL 32#define SB_8x4_TL 24#define SB_4x8_TL 16#define SB_4x4_TL 0static const Ipp8s BlockList8x8[5][5] = { {0, 2, -1, -1, -1}, // 2 8x16 Blocks - 36 {1, 3, -1, -1, -1}, {0, 1, -1, -1, -1}, // 2 16x8 Blocks - 38 {2, 3, -1, -1, -1}, {0, 1, 2, 3, -1} // 1 16x16 Block - 40 };////////////////////////////////////////////////////////////////////////////////// SubpelMVAdjust//// Local inline function which uses the input motion vector to adjust the// reference pointer to the correct full pel position for interpolation.// It sets iXType and iYType vars to for selection of the specific// interpolation type function to use and returns the full pel offset to// be added to the reference pointer. Optimized to avoid computation when not// required.//////////////////////////////////////////////////////////////////////////////////inline static Ipp32s SubpelMVAdjust( const T_ECORE_MV *pMV, Ipp32u uPitch, Ipp32s& iXType, Ipp32s& iYType){ Ipp32s iXOffset; Ipp32s iYOffset; Ipp32s iPelOffset = 0; // convert 1/4 pel vector to pel vector and interpolation type iXType = 0; // init to no interpolation required iYType = 0; if (pMV->iMVx) { iXOffset = pMV->iMVx / SubPelFactor; iXType = pMV->iMVx - iXOffset*SubPelFactor; if (iXType < 0) { iXOffset -= 1; iXType += SubPelFactor; } iPelOffset += iXOffset; } if (pMV->iMVy) { iYOffset = pMV->iMVy / SubPelFactor; iYType = pMV->iMVy - iYOffset*SubPelFactor; if (iYType < 0) { iYOffset -= 1; iYType += SubPelFactor; } iPelOffset += iYOffset*uPitch; } return iPelOffset;} // SubpelMVAdjust////////////////////////////////////////////////////////////////////////////////// CMEOneMB//// Main function to drive motion estimation for one macroblock.////////////////////////////////////////////////////////////////////////////////void H264VideoEncoder::CMEOneMB( Ipp32u uMB, Ipp32u *puMBSAD){ Ipp32s uQP = m_pCurrentFrame->pMBData[uMB].uMBQP; Ipp32s uMVOffset = m_pCurrentFrame->pMBOffsets[uMB].uFirstBlockIndex; T_RefIdx *pRefIdxL0 = &m_pCurrentFrame->pRefIdxL0[uMVOffset]; T_RefIdx *pRefIdxL1 = &m_pCurrentFrame->pRefIdxL1[uMVOffset]; T_ECORE_MV *pMVL0 = &m_pCurrentFrame->pMVL0[uMVOffset]; T_ECORE_MV *pMVL1 = &m_pCurrentFrame->pMVL1[uMVOffset]; T_ECORE_MV NullMV = {0, 0}; T_RefIdx bestRefIdxs[41]; T_RefIdx bestRefIdxs_Future[41]; T_ECORE_MV BestMVs[41]; Ipp32u BestSADs[41]; T_ECORE_MV BestMVs_Future[41]; Ipp32u BestSADs_Future[41]; T_ECORE_MV PredictedMV[41]; T_ECORE_MV PredictedMV_Future[41]; H264EncoderFrame **pRefPicList0 = m_pCurrentFrame->GetRefPicList(0, LIST_0)->m_RefPicList; H264EncoderFrame **pRefPicList1 = m_pCurrentFrame->GetRefPicList(0, LIST_1)->m_RefPicList; const Ipp32s ME_MAX_SAD = INT_MAX >> 5; // Scaled down to allow sums of MAX without overflow. Ipp32s iSearchHor, iSearchVer; for (int block = 0; block < 41; block++) { BestSADs[block] = ME_MAX_SAD; BestSADs_Future[block] = ME_MAX_SAD; PredictedMV[block] = NullMV; PredictedMV_Future[block] = NullMV; BestMVs[block] = NullMV; BestMVs_Future[block] = NullMV; bestRefIdxs[block] = 0; bestRefIdxs_Future[block] = 0; } if (bRDQMInitialized == false) { Ipp16s *pRDQM; Ipp16s mvc; Ipp32u uQP; Ipp32s bits; for (uQP = 0; uQP < 52; ++uQP) { pRDQM = RDQM[uQP]; for (bits = 0; bits < 39; ++bits) { mvc = (Ipp16s)((rd_quant[uQP] * bits)>>3); *pRDQM++ = mvc; // + (mvc >> 3); // TBD: VSI - Tune this... } // bits } // uQP bRDQMInitialized = true; }/* switch (m_info.me_type) { case ME_Type_Full: // aka "Very High" in spec iSearchHor = m_info.me_seek_radius; iSearchVer = m_info.me_seek_radius; break; case ME_Type_High: iSearchHor = m_info.me_seek_radius>>2; iSearchVer = m_info.me_seek_radius>>3; break; case ME_Type_Medium: iSearchHor = 4; iSearchVer = 2; break; case ME_Type_Low: iSearchHor = iSearchVer = 2; break; case ME_Type_None: // Force the null vector to be returned iSearchHor = iSearchVer = 0; break; default: break; } // switch me_type*/ iSearchHor = m_info.me_search_x; iSearchVer = m_info.me_search_y; if (BPREDSLICE == m_info.current_slice_type) { Ipp32u SADFuture, SADPrev, SADDirect[4], SADDirectTotal, SADTemp; T_RefIdx ref_idx_direct[32]; // for LIST_0. Ref idxs for LIST_1 is always 0. T_ECORE_MV MVDir[32]; // MVs used (0-15 Prev, 16-31 Future) // Not a real loop // Permits "break" to bail early without a goto... do {#ifndef BFRAME_NO_DIRECT_MODE Ipp32u uCBPDirect; CDirectBOneMB_worker(uMB, SADDirect, ref_idx_direct, MVDir); // Sum up the 8x8 Distortions to get the distortion of the whole 16x16 SADDirectTotal = SADDirect[0] + SADDirect[1] + SADDirect[2] + SADDirect[3]; // declare empty if SAD is below threshold#ifndef NO_EMPTY_THRESH if (SADDirectTotal < EmptyThreshold[m_pCurrentFrame->pMBData[uMB].uMBQP]) { // mark all blocks as empty, excluding chroma uCBPDirect = 0xff0000; } else#endif uCBPDirect = 0xffffff; // Bias the SAD so that Direct B mode is chosen more often if (SADDirectTotal >= (Ipp32u)(10 + (rd_quant[uQP]))) SADDirectTotal -= 10 + (rd_quant[uQP]); else SADDirectTotal = 0;#else SADDirectTotal = ME_MAX_SAD;#endif SADFuture = SADPrev = SADTemp = *puMBSAD = ME_MAX_SAD;#ifdef B_EARLY_EXIT // skip forward and backward motion search if 16x16 Direct Mode is good enough. if (SADDirectTotal < DirectBSkipMEThres[uQP]) { m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_DIRECT; m_pCurrentFrame->pMBData[uMB].uCBP4x4 = 0xffffff; // If multiple reference frames are implemented this will // need to change, because the actual reference indexs will // need to be recorded, even in the Intra Case, and it will // to be signalled separately. // populate the MV array for (int row = 0; row < 4; ++row) { for (int col = 0; col < 4; ++col) { pRefIdxL0[row*uWidthIn4x4Blocks + col] = ref_idx_direct[row*4 + col]; pRefIdxL1[row*uWidthIn4x4Blocks + col] = ref_idx_direct[16 + row*4 + col]; pMVL0[row*uWidthIn4x4Blocks + col] = MVDir[row*4 + col]; pMVL1[row*uWidthIn4x4Blocks + col] = MVDir[row*4 + col + 16]; } // for col } // for row SADTemp = SADDirectTotal; // Set up to break out of loop break; // Bail on the rest of ME }#endif // B_EARLY_EXIT // Start Integer Search // Previous reference // Initial MB_type m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_FORWARD; VM_ASSERT(m_NumRefsInL0List && m_NumRefsInL1List); int k; for (k = 0; k < m_NumRefsInL0List; k++) { CMEOneMB_worker(uMB, k, pRefPicList0[k], iSearchHor, iSearchVer, true, bestRefIdxs, BestMVs, BestSADs, PredictedMV); } // Future reference // Initial MB_type m_pCurrentFrame->pMBData[uMB].uMBType = MBTYPE_BACKWARD; for (k = 0; k < m_NumRefsInL1List; k++) { CMEOneMB_worker(uMB, k, pRefPicList1[k], iSearchHor, iSearchVer, true, bestRefIdxs_Future, BestMVs_Future, BestSADs_Future, PredictedMV_Future); } // end integer search // Start Subpel Search // Split the B slice MB CMESplitOneMB_B_Slice(uMB, &SADTemp, MVDir, SADDirect, ref_idx_direct, bestRefIdxs, BestMVs, BestSADs, PredictedMV, bestRefIdxs_Future, BestMVs_Future, BestSADs_Future, PredictedMV_Future); // end subpel search#if defined _DEBUG && defined BFRAME_PRINT_MVS printf("MB %2d Previous: %3d,%3d %4d Future: %3d,%3d %4d\n", uMB,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -