📄 umc_h264_aic.cpp

📁 audio-video-codecs.rar语音编解码器
💻 CPP
📖 第 1 页 / 共 5 页
字号:
                ModeSAD[Mode] = BITS_COST(4, glob_RDQM[iQP]) + SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uPred[Mode*64], 16*sizeof(PixType));
            }       // for mode
        }else{
            for (i=0; i < nmodes; i++){
                Mode = modes[i];
                GetPredBlock(Mode, &uPred[Mode*64], PredPel);
                ModeSAD[Mode] = BITS_COST(4, glob_RDQM[iQP]) + SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uPred[Mode*64], 16*sizeof(PixType));
            }   // for mode
        }
        if( ProbMode != 2 ) ModeSAD[ProbMode] += BITS_COST(1, glob_RDQM[iQP])-BITS_COST(4, glob_RDQM[iQP]);
        for(i=0;i<nmodes;i++){
             Mode = modes[i];
            if (ModeSAD[Mode] <= uMinSAD) {
                 if( ModeSAD[Mode] != uMinSAD || Mode < *pMode ){ //Prefer mode with less number
                    uMinSAD = ModeSAD[Mode];
                    *pMode = Mode;
                 }
            }
        }
    }


    if (pPred) {
        // copy selected predictor pels to provided buffer (pitch=16)
        Ipp32s i;
        PixType *pSrc = (PixType*)&uPred[(*pMode)*64];
        PixType *pDst = (PixType*)pPred;
        for(i = 0; i < 4; i++) {
            pDst[0] = pSrc[0];
            pDst[1] = pSrc[1];
            pDst[2] = pSrc[2];
            pDst[3] = pSrc[3];
            pDst += 16;
            pSrc += 16;
        }
        if (curr_slice->m_use_transform_for_intra_decision)
            Encode4x4IntraBlock(curr_slice, uBlock);
    }

    return uMinSAD;
}   // AIModeSelectOneBlock

typedef enum {
    PRED16x16_VERT   = 0,
    PRED16x16_HORZ   = 1,
    PRED16x16_DC     = 2,
    PRED16x16_PLANAR = 3
} Enum16x16PredType;

// These are numbered by the chroma pred mode bitstream codes used,
// which may not match the documentation "Mode n" used to describe the modes
// in the JVT FCD.
typedef enum {
    PRED8x8_DC     = 0,
    PRED8x8_HORZ   = 1,
    PRED8x8_VERT   = 2,
    PRED8x8_PLANAR = 3
} Enum8x8PredType;

typedef enum {
    LEFT_AVAILABLE = 1, // Pixels to the left from the MB are available.
    TOP_AVAILABLE  = 2  // Pixels above the MB are available.
} PixelsAvailabilityType;

inline void MemorySet(Ipp8u *dst, Ipp32s val, Ipp32s length)
{
    memset(dst, val, length);
}
inline void MemorySet(Ipp16u *dst, Ipp32s val, Ipp32s length)
{
    ippsSet_16s((Ipp16s)val, (Ipp16s*)dst, length);
}

////////////////////////////////////////////////////////////////////////////////
//
// AIModeSelectOneMB_16x16
//
// Choose the best advanced intra mode for coding the MB, return at
// *pMode. Also return the SAD for the chosen mode. Also
// save predictor pels for the MB in provided buffer.
//
////////////////////////////////////////////////////////////////////////////////

template <class PixType, class CoeffsType>
Ipp32u H264CoreEncoder<PixType,CoeffsType>::AIModeSelectOneMB_16x16(
            H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
            PixType  *pSrc,            // pointer to upper left pel of source MB
            PixType  *pRef,            // pointer to same MB in reference picture
            Ipp32s    pitchPixels,     // of source and ref data
            T_AIMode *pMode,           // selected mode goes here
            PixType  *pPredBuf)        // predictor pels for selected mode goes here
{
    Ipp32u uSAD[4]; // MB SAD accumulators
    PixType* pAbove;
    PixType* pLeft;
    __ALIGN16 PixType uVertPred[64];    // 4 4x4 predictor blocks
    __ALIGN16 PixType uHorizPred[64];   // 4 4x4 predictor blocks
    __ALIGN16 PixType uDCPred[16];      // 1 4x4 predictor block
    PixType *pPlanarPred = 0;
    PixType *pSrcBlock = 0;
    Ipp32u i, row;
    Ipp32u uBlock;
    Ipp32u uSum = 0;            // to get DC predictor
    Ipp32u uSmallestSAD;
    Enum16x16PredType Best16x16Type;

    bool topAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above.mb_num >= 0;
    bool leftAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mbs_left[0].mb_num >= 0;
    bool left_above_aval = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above_left.mb_num >= 0;

//f
    Ipp32u iQP = getLumaQP51(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);

    // Initialize uVertPred with prediction from above
    if (!topAvailable)
        uSAD[PRED16x16_VERT] = MAX_SAD;
    else {
        uSAD[PRED16x16_VERT] =  BITS_COST(1, glob_RDQM[iQP]);
        pAbove = pRef - pitchPixels;
        // fill each of 4 predictor blocks from above, using 32-bit copy operations for efficiency
        PixType *pDst = uVertPred;
        for (i=0; i<4; i++) {
            memcpy(pDst, pAbove, 16*sizeof(PixType));
            pDst += 16;
        }
        for(i = 0; i < 16; i++) {
            // Accumulate sum for DC predictor
            uSum += pAbove[i];
        }
    }

    // Initialize uHorizPred with prediction from left
    if (!leftAvailable)
        uSAD[PRED16x16_HORZ] = MAX_SAD;
    else {
        uSAD[PRED16x16_HORZ] =  BITS_COST(3, glob_RDQM[iQP]);
        pLeft = pRef - 1;
        // fill each of 4 predictor blocks from left
        for (i=0; i<16; i++, pLeft += pitchPixels) {
            uHorizPred[i*4+0] = *pLeft;
            uHorizPred[i*4+1] = *pLeft;
            uHorizPred[i*4+2] = *pLeft;
            uHorizPred[i*4+3] = *pLeft;
            uSum += *pLeft;     // accumulate for DC predictor
        }
    }

    uSAD[PRED16x16_DC] =  BITS_COST(3, glob_RDQM[iQP]);
    // Initialize uDCPred with average of above and left divide by 32 to get average
    if (!leftAvailable && !topAvailable)
        uSum = 1<<(m_PicParamSet.bit_depth_luma - 1);
    else {
        // For MB on left or top edge not at upper left, accumulated sum is sum of 16, so Ipp64f it for the following divide by 32.
        if (!topAvailable || !leftAvailable)
            uSum <<= 1;
        uSum = (uSum + 16) >> 5;
    }
    MemorySet(uDCPred, (PixType)uSum, 16);

    // Get planar prediction, save 16x16 PixType result at pPredBuf
    if (leftAvailable && topAvailable && left_above_aval) {
        uSAD[PRED16x16_PLANAR] =  BITS_COST(5, glob_RDQM[iQP]);
        PlanarPredictLuma(pRef, pitchPixels, pPredBuf);
    } else
        uSAD[PRED16x16_PLANAR] = MAX_SAD;

    // Mode select: Loop through all luma blocks, accumulate a MB SAD for each mode.
    if (m_Analyse & ANALYSE_SAD){
        if (leftAvailable && topAvailable && left_above_aval){
            for (uBlock=0; uBlock<16; uBlock++) {
                if ((uBlock & 3) == 0) {
                    // init pPlanarPred for new row of blocks
                    pPlanarPred = pPredBuf + uBlock*16;
                    pSrcBlock = pSrc + uBlock*pitchPixels;
                }
                uSAD[PRED16x16_DC] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
                uSAD[PRED16x16_VERT] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
                uSAD[PRED16x16_HORZ] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
                uSAD[PRED16x16_PLANAR] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), pPlanarPred, 16*sizeof(PixType));
                pSrcBlock += 4;
                pPlanarPred += 4;
            }
        }else{
            for (uBlock=0; uBlock<16; uBlock++) {
                if ((uBlock & 3) == 0) pSrcBlock = pSrc + uBlock*pitchPixels;
                uSAD[PRED16x16_DC] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
                if (topAvailable)
                    uSAD[PRED16x16_VERT] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
                if (leftAvailable)
                    uSAD[PRED16x16_HORZ] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
                pSrcBlock += 4;
            }
        }
    }else{
        if (leftAvailable && topAvailable && left_above_aval){
            for (uBlock=0; uBlock<16; uBlock++) {
                if ((uBlock & 3) == 0) {
                    // init pPlanarPred for new row of blocks
                    pPlanarPred = pPredBuf + uBlock*16;
                    pSrcBlock = pSrc + uBlock*pitchPixels;
                }
                uSAD[PRED16x16_DC] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
                uSAD[PRED16x16_VERT] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
                uSAD[PRED16x16_HORZ] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
                uSAD[PRED16x16_PLANAR] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), pPlanarPred, 16*sizeof(PixType));
                pSrcBlock += 4;
                pPlanarPred += 4;
            }
        }else{
            for (uBlock=0; uBlock<16; uBlock++) {
                if ((uBlock & 3) == 0) pSrcBlock = pSrc + uBlock*pitchPixels;
                uSAD[PRED16x16_DC] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
                if (topAvailable)
                    uSAD[PRED16x16_VERT] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
                if (leftAvailable)
                    uSAD[PRED16x16_HORZ] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
                pSrcBlock += 4;
            }
        }
    }
    // choose smallest
    uSmallestSAD = uSAD[PRED16x16_DC];
    Best16x16Type = PRED16x16_DC;
    if (uSAD[PRED16x16_VERT] < uSmallestSAD) {
        uSmallestSAD = uSAD[PRED16x16_VERT];
        Best16x16Type = PRED16x16_VERT;
    }
    if (uSAD[PRED16x16_HORZ] < uSmallestSAD) {
        uSmallestSAD = uSAD[PRED16x16_HORZ];
        Best16x16Type = PRED16x16_HORZ;
    }
    if (uSAD[PRED16x16_PLANAR] < uSmallestSAD) {
        uSmallestSAD = uSAD[PRED16x16_PLANAR];
        Best16x16Type = PRED16x16_PLANAR;
    }

    // Set MB type for smallest, fill PredBuf with predictors
    switch (Best16x16Type) {
    case PRED16x16_VERT:
        // copy from uVertPred to PredBuf, duplicating for each row of the MB
        for (row=0; row<16; row++)
            memcpy(pPredBuf + row*16, uVertPred, 16*sizeof(PixType));
        break;
    case PRED16x16_HORZ:
        // copy from uHorizPred to PredBuf
        for (row=0; row<16; row++)
            MemorySet(pPredBuf+row*16, uHorizPred[row*4], 16);
        break;
    case PRED16x16_DC:
        // set all prediction pels to the single predictor
        MemorySet(pPredBuf, uDCPred[0], 256);
        break;
    case PRED16x16_PLANAR:
        //  nothing to do, planar prediction already filled PredBuf
        break;
    default:
        VM_ASSERT(0); // Can't find a suitable intra prediction mode!!!
        break;
    }
    *pMode = (T_AIMode)Best16x16Type;

    return uSmallestSAD;
}   // C_AIModeSelectOneMB_16x16

////////////////////////////////////////////////////////////////////////////////
//
// C_AIModeSelectChromaMBs_8x8
//
// Choose the best intra mode for coding the U & V 8x8 MBs, return at
// *pMode. Also return the combined SAD for the chosen mode.  Also
// save predictor pels for the MBs in provided buffer.
//
////////////////////////////////////////////////////////////////////////////////

template <class PixType, class CoeffsType>
Ipp32u  H264CoreEncoder<PixType,CoeffsType>::AIModeSelectChromaMBs_8x8(
            H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
            PixType* pUSrc,             // pointer to upper left pel of U source MB
            PixType* pURef,             // pointer to same MB in U reference picture
            PixType* pVSrc,             // pointer to upper left pel of V source MB
            PixType* pVRef,             // pointer to same MB in V reference picture
            Ipp32u   uPitch,            // of source and ref data
            Ipp8u*   pMode,             // selected mode goes here
            PixType *pUPredBuf,         // U predictor pels for selected mode go here
            PixType *pVPredBuf)         // V predictor pels for selected mode go here
{
    Ipp32u uSAD[4]; // MB SAD accumulators
    __ALIGN16 PixType uVertPred[2][64];   // predictors from above, 4 4x4 Predictor blocks, U & V //first up lines for each block, then line+1 for each block and so on
    __ALIGN16 PixType uHorizPred[2][64];  // predictors from left, 4 4x4 Predictor blocks, U & V
    __ALIGN16 PixType uDCPred[2][256];    // DC prediction, 1 8x8 predictor block, U & V
    Ipp32u uSum[2][16] =  {0};     // for DC, U & V - 16 predictors (a - d) each
    PixType *pAbove, *pLeft, *pPred = 0, *pSrcBlock = 0;
    Ipp32u *pCopySrc, *pCopyDst;
    Enum8x8PredType Best8x8Type;
    Ipp32u i, j, uBlock, plane, uSmallestSAD, num_rows, num_cols, wide, blocks,idx;
    bool topAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above.mb_num >= 0;
    bool leftAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mbs_left[0].mb_num >= 0;
    bool left_above_aval = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above_left.mb_num >= 0;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -