📄 umc_h264_aic.cpp
字号:
ModeSAD[Mode] = BITS_COST(4, glob_RDQM[iQP]) + SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uPred[Mode*64], 16*sizeof(PixType));
} // for mode
}else{
for (i=0; i < nmodes; i++){
Mode = modes[i];
GetPredBlock(Mode, &uPred[Mode*64], PredPel);
ModeSAD[Mode] = BITS_COST(4, glob_RDQM[iQP]) + SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uPred[Mode*64], 16*sizeof(PixType));
} // for mode
}
if( ProbMode != 2 ) ModeSAD[ProbMode] += BITS_COST(1, glob_RDQM[iQP])-BITS_COST(4, glob_RDQM[iQP]);
for(i=0;i<nmodes;i++){
Mode = modes[i];
if (ModeSAD[Mode] <= uMinSAD) {
if( ModeSAD[Mode] != uMinSAD || Mode < *pMode ){ //Prefer mode with less number
uMinSAD = ModeSAD[Mode];
*pMode = Mode;
}
}
}
}
if (pPred) {
// copy selected predictor pels to provided buffer (pitch=16)
Ipp32s i;
PixType *pSrc = (PixType*)&uPred[(*pMode)*64];
PixType *pDst = (PixType*)pPred;
for(i = 0; i < 4; i++) {
pDst[0] = pSrc[0];
pDst[1] = pSrc[1];
pDst[2] = pSrc[2];
pDst[3] = pSrc[3];
pDst += 16;
pSrc += 16;
}
if (curr_slice->m_use_transform_for_intra_decision)
Encode4x4IntraBlock(curr_slice, uBlock);
}
return uMinSAD;
} // AIModeSelectOneBlock
typedef enum {
PRED16x16_VERT = 0,
PRED16x16_HORZ = 1,
PRED16x16_DC = 2,
PRED16x16_PLANAR = 3
} Enum16x16PredType;
// These are numbered by the chroma pred mode bitstream codes used,
// which may not match the documentation "Mode n" used to describe the modes
// in the JVT FCD.
typedef enum {
PRED8x8_DC = 0,
PRED8x8_HORZ = 1,
PRED8x8_VERT = 2,
PRED8x8_PLANAR = 3
} Enum8x8PredType;
typedef enum {
LEFT_AVAILABLE = 1, // Pixels to the left from the MB are available.
TOP_AVAILABLE = 2 // Pixels above the MB are available.
} PixelsAvailabilityType;
inline void MemorySet(Ipp8u *dst, Ipp32s val, Ipp32s length)
{
memset(dst, val, length);
}
inline void MemorySet(Ipp16u *dst, Ipp32s val, Ipp32s length)
{
ippsSet_16s((Ipp16s)val, (Ipp16s*)dst, length);
}
////////////////////////////////////////////////////////////////////////////////
//
// AIModeSelectOneMB_16x16
//
// Choose the best advanced intra mode for coding the MB, return at
// *pMode. Also return the SAD for the chosen mode. Also
// save predictor pels for the MB in provided buffer.
//
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class CoeffsType>
Ipp32u H264CoreEncoder<PixType,CoeffsType>::AIModeSelectOneMB_16x16(
H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
PixType *pSrc, // pointer to upper left pel of source MB
PixType *pRef, // pointer to same MB in reference picture
Ipp32s pitchPixels, // of source and ref data
T_AIMode *pMode, // selected mode goes here
PixType *pPredBuf) // predictor pels for selected mode goes here
{
Ipp32u uSAD[4]; // MB SAD accumulators
PixType* pAbove;
PixType* pLeft;
__ALIGN16 PixType uVertPred[64]; // 4 4x4 predictor blocks
__ALIGN16 PixType uHorizPred[64]; // 4 4x4 predictor blocks
__ALIGN16 PixType uDCPred[16]; // 1 4x4 predictor block
PixType *pPlanarPred = 0;
PixType *pSrcBlock = 0;
Ipp32u i, row;
Ipp32u uBlock;
Ipp32u uSum = 0; // to get DC predictor
Ipp32u uSmallestSAD;
Enum16x16PredType Best16x16Type;
bool topAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above.mb_num >= 0;
bool leftAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mbs_left[0].mb_num >= 0;
bool left_above_aval = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above_left.mb_num >= 0;
//f
Ipp32u iQP = getLumaQP51(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
// Initialize uVertPred with prediction from above
if (!topAvailable)
uSAD[PRED16x16_VERT] = MAX_SAD;
else {
uSAD[PRED16x16_VERT] = BITS_COST(1, glob_RDQM[iQP]);
pAbove = pRef - pitchPixels;
// fill each of 4 predictor blocks from above, using 32-bit copy operations for efficiency
PixType *pDst = uVertPred;
for (i=0; i<4; i++) {
memcpy(pDst, pAbove, 16*sizeof(PixType));
pDst += 16;
}
for(i = 0; i < 16; i++) {
// Accumulate sum for DC predictor
uSum += pAbove[i];
}
}
// Initialize uHorizPred with prediction from left
if (!leftAvailable)
uSAD[PRED16x16_HORZ] = MAX_SAD;
else {
uSAD[PRED16x16_HORZ] = BITS_COST(3, glob_RDQM[iQP]);
pLeft = pRef - 1;
// fill each of 4 predictor blocks from left
for (i=0; i<16; i++, pLeft += pitchPixels) {
uHorizPred[i*4+0] = *pLeft;
uHorizPred[i*4+1] = *pLeft;
uHorizPred[i*4+2] = *pLeft;
uHorizPred[i*4+3] = *pLeft;
uSum += *pLeft; // accumulate for DC predictor
}
}
uSAD[PRED16x16_DC] = BITS_COST(3, glob_RDQM[iQP]);
// Initialize uDCPred with average of above and left divide by 32 to get average
if (!leftAvailable && !topAvailable)
uSum = 1<<(m_PicParamSet.bit_depth_luma - 1);
else {
// For MB on left or top edge not at upper left, accumulated sum is sum of 16, so Ipp64f it for the following divide by 32.
if (!topAvailable || !leftAvailable)
uSum <<= 1;
uSum = (uSum + 16) >> 5;
}
MemorySet(uDCPred, (PixType)uSum, 16);
// Get planar prediction, save 16x16 PixType result at pPredBuf
if (leftAvailable && topAvailable && left_above_aval) {
uSAD[PRED16x16_PLANAR] = BITS_COST(5, glob_RDQM[iQP]);
PlanarPredictLuma(pRef, pitchPixels, pPredBuf);
} else
uSAD[PRED16x16_PLANAR] = MAX_SAD;
// Mode select: Loop through all luma blocks, accumulate a MB SAD for each mode.
if (m_Analyse & ANALYSE_SAD){
if (leftAvailable && topAvailable && left_above_aval){
for (uBlock=0; uBlock<16; uBlock++) {
if ((uBlock & 3) == 0) {
// init pPlanarPred for new row of blocks
pPlanarPred = pPredBuf + uBlock*16;
pSrcBlock = pSrc + uBlock*pitchPixels;
}
uSAD[PRED16x16_DC] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
uSAD[PRED16x16_VERT] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
uSAD[PRED16x16_HORZ] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
uSAD[PRED16x16_PLANAR] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), pPlanarPred, 16*sizeof(PixType));
pSrcBlock += 4;
pPlanarPred += 4;
}
}else{
for (uBlock=0; uBlock<16; uBlock++) {
if ((uBlock & 3) == 0) pSrcBlock = pSrc + uBlock*pitchPixels;
uSAD[PRED16x16_DC] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
if (topAvailable)
uSAD[PRED16x16_VERT] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
if (leftAvailable)
uSAD[PRED16x16_HORZ] += SAD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
pSrcBlock += 4;
}
}
}else{
if (leftAvailable && topAvailable && left_above_aval){
for (uBlock=0; uBlock<16; uBlock++) {
if ((uBlock & 3) == 0) {
// init pPlanarPred for new row of blocks
pPlanarPred = pPredBuf + uBlock*16;
pSrcBlock = pSrc + uBlock*pitchPixels;
}
uSAD[PRED16x16_DC] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
uSAD[PRED16x16_VERT] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
uSAD[PRED16x16_HORZ] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
uSAD[PRED16x16_PLANAR] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), pPlanarPred, 16*sizeof(PixType));
pSrcBlock += 4;
pPlanarPred += 4;
}
}else{
for (uBlock=0; uBlock<16; uBlock++) {
if ((uBlock & 3) == 0) pSrcBlock = pSrc + uBlock*pitchPixels;
uSAD[PRED16x16_DC] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType),uDCPred, 4*sizeof(PixType));
if (topAvailable)
uSAD[PRED16x16_VERT] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uVertPred[(uBlock & 3)*4], 16*sizeof(PixType));
if (leftAvailable)
uSAD[PRED16x16_HORZ] += SATD4x4(pSrcBlock, pitchPixels*sizeof(PixType), &uHorizPred[(uBlock>>2)*16], 4*sizeof(PixType));
pSrcBlock += 4;
}
}
}
// choose smallest
uSmallestSAD = uSAD[PRED16x16_DC];
Best16x16Type = PRED16x16_DC;
if (uSAD[PRED16x16_VERT] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED16x16_VERT];
Best16x16Type = PRED16x16_VERT;
}
if (uSAD[PRED16x16_HORZ] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED16x16_HORZ];
Best16x16Type = PRED16x16_HORZ;
}
if (uSAD[PRED16x16_PLANAR] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED16x16_PLANAR];
Best16x16Type = PRED16x16_PLANAR;
}
// Set MB type for smallest, fill PredBuf with predictors
switch (Best16x16Type) {
case PRED16x16_VERT:
// copy from uVertPred to PredBuf, duplicating for each row of the MB
for (row=0; row<16; row++)
memcpy(pPredBuf + row*16, uVertPred, 16*sizeof(PixType));
break;
case PRED16x16_HORZ:
// copy from uHorizPred to PredBuf
for (row=0; row<16; row++)
MemorySet(pPredBuf+row*16, uHorizPred[row*4], 16);
break;
case PRED16x16_DC:
// set all prediction pels to the single predictor
MemorySet(pPredBuf, uDCPred[0], 256);
break;
case PRED16x16_PLANAR:
// nothing to do, planar prediction already filled PredBuf
break;
default:
VM_ASSERT(0); // Can't find a suitable intra prediction mode!!!
break;
}
*pMode = (T_AIMode)Best16x16Type;
return uSmallestSAD;
} // C_AIModeSelectOneMB_16x16
////////////////////////////////////////////////////////////////////////////////
//
// C_AIModeSelectChromaMBs_8x8
//
// Choose the best intra mode for coding the U & V 8x8 MBs, return at
// *pMode. Also return the combined SAD for the chosen mode. Also
// save predictor pels for the MBs in provided buffer.
//
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class CoeffsType>
Ipp32u H264CoreEncoder<PixType,CoeffsType>::AIModeSelectChromaMBs_8x8(
H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
PixType* pUSrc, // pointer to upper left pel of U source MB
PixType* pURef, // pointer to same MB in U reference picture
PixType* pVSrc, // pointer to upper left pel of V source MB
PixType* pVRef, // pointer to same MB in V reference picture
Ipp32u uPitch, // of source and ref data
Ipp8u* pMode, // selected mode goes here
PixType *pUPredBuf, // U predictor pels for selected mode go here
PixType *pVPredBuf) // V predictor pels for selected mode go here
{
Ipp32u uSAD[4]; // MB SAD accumulators
__ALIGN16 PixType uVertPred[2][64]; // predictors from above, 4 4x4 Predictor blocks, U & V //first up lines for each block, then line+1 for each block and so on
__ALIGN16 PixType uHorizPred[2][64]; // predictors from left, 4 4x4 Predictor blocks, U & V
__ALIGN16 PixType uDCPred[2][256]; // DC prediction, 1 8x8 predictor block, U & V
Ipp32u uSum[2][16] = {0}; // for DC, U & V - 16 predictors (a - d) each
PixType *pAbove, *pLeft, *pPred = 0, *pSrcBlock = 0;
Ipp32u *pCopySrc, *pCopyDst;
Enum8x8PredType Best8x8Type;
Ipp32u i, j, uBlock, plane, uSmallestSAD, num_rows, num_cols, wide, blocks,idx;
bool topAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above.mb_num >= 0;
bool leftAvailable = curr_slice->m_cur_mb.CurrentBlockNeighbours.mbs_left[0].mb_num >= 0;
bool left_above_aval = curr_slice->m_cur_mb.CurrentBlockNeighbours.mb_above_left.mb_num >= 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -