📄 umc_h264_aic.cpp
字号:
//f
Ipp32u iQP = getLumaQP(curr_slice->m_cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
iQP = getChromaQP(iQP, m_PicParamSet.chroma_qp_index_offset, m_SeqParamSet.bit_depth_chroma);
wide = ((m_PicParamSet.chroma_format_idc-1) & 0x2)>>1;
num_cols = (wide+1)<<3;
num_rows = (((m_PicParamSet.chroma_format_idc&0x2)>>1)+1)<<3;
blocks = 2<<m_PicParamSet.chroma_format_idc;
// Initialize uVertPred with prediction from above
if (!topAvailable)
uSAD[PRED8x8_VERT] = MAX_SAD;
else {
uSAD[PRED8x8_VERT] = BITS_COST(3, glob_RDQM[iQP]);
for (plane = 0; plane < 2; plane++) {
pAbove = plane ? pVRef - uPitch : pURef - uPitch;
// Get predictors from above and copy into 4x4 blocks for SAD calculations
for (i=0; i<num_cols; i++) {
uVertPred[plane][((i>>2)<<4)+(i&0x3)] = pAbove[i]; //fill the up row
uSum[plane][i>>2] += pAbove[i]; // accumulate to A & B for DC predictor
}
// Copy the top-row sums down into the uSum slots of the block rows below
if( m_PicParamSet.chroma_format_idc == 1){
for( i=0; i<2; i++ )
uSum[plane][i+4] = uSum[plane][i];
}else{
for( i=0; i<(num_cols>>2); i++ )
uSum[plane][i+4] = uSum[plane][i+8] = uSum[plane][i+12] = uSum[plane][i];
}
for (i=0; i < (num_cols>>2); ++i) {
memcpy(&uVertPred[plane][i*16+4],&uVertPred[plane][i*16],4*sizeof(PixType));
memcpy(&uVertPred[plane][i*16+8],&uVertPred[plane][i*16],4*sizeof(PixType));
memcpy(&uVertPred[plane][i*16+12],&uVertPred[plane][i*16],4*sizeof(PixType));
}
}
}
// Initialize uHorizPred with prediction from left
if (!leftAvailable)
uSAD[PRED8x8_HORZ] = MAX_SAD;
else {
uSAD[PRED8x8_HORZ] = BITS_COST(3, glob_RDQM[iQP]);
for (plane = 0; plane < 2; plane++) {
Ipp32u tmpSum = uSum[plane][1];
uSum[plane][4] = 0; // Reset Block C to zero in this case.
switch( m_PicParamSet.chroma_format_idc ){
case 2:
uSum[plane][8] = 0;
uSum[plane][12] = 0;
break;
case 3:
//? uSum[plane][8] = 0;
//? uSum[plane][10] = 0;
//? tmpSum1 = uSum[plane][4];
//? tmpSum2 = uSum[plane][5];
break;
}
pLeft = plane ? pVRef - 1 : pURef - 1;
// Get predictors from the left and copy into 4x4 blocks for SAD calculations
for (i=0; i<num_rows; i++)
{
idx = i & ~0x03; //Reset low 2 bits
uSum[plane][idx] += *pLeft; // accumulate to A or C for DC predictor
uSum[plane][idx+1] += *pLeft; // accumulate to B or D for DC predictor
MemorySet(&uHorizPred[plane][i<<2], *pLeft, 4);
if( m_PicParamSet.chroma_format_idc == 3 ){
uSum[plane][idx+2] += *pLeft;
uSum[plane][idx+3] += *pLeft;
}
pLeft += uPitch;
}
if (topAvailable){ // Conditionally restore the previous sum
uSum[plane][1] = tmpSum; // unless this is on the top edge
if( m_PicParamSet.chroma_format_idc == 3 ){
// uSum[plane][4]=tmpSum1;
// uSum[plane][5]=tmpSum2;
}
}
}
}
// DC prediction, store predictor in all 64 pels of the predictor block for each plane.
uSAD[PRED8x8_DC] = BITS_COST(1, glob_RDQM[iQP]);
if (!topAvailable && !leftAvailable) {
Ipp32s size = 32<<m_PicParamSet.chroma_format_idc;
for( plane = 0; plane < 2; ++plane )
MemorySet(&uDCPred[plane][0], 1<<(m_info.bit_depth_chroma - 1), size); // Fill the block, both Planes
} else {
for (plane = 0; plane < 2; plane++) {
// Divide & round A & D properly, depending on how many terms are in the sum.
if (topAvailable && leftAvailable) {
// 8 Pixels
uSum[plane][0] = (uSum[plane][0] + 4) >> 3;
uSum[plane][5] = (uSum[plane][5] + 4) >> 3;
} else {
// 4 pixels
uSum[plane][0] = (uSum[plane][0] + 2) >> 2;
uSum[plane][5] = (uSum[plane][5] + 2) >> 2;
}
// Always 4 pixels
uSum[plane][1] = (uSum[plane][1] + 2) >> 2;
uSum[plane][4] = (uSum[plane][4] + 2) >> 2;
switch( m_PicParamSet.chroma_format_idc ){
case 2:
uSum[plane][8] = (uSum[plane][8] + 2) >> 2;
uSum[plane][12] = (uSum[plane][12] + 2) >> 2;
if (topAvailable && leftAvailable) {
uSum[plane][9] = (uSum[plane][9] + 4) >> 3;
uSum[plane][13] = (uSum[plane][13] + 4) >> 3;
}else{
uSum[plane][9] = (uSum[plane][9] + 2) >> 2;
uSum[plane][13] = (uSum[plane][13] + 2) >> 2;
}
break;
case 3:
uSum[plane][4] = (uSum[plane][4] + 2) >> 2;
uSum[plane][5] = (uSum[plane][5] + 2) >> 2;
uSum[plane][8] = (uSum[plane][8] + 2) >> 2;
uSum[plane][10] = (uSum[plane][10] + 2) >> 2;
if (topAvailable && leftAvailable) {
uSum[plane][6] = (uSum[plane][6] + 4) >> 3;
uSum[plane][7] = (uSum[plane][7] + 4) >> 3;
uSum[plane][9] = (uSum[plane][9] + 4) >> 3;
uSum[plane][11] = (uSum[plane][11] + 4) >> 3;
uSum[plane][12] = (uSum[plane][12] + 4) >> 3;
uSum[plane][13] = (uSum[plane][13] + 4) >> 3;
uSum[plane][14] = (uSum[plane][14] + 4) >> 3;
uSum[plane][15] = (uSum[plane][15] + 4) >> 3;
}else{
uSum[plane][6] = (uSum[plane][6] + 2) >> 2;
uSum[plane][9] = (uSum[plane][9] + 2) >> 2;
uSum[plane][7] = (uSum[plane][7] + 2) >> 2;
uSum[plane][11] = (uSum[plane][11] + 2) >> 2;
uSum[plane][12] = (uSum[plane][12] + 2) >> 2;
uSum[plane][13] = (uSum[plane][13] + 2) >> 2;
uSum[plane][14] = (uSum[plane][14] + 2) >> 2;
uSum[plane][15] = (uSum[plane][15] + 2) >> 2;
}
break;
}
// Fill the correct pixel values into the uDCPred buffer
if( m_PicParamSet.chroma_format_idc == 3 ){
for( j = 0; j<blocks; j++ )
MemorySet(&uDCPred[plane][j*16], (PixType)uSum[plane][j], 16);
}else{
for( j = 0; j<blocks; j++ )
MemorySet(&uDCPred[plane][j*16], (PixType)uSum[plane][((j>>1)<<2) + (j&0x01)], 16);
}
}
}
// Get planar prediction, save 8x8 PixType result at pUPredBuf / pVPredBuf.
if (topAvailable && leftAvailable && left_above_aval) {
PlanarPredictChroma(pURef, uPitch, pUPredBuf);
PlanarPredictChroma(pVRef, uPitch, pVPredBuf);
uSAD[PRED8x8_PLANAR] = BITS_COST(5, glob_RDQM[iQP]);
} else
uSAD[PRED8x8_PLANAR] = MAX_SAD;
// Mode select: Loop through all chroma blocks, accumulate a MB SAD for each mode.
if (m_Analyse & ANALYSE_SAD){
if (topAvailable && leftAvailable && left_above_aval){
for (plane=0; plane<2; plane++) {
for (uBlock=0; uBlock<blocks; uBlock++) {
if ((uBlock & 1) == 0) {
// init pPred and pSrcBlock for a new row of blocks
pPred = plane ? pVPredBuf + uBlock*32 : pUPredBuf + uBlock*32;
pSrcBlock = plane ? pVSrc + (uBlock<<1)*uPitch : pUSrc + (uBlock<<1)*uPitch;
}
uSAD[PRED8x8_DC] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uDCPred[plane][uBlock<<4], 4*sizeof(PixType));
uSAD[PRED8x8_VERT] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uVertPred[plane][(uBlock&1)<<4], 4*sizeof(PixType));
uSAD[PRED8x8_HORZ] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uHorizPred[plane][(uBlock>>1)<<4], 4*sizeof(PixType));
uSAD[PRED8x8_PLANAR] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), pPred, 16*sizeof(PixType));
// next block
pSrcBlock += 4;
pPred += 4;
}
}
}else{
for (plane=0; plane<2; plane++) {
for (uBlock=0; uBlock<blocks; uBlock++) {
if ((uBlock & 1) == 0) {
// init pPred and pSrcBlock for a new row of blocks
pPred = plane ? pVPredBuf + uBlock*32 : pUPredBuf + uBlock*32;
pSrcBlock = plane ? pVSrc + (uBlock<<1)*uPitch : pUSrc + (uBlock<<1)*uPitch;
}
uSAD[PRED8x8_DC] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uDCPred[plane][uBlock<<4], 4*sizeof(PixType));
if (topAvailable)
uSAD[PRED8x8_VERT] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uVertPred[plane][(uBlock&1)<<4], 4*sizeof(PixType));
if (leftAvailable)
uSAD[PRED8x8_HORZ] += SAD4x4(pSrcBlock, uPitch*sizeof(PixType), &uHorizPred[plane][(uBlock>>1)<<4], 4*sizeof(PixType));
// next block
pSrcBlock += 4;
pPred += 4;
}
}
}
}else{
if (topAvailable && leftAvailable && left_above_aval){
for (plane=0; plane<2; plane++) {
for (uBlock=0; uBlock<blocks; uBlock++) {
if ((uBlock & 1) == 0) {
// init pPred and pSrcBlock for a new row of blocks
pPred = plane ? pVPredBuf + uBlock*32 : pUPredBuf + uBlock*32;
pSrcBlock = plane ? pVSrc + (uBlock<<1)*uPitch : pUSrc + (uBlock<<1)*uPitch;
}
uSAD[PRED8x8_DC] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uDCPred[plane][uBlock<<4], 4*sizeof(PixType));
uSAD[PRED8x8_VERT] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uVertPred[plane][(uBlock&1)<<4], 4*sizeof(PixType));
uSAD[PRED8x8_HORZ] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uHorizPred[plane][(uBlock>>1)<<4], 4*sizeof(PixType));
uSAD[PRED8x8_PLANAR] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), pPred, 16*sizeof(PixType));
// next block
pSrcBlock += 4;
pPred += 4;
}
}
}else{
for (plane=0; plane<2; plane++) {
for (uBlock=0; uBlock<blocks; uBlock++) {
if ((uBlock & 1) == 0) {
// init pPred and pSrcBlock for a new row of blocks
pPred = plane ? pVPredBuf + uBlock*32 : pUPredBuf + uBlock*32;
pSrcBlock = plane ? pVSrc + (uBlock<<1)*uPitch : pUSrc + (uBlock<<1)*uPitch;
}
uSAD[PRED8x8_DC] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uDCPred[plane][uBlock<<4], 4*sizeof(PixType));
if (topAvailable)
uSAD[PRED8x8_VERT] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uVertPred[plane][(uBlock&1)<<4], 4*sizeof(PixType));
if (leftAvailable)
uSAD[PRED8x8_HORZ] += SATD4x4(pSrcBlock, uPitch*sizeof(PixType), &uHorizPred[plane][(uBlock>>1)<<4], 4*sizeof(PixType));
// next block
pSrcBlock += 4;
pPred += 4;
}
}
}
}
// choose smallest
uSmallestSAD = uSAD[PRED8x8_DC];
Best8x8Type = PRED8x8_DC;
if (uSAD[PRED8x8_VERT] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED8x8_VERT];
Best8x8Type = PRED8x8_VERT;
}
if (uSAD[PRED8x8_HORZ] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED8x8_HORZ];
Best8x8Type = PRED8x8_HORZ;
}
if (uSAD[PRED8x8_PLANAR] < uSmallestSAD) {
uSmallestSAD = uSAD[PRED8x8_PLANAR];
Best8x8Type = PRED8x8_PLANAR;
}
// Set MB type for smallest, fill PredBuf with predictors
switch (Best8x8Type) {
case PRED8x8_VERT:
for (plane=0; plane<2; plane++) {
// Prediction from above, fill rows of prediction blocks with the corresponding pels from above.
pCopySrc = (Ipp32u *)uVertPred[plane];
pCopyDst = plane ? (Ipp32u *)pVPredBuf : (Ipp32u *)pUPredBuf;
for (i=0; i<num_rows; i++) // Rewritten for 16 bits.
{
Ipp32u *srcPtr = pCopySrc;
Ipp32u *dstPtr = pCopyDst;
for(j = 0; j < sizeof(PixType); j++) {
*dstPtr = *srcPtr;
*(dstPtr+sizeof(PixType)) = *(srcPtr + 4*sizeof(PixType));
if(m_PicParamSet.chroma_format_idc == 3) {
*(dstPtr + 2*sizeof(PixType)) = *(pCopySrc + 8*sizeof(PixType));
*(dstPtr + 3*sizeof(PixType)) = *(pCopySrc + 12*sizeof(PixType));
}
dstPtr++;
srcPtr++;
}
pCopyDst += 4*sizeof(PixType);
}
/* *pCopyDst = *pCopySrc; // 4 bytes left block
*(pCopyDst + 1) = *(pCopySrc+4); // 4 bytes right block
if( m_PicParamSet.chroma_format_idc == 3 ){
*(pCopyDst+2) = *(pCopySrc+8); // 4 bytes left block
*(pCopyDst + 3) = *(pCopySrc+12); // 4 bytes right block
}
pCopyDst += 4; // pitch is 16
*/
}
break;
case PRED8x8_HORZ:
for (plane=0; plane<2; plane++) {
// Prediction from left. Fill rows of prediction blocks with the corresponding pels from the left.
pCopySrc = (Ipp32u *)uHorizPred[plane];
pCopyDst = plane ? (Ipp32u *)pVPredBuf : (Ipp32u *)pUPredBuf;
for (i=0; i<num_rows; i++) // Rewritten for 16 bits.
{
Ipp32u *dstPtr = pCopyDst;
Ipp32u *srcPtr = pCopySrc;
for(Ipp32s j = 0; j < sizeof(PixType); j++) {
*dstPtr = *(dstPtr + sizeof(PixType)) = *srcPtr;
/* if( m_PicParamSet.chroma_format_idc == 3) {
*(dstPtr + 2*sizeof(PixType)) =
*(dstPtr + 3*sizeof(PixType)) =
*dstPtr;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -