📄 umc_h264_ermb.cpp
字号:
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2004 - 2007 Intel Corporation. All Rights Reserved.
//
#include "umc_h264_video_encoder.h"
#include "umc_h264_tables.h"
#include "umc_h264_to_ipp.h"
#include "umc_h264_bme.h"
#include "ippdefs.h"
#include "ippvc.h"
//#define TRACE_INTRA 50
//#define TRACE_INTER 5
//#define TRACE_INTRA_16X16 185
// The 16x16 trace switch offered above is spelled TRACE_INTRA_16X16 (capital X),
// while this guard historically tested TRACE_INTRA_16x16. Accept both spellings
// so that enabling either one makes printMatr available.
#if (defined(TRACE_INTRA) || defined(TRACE_INTER) || defined(TRACE_INTRA_16X16) || defined(TRACE_INTRA_16x16))
// Debug helper: dump an m-by-n matrix to stderr.
//
//   a      - pointer to the first element
//   pitchA - distance between consecutive rows, in elements of T
//   m, n   - number of rows and columns to print
//   rshift - right shift applied to every element before printing
//   name   - optional label; a placeholder is printed when it is NULL
template <class T> void printMatr(T *a, Ipp32s pitchA, Ipp32s m, Ipp32s n, Ipp32s rshift = 0, const char* name = NULL)
{
    // Passing a null pointer to "%s" is undefined behaviour, and NULL is the default.
    fprintf(stderr,"Matrix \"%s\" {\n", name ? name : "<unnamed>");
    for(Ipp32s i = 0; i < m; i++) {
        fprintf(stderr," ");
        for(Ipp32s j = 0; j < n; j++) {
            // Cast so the argument always matches "%3d" whatever T is.
            fprintf(stderr," %3d,", static_cast<int>(a[i*pitchA + j] >> rshift));
        }
        fprintf(stderr,"\n");
    }
    fprintf(stderr,"}\n");
}
#endif
// Cost thresholds. None of these macros is referenced in the portion of the
// file reviewed here.
// NOTE(review): presumably they are limits used when deciding whether a block's
// or macroblock's few small coefficients are worth coding - confirm at the use sites.
// NOTE(review): the trailing "//4" and "//6" look like earlier values of the
// two macros that are now 0 - confirm before changing them.
#define LUMA_MB_MAX_COST 6
#define CHROMA_COEFF_MAX_COST 7
#define LUMA_8X8_MAX_COST 4
#define LUMA_COEFF_8X8_MAX_COST 0 //4
#define LUMA_COEFF_MB_8X8_MAX_COST 0 //6
namespace UMC_H264_ENCODER
{
// NOTE(review): looks like a debug-print switch (0 = off); it is not referenced
// in the portion of the file reviewed here - confirm at the use sites.
#define PRINT 0
// Per-step pointer increments, in pixels, for visiting 4x4 blocks stored in a
// buffer whose row pitch is 16 pixels. Entry k is the amount to add after block
// k to reach block k+1; entries not listed are zero-filled by aggregate
// initialisation, exactly as the explicit zeros were before.
// NOTE(review): the three rows presumably correspond to the three chroma
// formats (4, 8 and 16 blocks per plane) - confirm against the callers.
static Ipp32s chromaPredInc[3][16] = {
    // running offsets: 0, 4, 64, 68
    { 4, 60, 4 },
    // running offsets: 0, 4, 64, 68, 128, 132, 192, 196
    { 4, 60, 4, 60, 4, 60, 4 },
    // running offsets: 0, 4, 64, 68, 8, 12, 72, 76, 128, 132, 192, 196, 136, 140, 200, 204
    { 4, 60, 4, -60, 4, 60, 4, 52, 4, 60, 4, -60, 4, 60, 4 }
};
// Index remapping tables, one row of up to 16 entries per configuration.
// Entries not listed are zero-filled by aggregate initialisation, exactly as
// the explicit zeros were before.
// NOTE(review): presumably maps a chroma 4x4 block number to the position of
// its DC coefficient, with one row per chroma format - confirm against the callers.
static Ipp32s chromaDCOffset[3][16] = {
    // identity over the first 4 entries
    { 0, 1, 2, 3 },
    // identity over the first 8 entries
    { 0, 1, 2, 3, 4, 5, 6, 7 },
    // full 16-entry permutation
    { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 }
};
////////////////////////////////////////////////////////////////////////////////
// Encode4x4IntraBlock
//
// Encode and reconstruct one block in an Intra macroblock with 4x4 prediction
//
////////////////////////////////////////////////////////////////////////////////
// Encodes and reconstructs a single 4x4 luma block of an intra macroblock that
// uses 4x4 prediction.
//
//   curr_slice - per-slice state; supplies the current macroblock descriptor and
//                receives the coefficient counts and the updated intra 4x4 CBP
//   block      - 4x4 block number; indexes xoff/yoff, CBP4x4Mask and the
//                per-block result arrays
//
// Results are left in cur_mb.mb4x4.transform (coefficients), in
// cur_mb.mb4x4.reconstruct (reconstructed pixels) and in
// m_iNumCoeffs4x4[block], m_iLastCoeff4x4[block] and m_uIntraCBP4x4.
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode4x4IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32s block)
{
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;
    Ipp32u lumaQP = getLumaQP(cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    Ipp32s srcPitchPixels = cur_mb.mbPitchPixels;
    Ipp32s srcPitchBytes = srcPitchPixels*sizeof(PixType);
    Ipp32u cbpLuma = curr_slice->m_uIntraCBP4x4;

    // Source pixels use the macroblock pitch; the prediction and
    // reconstruction work buffers are addressed with a fixed pitch of 16.
    PixType* pSrc = cur_mb.mbPtr + xoff[block] + yoff[block]*srcPitchPixels;
    PixType* pPred = cur_mb.mb4x4.prediction + xoff[block] + yoff[block]*16;
    PixType* pRecon = cur_mb.mb4x4.reconstruct + xoff[block] + yoff[block]*16;
    CoeffsType* pCoeffs = &cur_mb.mb4x4.transform[block*16];

    __ALIGN16 Ipp16s residual[16];
    __ALIGN16 CoeffsType scratch[16];
    Ipp32s numCoeffs = 0;
    Ipp32s lastCoeff = 0;

    Diff4x4(pPred, pSrc, srcPitchBytes, residual);

    const bool lossless = m_SeqParamSet.qpprime_y_zero_transform_bypass_flag && lumaQP == 0;
    if (lossless) {
        // Transform bypass: the reconstruction is the source itself and the
        // "coefficients" are the raw residual samples.
        Copy4x4(pSrc, srcPitchBytes, pRecon, 16*sizeof(PixType));
        for (Ipp32s k = 0; k != 16; ++k) {
            pCoeffs[k] = residual[k];
        }
        ippiCountCoeffs(pCoeffs, &numCoeffs, enc_single_scan[curr_slice->m_is_cur_mb_field], &lastCoeff, 16);
        if (numCoeffs == 0) {
            cbpLuma &= ~CBP4x4Mask[block];
            Copy4x4(pSrc, srcPitchBytes, pPred, 16*sizeof(PixType));
        }
    } else {
        const bool isIntraSlice = (curr_slice->m_slice_type == INTRASLICE);
        ippiTransformQuantResidual_H264(residual, pCoeffs, lumaQP, &numCoeffs, isIntraSlice, enc_single_scan[curr_slice->m_is_cur_mb_field], &lastCoeff);
        if (numCoeffs == 0) {
            // Nothing survived quantization: reconstruction equals prediction
            // and the block is dropped from the coded block pattern.
            Copy4x4(pPred, 16*sizeof(PixType), pRecon, sizeof(PixType)*16);
            cbpLuma &= ~CBP4x4Mask[block];
        } else {
            // The inverse path runs on a scratch copy, so the quantized
            // coefficients stored in pCoeffs are not touched by it.
            memcpy(scratch, pCoeffs, 16*sizeof(CoeffsType));
            // The count is negative when the DC coefficient is nonzero (the
            // convention noted in Encode16x16IntraBlock), so this flag is true
            // exactly when at least one AC coefficient is nonzero.
            const bool hasAC = ((numCoeffs < -1) || (numCoeffs > 0));
            ippiDequantTransformResidualAndAdd_H264(pPred, scratch, NULL, pRecon, 16, 16, lumaQP, hasAC, m_PicParamSet.bit_depth_luma);
        }
    }

    curr_slice->m_iNumCoeffs4x4[ block ] = numCoeffs;
    curr_slice->m_iLastCoeff4x4[ block ] = lastCoeff;
    curr_slice->m_uIntraCBP4x4 = cbpLuma;
}
// Encodes and reconstructs a single 8x8 luma block of an intra macroblock that
// uses 8x8 prediction.
//
//   curr_slice - per-slice state; supplies the current macroblock descriptor and
//                receives the coefficient counts and the updated intra 8x8 CBP
//   block      - 8x8 block number; indexes CBP8x8Mask and the per-block result
//                arrays, and (multiplied by 4) the xoff/yoff tables
//
// Results are left in cur_mb.mb8x8.transform (coefficients), in
// cur_mb.mb8x8.reconstruct (reconstructed pixels) and in
// m_iNumCoeffs8x8[block], m_iLastCoeff8x8[block] and m_uIntraCBP8x8.
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode8x8IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32s block)
{
    // Zero-initialised, as in Encode4x4IntraBlock, so the values stored at the
    // end are defined even if a callee leaves an out-parameter unwritten.
    Ipp32s iNumCoeffs = 0;
    Ipp32s iLastCoeff = 0;
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;
    Ipp32u uMBQP = getLumaQP(cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    Ipp32s pitchPixels = cur_mb.mbPitchPixels;
    Ipp32s pitchBytes = pitchPixels*sizeof(PixType);
    // Source pixels use the macroblock pitch; the prediction and
    // reconstruction work buffers are addressed with a fixed pitch of 16.
    PixType* pBlockData = cur_mb.mbPtr + xoff[4*block] + yoff[4*block]*pitchPixels;
    PixType* pPredBuf = cur_mb.mb8x8.prediction + xoff[block<<2] + yoff[block<<2]*16;
    PixType* pReconBuf = cur_mb.mb8x8.reconstruct + xoff[block<<2] + yoff[block<<2]*16;
    Ipp32u uCBPLuma = curr_slice->m_uIntraCBP8x8;
    CoeffsType* pTransformResult = &cur_mb.mb8x8.transform[block*64];
    __ALIGN16 Ipp16s pDiffBuf[64];
    __ALIGN16 CoeffsType pTransRes[64];
    Diff8x8(pPredBuf, pBlockData, pitchBytes, pDiffBuf);
    if (!m_SeqParamSet.qpprime_y_zero_transform_bypass_flag || uMBQP != 0) {
        // Forward transform, then quantize in place.
        ippiTransformLuma8x8Fwd_H264(pDiffBuf, pTransformResult);
        ippiQuantLuma8x8_H264(pTransformResult,pTransformResult, QP_DIV_6[uMBQP], 0, enc_single_scan_8x8[curr_slice->m_is_cur_mb_field], m_SeqParamSet.seq_scaling_matrix_8x8[0][QP_MOD_6[uMBQP]], &iNumCoeffs, &iLastCoeff);
        if (!iNumCoeffs) {
            // Nothing survived quantization: reconstruction equals prediction
            // and the block is dropped from the coded block pattern.
            Copy8x8(pPredBuf, 16*sizeof(PixType), pReconBuf, 16*sizeof(PixType));
            uCBPLuma &= ~CBP8x8Mask[block];
        } else {
            // The inverse path runs on a scratch copy, so the quantized
            // coefficients stored in pTransformResult are not touched by it.
            memcpy( pTransRes, pTransformResult, 64*sizeof( CoeffsType ));
            ippiQuantLuma8x8Inv_H264(pTransRes, QP_DIV_6[uMBQP], m_SeqParamSet.seq_scaling_inv_matrix_8x8[0][QP_MOD_6[uMBQP]]);
            ippiTransformLuma8x8InvAddPred_H264(pPredBuf, 16, pTransRes, pReconBuf, 16, m_PicParamSet.bit_depth_luma);
        }
    } else {
        // Transform bypass => lossless: the reconstruction is the source itself
        // and the "coefficients" are the raw residual samples.
        Copy8x8(pBlockData, pitchBytes, pReconBuf, 16*sizeof(PixType));
        for (Ipp32s i = 0; i < 64; i++)
            pTransformResult[i] = pDiffBuf[i];
        ippiCountCoeffs(pTransformResult, &iNumCoeffs, enc_single_scan_8x8[curr_slice->m_is_cur_mb_field], &iLastCoeff, 64);
        if (iNumCoeffs == 0) {
            uCBPLuma &= ~CBP8x8Mask[block];
            Copy8x8(pBlockData, pitchBytes, pPredBuf, 16*sizeof(PixType));
        }
    }
    curr_slice->m_iNumCoeffs8x8[ block ] = iNumCoeffs;
    curr_slice->m_iLastCoeff8x8[ block ] = iLastCoeff;
    curr_slice->m_uIntraCBP8x8 = uCBPLuma;
}
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode16x16IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice)
{
Ipp32u uBlock; // block number, 0 to 23
Ipp32u uMBQP; // QP of current MB
Ipp32u uMB;
Ipp32u uCBPLuma; // coded flags for all 4x4 blocks
CoeffsType* pDCBuf; // chroma & luma dc coeffs pointer
PixType* pPredBuf; // prediction block pointer
PixType* pReconBuf; // prediction block pointer
Ipp16s* pDiffBuf; // difference block pointer
Ipp16s* pTempDiffBuf; // difference block pointer
CoeffsType *pTransformResult; // for transform results.
Ipp16s* pMassDiffBuf; // difference block pointer
CoeffsType* pQBuf; // quantized block pointer
Ipp32s pitchPixels; // buffer pitch in pixels
Ipp8u bCoded; // coded block flag
Ipp32s iNumCoeffs; // Number of nonzero coeffs after quant (negative if DC is nonzero)
Ipp32s iLastCoeff; // Number of nonzero coeffs after quant (negative if DC is nonzero)
H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;
Ipp32s is_cur_mb_field = curr_slice->m_is_cur_mb_field;
EnumSliceType slice_type = curr_slice->m_slice_type;
pitchPixels = cur_mb.mbPitchPixels;
uCBPLuma = cur_mb.LocalMacroblockInfo->cbp_luma;
uMBQP = getLumaQP(cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
pDiffBuf = (Ipp16s*) (curr_slice->m_pMBEncodeBuffer + 512);
pTransformResult = (CoeffsType*)(pDiffBuf + 16);
pQBuf = (CoeffsType*) (pTransformResult + 16);
pDCBuf = (CoeffsType*) (pQBuf + 16); // Used for both luma and chroma DC blocks
pMassDiffBuf = (Ipp16s*) (pDCBuf + 16);
uMB = cur_mb.uMB;
bool transform_bypass = m_SeqParamSet.qpprime_y_zero_transform_bypass_flag && uMBQP == 0;
//--------------------------------------------------------------------------
// encode Y plane blocks (0-15)
//--------------------------------------------------------------------------
// initialize pointers and offset
pPredBuf = cur_mb.mb16x16.prediction; // 16-byte aligned work buffer
pReconBuf = cur_mb.mb16x16.reconstruct; // 16-byte aligned work buffer
Ipp32s pitchPix = 16;
cur_mb.MacroblockCoeffsInfo->lumaAC = 0;
ippiSumsDiff16x16Blocks4x4(cur_mb.mbPtr, pitchPixels, pPredBuf, 16, pDCBuf, pMassDiffBuf); // compute the 4x4 luma DC transform coeffs
if(!transform_bypass) {
// apply second transform on the luma DC transform coeffs
ippiTransformQuantLumaDC_H264(pDCBuf,pQBuf,uMBQP,&iNumCoeffs,1, enc_single_scan[is_cur_mb_field],&iLastCoeff);
}else {
for(Ipp32s i = 0; i < 4; i++) {
for(Ipp32s j = 0; j < 4; j++) {
Ipp32s x, y;
x = j*16;
y = i*64;
pDCBuf[i*4 + j] = pMassDiffBuf[x+y];
}
}
ippiCountCoeffs(pDCBuf, &iNumCoeffs, enc_single_scan[is_cur_mb_field], &iLastCoeff, 16);
}
if (m_PicParamSet.entropy_coding_mode){
ScanSignificant_CABAC(pDCBuf,BLOCK_LUMA_DC_LEVELS,16,dec_single_scan[is_cur_mb_field], &curr_slice->Block_CABAC[Y_DC_RLE]);
bCoded = curr_slice->Block_CABAC[Y_DC_RLE].uNumSigCoeffs;
}else{
ippiEncodeCoeffsCAVLC_H264(pDCBuf,0, dec_single_scan[is_cur_mb_field],iLastCoeff,
&curr_slice->Block_RLE[Y_DC_RLE].uTrailing_Ones,
&curr_slice->Block_RLE[Y_DC_RLE].uTrailing_One_Signs,
&curr_slice->Block_RLE[Y_DC_RLE].uNumCoeffs,
&curr_slice->Block_RLE[Y_DC_RLE].uTotalZeros,
curr_slice->Block_RLE[Y_DC_RLE].iLevels,
curr_slice->Block_RLE[Y_DC_RLE].uRuns);
bCoded = curr_slice->Block_RLE[Y_DC_RLE].uNumCoeffs;
}
if(!transform_bypass) {
ippiTransformDequantLumaDC_H264(pDCBuf, uMBQP);
}
// loop over all 4x4 blocks in Y plane for the MB
for (uBlock = 0; uBlock < 16; uBlock++ ){
pPredBuf = cur_mb.mb16x16.prediction + xoff[uBlock] + yoff[uBlock]*16;
pReconBuf = cur_mb.mb16x16.reconstruct + xoff[uBlock] + yoff[uBlock]*16;
cur_mb.MacroblockCoeffsInfo->numCoeff[uBlock] = 0; // This will be updated if the block is coded
if (m_PicParamSet.entropy_coding_mode) {
curr_slice->Block_CABAC[uBlock].uNumSigCoeffs = 0;
} else {
curr_slice->Block_RLE[uBlock].uNumCoeffs = 0;
curr_slice->Block_RLE[uBlock].uTrailing_Ones = 0;
curr_slice->Block_RLE[uBlock].uTrailing_One_Signs = 0;
curr_slice->Block_RLE[uBlock].uTotalZeros = 15;
}
bCoded = ((uCBPLuma & CBP4x4Mask[uBlock])?(1):(0)); // check if block is coded
if (!bCoded){
Copy4x4(pPredBuf, 16*sizeof(PixType), pReconBuf, pitchPix*sizeof(PixType)); // update reconstruct frame for the empty block
}else{ // block not declared empty, encode
pTempDiffBuf = pMassDiffBuf+ xoff[uBlock]*4 + yoff[uBlock]*16;
if(!transform_bypass) {
ippiTransformQuantResidual_H264(pTempDiffBuf, pTransformResult, uMBQP,&iNumCoeffs,(slice_type == INTRASLICE), enc_single_scan[is_cur_mb_field],&iLastCoeff);
}else{
for(Ipp32s i = 0; i < 16; i++){
pTransformResult[i] = pTempDiffBuf[i];
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -