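// File: umc_h264_ermb.cpp
//
// Archive: audio-video-codecs.rar (speech/audio codecs)
// Language: C++
// Page 1 of 5 of a paginated source listing
// (viewer controls - font size, page links 1 2 3 4 5, next page - are not part of the source).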
//
//               INTEL CORPORATION PROPRIETARY INFORMATION
//  This software is supplied under the terms of a license agreement or
//  nondisclosure agreement with Intel Corporation and may not be copied
//  or disclosed except in accordance with the terms of that agreement.
//        Copyright (c) 2004 - 2007 Intel Corporation. All Rights Reserved.
//

#include "umc_h264_video_encoder.h"
#include "umc_h264_tables.h"
#include "umc_h264_to_ipp.h"
#include "umc_h264_bme.h"
#include "ippdefs.h"
#include "ippvc.h"

//#define TRACE_INTRA 50
//#define TRACE_INTER 5
//#define TRACE_INTRA_16X16 185

// Debug-only helper, compiled in only when one of the TRACE_* switches above is
// defined. The 16x16 switch is spelled TRACE_INTRA_16X16 above while this guard
// used to test TRACE_INTRA_16x16 only, so both spellings are accepted here.
#if (defined(TRACE_INTRA) || defined(TRACE_INTER) || defined(TRACE_INTRA_16x16) || defined(TRACE_INTRA_16X16))
// Dumps an m x n matrix to stderr, one matrix row per output line.
//
//  a      - pointer to element (0,0)
//  pitchA - distance between consecutive rows, in elements of T
//  m, n   - number of rows and columns to print
//  rshift - every element is shifted right by this many bits before printing
//  name   - label printed in the header line; may be NULL
template <class T> void printMatr(T *a, Ipp32s pitchA, Ipp32s m, Ipp32s n, Ipp32s rshift = 0, const char* name = NULL)
{
    // Passing a null pointer for %s is undefined behaviour, so substitute a label.
    fprintf(stderr,"Matrix \"%s\" {\n", name ? name : "(null)");
    for(Ipp32s i = 0; i < m; i++) {
        fprintf(stderr,"   ");
        for(Ipp32s j = 0; j < n; j++) {
            // %d expects an int; make that explicit whatever T is.
            fprintf(stderr," %3d,", static_cast<int>(a[i*pitchA + j] >> rshift));
        }
        fprintf(stderr,"\n");
    }
    fprintf(stderr,"}\n");
}
#endif

// Cost limits for luma / chroma coefficients. None of these macros is
// referenced in this part of the file.
// NOTE(review): presumably blocks whose accumulated coefficient cost stays
// below these limits have their coefficients discarded - confirm at the use
// sites. The trailing "//4" and "//6" look like earlier values of the two
// macros that are now 0 - TODO confirm.
#define LUMA_MB_MAX_COST 6
#define CHROMA_COEFF_MAX_COST 7
#define LUMA_8X8_MAX_COST 4
#define LUMA_COEFF_8X8_MAX_COST 0 //4
#define LUMA_COEFF_MB_8X8_MAX_COST 0 //6

namespace UMC_H264_ENCODER
{

// Print switch, set to 0 here; it is not referenced in this part of the file.
#define PRINT 0

// Table of pointer increments: 3 rows of 16 entries. The rows contain 3, 7 and
// 15 non-zero steps respectively (enough to move through 4, 8 and 16
// positions); the remaining entries are 0.
// With the pitch of 16 noted on each row, +4 is a step of four samples to the
// right, +60 (= 4*16 - 4) is four rows down and four samples back,
// -60 (= -4*16 + 4) is four rows up and four samples right, and
// 52 (= 4*16 - 12) is four rows down and twelve samples back.
// NOTE(review): presumably the row is selected by chroma format (4:2:0, 4:2:2,
// 4:4:4) and the steps walk the 4x4 chroma blocks of a prediction buffer -
// confirm at the use sites.
static Ipp32s chromaPredInc[3][16] = {
     { 4, 60, 4,   0,  0,  0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, //pitch 16
     { 4, 60, 4,  60,  4, 60, 4,  0,  0,  0,  0,  0,  0,  0,  0,  0  }, //pitch 16
     { 4, 60, 4, -60,  4, 60, 4, 52,  4, 60,  4,-60,  4, 60,  4,  0  } //pitch 16
};

// Table of offsets: 3 rows of 16 entries. Row 0 lists the offsets 0..3, row 1
// the offsets 0..7 (both padded with 0), and row 2 is a permutation of 0..15.
// NOTE(review): presumably maps the n-th chroma block visited to the position
// of its DC coefficient, one row per chroma format - confirm at the use sites.
static Ipp32s chromaDCOffset[3][16] = {
    { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0 },
    { 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0 , 0,  0,  0,  0,  0 },
    { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}
};

////////////////////////////////////////////////////////////////////////////////
// Encode4x4IntraBlock
//
// Encodes and reconstructs a single 4x4 luma block of an Intra macroblock
// that uses 4x4 prediction.
//
//  curr_slice - slice state; its m_cur_mb supplies the source pixels and the
//               mb4x4 prediction / reconstruct / transform work buffers.
//  block      - 4x4 block number; selects the block position through
//               xoff[]/yoff[] and the block's bit through CBP4x4Mask[].
//
// On exit m_iNumCoeffs4x4[block], m_iLastCoeff4x4[block] and m_uIntraCBP4x4
// of curr_slice are updated, the block's coefficients are stored at
// mb4x4.transform[block*16] and its reconstruction in mb4x4.reconstruct.
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode4x4IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32s block)
{
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &mb = curr_slice->m_cur_mb;

    const Ipp32u lumaQP        = getLumaQP(mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    const Ipp32s srcPitch      = mb.mbPitchPixels;
    const Ipp32s srcPitchBytes = srcPitch*sizeof(PixType);

    Ipp32u cbpLuma      = curr_slice->m_uIntraCBP4x4;
    Ipp32s coeffCount   = 0;
    Ipp32s lastCoeffPos = 0;

    // Source pixels use the macroblock pitch; the prediction and
    // reconstruction work buffers are addressed with a pitch of 16 pixels.
    PixType*    pSrc    = mb.mbPtr + xoff[block] + yoff[block]*srcPitch;
    PixType*    pPred   = mb.mb4x4.prediction + xoff[block] + yoff[block]*16;
    PixType*    pRecon  = mb.mb4x4.reconstruct + xoff[block] + yoff[block]*16;
    CoeffsType* pCoeffs = &mb.mb4x4.transform[block*16];

    // Residual between the prediction and the source block.
    __ALIGN16 Ipp16s residual[16];
    Diff4x4(pPred, pSrc, srcPitchBytes, residual);

    const bool losslessBypass = m_SeqParamSet.qpprime_y_zero_transform_bypass_flag && lumaQP == 0;

    if (losslessBypass) {
        // Transform bypass => lossless: the reconstruction is the source
        // itself and the residual is stored untransformed.
        Copy4x4(pSrc, srcPitchBytes, pRecon, 16*sizeof(PixType));
        for (Ipp32s k = 0; k < 16; ++k) {
            pCoeffs[k] = residual[k];
        }
        ippiCountCoeffs(pCoeffs, &coeffCount, enc_single_scan[curr_slice->m_is_cur_mb_field], &lastCoeffPos, 16);
        if (coeffCount == 0) {
            cbpLuma &= ~CBP4x4Mask[block];
            Copy4x4(pSrc, srcPitchBytes, pPred, 16*sizeof(PixType));
        }
    } else {
        const bool isIntraSlice = (curr_slice->m_slice_type == INTRASLICE);
        ippiTransformQuantResidual_H264(residual, pCoeffs, lumaQP, &coeffCount, isIntraSlice, enc_single_scan[curr_slice->m_is_cur_mb_field], &lastCoeffPos);

        if (coeffCount != 0) {
            // Dequantise a scratch copy so the stored coefficients stay intact.
            __ALIGN16 CoeffsType scratch[16];
            memcpy(scratch, pCoeffs, 16*sizeof(CoeffsType));

            // False only for a count of -1 or 0.
            // NOTE(review): presumably -1 means the DC coefficient is the only
            // non-zero one - confirm against ippiTransformQuantResidual_H264.
            const bool acFlag = !(coeffCount == -1 || coeffCount == 0);
            ippiDequantTransformResidualAndAdd_H264(pPred, scratch, NULL, pRecon, 16, 16, lumaQP, acFlag, m_PicParamSet.bit_depth_luma);
        } else {
            // Nothing survived quantisation: the reconstruction equals the
            // prediction and the block is marked as not coded.
            Copy4x4(pPred, 16*sizeof(PixType), pRecon, sizeof(PixType)*16);
            cbpLuma &= ~CBP4x4Mask[block];
        }
    }

    curr_slice->m_iNumCoeffs4x4[ block ] = coeffCount;
    curr_slice->m_iLastCoeff4x4[ block ] = lastCoeffPos;
    curr_slice->m_uIntraCBP4x4 = cbpLuma;
}


////////////////////////////////////////////////////////////////////////////////
// Encode8x8IntraBlock
//
// Encode and reconstruct one 8x8 luma block of an Intra macroblock that uses
// 8x8 prediction.
//
//  curr_slice - slice state; its m_cur_mb supplies the source pixels and the
//               mb8x8 prediction / reconstruct / transform work buffers.
//  block      - 8x8 block number; indexes CBP8x8Mask[], m_iNumCoeffs8x8[] and
//               m_iLastCoeff8x8[].
//
// On exit m_iNumCoeffs8x8[block], m_iLastCoeff8x8[block] and m_uIntraCBP8x8
// of curr_slice are updated, the block's coefficients are stored at
// mb8x8.transform[block*64] and its reconstruction in mb8x8.reconstruct.
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode8x8IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32s block)
{
    // Initialised so that the values stored at the end are defined on every
    // path (same as Encode4x4IntraBlock).
    Ipp32s     iNumCoeffs = 0;
    Ipp32s     iLastCoeff = 0;
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;
    Ipp32u uMBQP       = getLumaQP(cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    Ipp32s pitchPixels = cur_mb.mbPitchPixels;
    Ipp32s pitchBytes  = pitchPixels*sizeof(PixType);
    Ipp32s blk4x4      = 4*block;  // index of the first 4x4 sub-block of this 8x8 block

    // Source pixels use the macroblock pitch; the prediction and
    // reconstruction work buffers are addressed with a pitch of 16 pixels.
    PixType* pBlockData = cur_mb.mbPtr + xoff[blk4x4] + yoff[blk4x4]*pitchPixels;
    PixType* pPredBuf = cur_mb.mb8x8.prediction + xoff[blk4x4] + yoff[blk4x4]*16;
    PixType* pReconBuf = cur_mb.mb8x8.reconstruct + xoff[blk4x4] + yoff[blk4x4]*16;

    Ipp32u uCBPLuma     = curr_slice->m_uIntraCBP8x8;
    CoeffsType* pTransformResult = &cur_mb.mb8x8.transform[block*64];
    __ALIGN16 Ipp16s pDiffBuf[64];
    __ALIGN16 CoeffsType pTransRes[64];

    // Residual between the prediction and the source block.
    Diff8x8(pPredBuf, pBlockData, pitchBytes, pDiffBuf);
    if (!m_SeqParamSet.qpprime_y_zero_transform_bypass_flag || uMBQP != 0) {
        // Forward transform, then quantise in place.
        ippiTransformLuma8x8Fwd_H264(pDiffBuf, pTransformResult);
        ippiQuantLuma8x8_H264(pTransformResult,pTransformResult, QP_DIV_6[uMBQP], 0, enc_single_scan_8x8[curr_slice->m_is_cur_mb_field], m_SeqParamSet.seq_scaling_matrix_8x8[0][QP_MOD_6[uMBQP]], &iNumCoeffs, &iLastCoeff);
        if (!iNumCoeffs) {
            // Nothing survived quantisation: the reconstruction equals the
            // prediction and the block is marked as not coded.
            Copy8x8(pPredBuf, 16*sizeof(PixType), pReconBuf, 16*sizeof(PixType));
            uCBPLuma &= ~CBP8x8Mask[block];
        } else {
            // Dequantise a scratch copy so the stored coefficients stay intact.
            memcpy( pTransRes, pTransformResult, 64*sizeof( CoeffsType ));
            ippiQuantLuma8x8Inv_H264(pTransRes, QP_DIV_6[uMBQP], m_SeqParamSet.seq_scaling_inv_matrix_8x8[0][QP_MOD_6[uMBQP]]);
            ippiTransformLuma8x8InvAddPred_H264(pPredBuf, 16, pTransRes, pReconBuf, 16, m_PicParamSet.bit_depth_luma);
        }
    } else {
        // Transform bypass => lossless: the reconstruction is the source
        // itself and the residual is stored untransformed.
        Copy8x8(pBlockData, pitchBytes, pReconBuf, 16*sizeof(PixType));
        for (Ipp32s i = 0; i < 64; i++)
            pTransformResult[i] = pDiffBuf[i];
        ippiCountCoeffs(pTransformResult, &iNumCoeffs, enc_single_scan_8x8[curr_slice->m_is_cur_mb_field], &iLastCoeff, 64);
        if (iNumCoeffs == 0) {
            uCBPLuma &= ~CBP8x8Mask[block];
            Copy8x8(pBlockData, pitchBytes, pPredBuf, 16*sizeof(PixType));
        }
    }
    curr_slice->m_iNumCoeffs8x8[ block ] = iNumCoeffs;
    curr_slice->m_iLastCoeff8x8[ block ] = iLastCoeff;
    curr_slice->m_uIntraCBP8x8 = uCBPLuma;
}

template <class PixType, class CoeffsType>
void H264CoreEncoder<PixType, CoeffsType>::Encode16x16IntraBlock(H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice)
{
    Ipp32u  uBlock;     // block number, 0 to 23
    Ipp32u  uMBQP;          // QP of current MB
    Ipp32u  uMB;
    Ipp32u  uCBPLuma;        // coded flags for all 4x4 blocks
    CoeffsType* pDCBuf;     // chroma & luma dc coeffs pointer
    PixType*  pPredBuf;       // prediction block pointer
    PixType*  pReconBuf;       // prediction block pointer
    Ipp16s* pDiffBuf;       // difference block pointer
    Ipp16s* pTempDiffBuf;       // difference block pointer
    CoeffsType *pTransformResult; // for transform results.
    Ipp16s* pMassDiffBuf;   // difference block pointer
    CoeffsType* pQBuf;          // quantized block pointer
    Ipp32s  pitchPixels;     // buffer pitch in pixels
    Ipp8u   bCoded; // coded block flag
    Ipp32s  iNumCoeffs; // Number of nonzero coeffs after quant (negative if DC is nonzero)
    Ipp32s  iLastCoeff; // Number of nonzero coeffs after quant (negative if DC is nonzero)
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;
    Ipp32s is_cur_mb_field = curr_slice->m_is_cur_mb_field;
    EnumSliceType slice_type = curr_slice->m_slice_type;

    pitchPixels = cur_mb.mbPitchPixels;
    uCBPLuma    = cur_mb.LocalMacroblockInfo->cbp_luma;
    uMBQP       = getLumaQP(cur_mb.LocalMacroblockInfo->QP, m_PicParamSet.bit_depth_luma);
    pDiffBuf    = (Ipp16s*) (curr_slice->m_pMBEncodeBuffer + 512);
    pTransformResult = (CoeffsType*)(pDiffBuf + 16);
    pQBuf       = (CoeffsType*) (pTransformResult + 16);
    pDCBuf      = (CoeffsType*) (pQBuf + 16);   // Used for both luma and chroma DC blocks
    pMassDiffBuf = (Ipp16s*) (pDCBuf + 16);
    uMB = cur_mb.uMB;

    bool transform_bypass = m_SeqParamSet.qpprime_y_zero_transform_bypass_flag && uMBQP == 0;
    //--------------------------------------------------------------------------
    // encode Y plane blocks (0-15)
    //--------------------------------------------------------------------------

    // initialize pointers and offset
    pPredBuf    = cur_mb.mb16x16.prediction; // 16-byte aligned work buffer
    pReconBuf    = cur_mb.mb16x16.reconstruct; // 16-byte aligned work buffer
    Ipp32s pitchPix = 16;

    cur_mb.MacroblockCoeffsInfo->lumaAC = 0;
    ippiSumsDiff16x16Blocks4x4(cur_mb.mbPtr, pitchPixels, pPredBuf, 16, pDCBuf, pMassDiffBuf); // compute the 4x4 luma DC transform coeffs

    if(!transform_bypass) {
        // apply second transform on the luma DC transform coeffs
        ippiTransformQuantLumaDC_H264(pDCBuf,pQBuf,uMBQP,&iNumCoeffs,1, enc_single_scan[is_cur_mb_field],&iLastCoeff);
    }else {
       for(Ipp32s i = 0; i < 4; i++) {
            for(Ipp32s j = 0; j < 4; j++) {
                Ipp32s x, y;
                x = j*16;
                y = i*64;
                pDCBuf[i*4 + j] = pMassDiffBuf[x+y];
            }
        }
        ippiCountCoeffs(pDCBuf, &iNumCoeffs, enc_single_scan[is_cur_mb_field], &iLastCoeff, 16);
    }

    if (m_PicParamSet.entropy_coding_mode){
        ScanSignificant_CABAC(pDCBuf,BLOCK_LUMA_DC_LEVELS,16,dec_single_scan[is_cur_mb_field], &curr_slice->Block_CABAC[Y_DC_RLE]);
        bCoded = curr_slice->Block_CABAC[Y_DC_RLE].uNumSigCoeffs;
    }else{
        ippiEncodeCoeffsCAVLC_H264(pDCBuf,0, dec_single_scan[is_cur_mb_field],iLastCoeff,
                                   &curr_slice->Block_RLE[Y_DC_RLE].uTrailing_Ones,
                                   &curr_slice->Block_RLE[Y_DC_RLE].uTrailing_One_Signs,
                                   &curr_slice->Block_RLE[Y_DC_RLE].uNumCoeffs,
                                   &curr_slice->Block_RLE[Y_DC_RLE].uTotalZeros,
                                   curr_slice->Block_RLE[Y_DC_RLE].iLevels,
                                   curr_slice->Block_RLE[Y_DC_RLE].uRuns);
        bCoded = curr_slice->Block_RLE[Y_DC_RLE].uNumCoeffs;
    }

    if(!transform_bypass) {
        ippiTransformDequantLumaDC_H264(pDCBuf, uMBQP);
    }

    // loop over all 4x4 blocks in Y plane for the MB
    for (uBlock = 0; uBlock < 16; uBlock++ ){
        pPredBuf = cur_mb.mb16x16.prediction + xoff[uBlock] + yoff[uBlock]*16;
        pReconBuf = cur_mb.mb16x16.reconstruct + xoff[uBlock] + yoff[uBlock]*16;

        cur_mb.MacroblockCoeffsInfo->numCoeff[uBlock] = 0;        // This will be updated if the block is coded
        if (m_PicParamSet.entropy_coding_mode) {
            curr_slice->Block_CABAC[uBlock].uNumSigCoeffs = 0;
        } else {
            curr_slice->Block_RLE[uBlock].uNumCoeffs = 0;
            curr_slice->Block_RLE[uBlock].uTrailing_Ones = 0;
            curr_slice->Block_RLE[uBlock].uTrailing_One_Signs = 0;
            curr_slice->Block_RLE[uBlock].uTotalZeros = 15;
        }

        bCoded = ((uCBPLuma & CBP4x4Mask[uBlock])?(1):(0)); // check if block is coded

        if (!bCoded){
            Copy4x4(pPredBuf, 16*sizeof(PixType), pReconBuf, pitchPix*sizeof(PixType)); // update reconstruct frame for the empty block
        }else{   // block not declared empty, encode
            pTempDiffBuf = pMassDiffBuf+ xoff[uBlock]*4 + yoff[uBlock]*16;
            if(!transform_bypass) {
                ippiTransformQuantResidual_H264(pTempDiffBuf, pTransformResult, uMBQP,&iNumCoeffs,(slice_type == INTRASLICE), enc_single_scan[is_cur_mb_field],&iLastCoeff);
            }else{
                for(Ipp32s i = 0; i < 16; i++){
                    pTransformResult[i] = pTempDiffBuf[i];
                }
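// --- End of page 1 of 5; the listing continues on the next page. ---
// (Code-viewer residue, not part of the source: page links 1 2 3 4 5 / next page,
//  and keyboard shortcut help - copy code Ctrl + C, search code Ctrl + F,
//  full screen F11, toggle theme Ctrl + Shift + D, show shortcuts ?,
//  increase font size Ctrl + =, decrease font size Ctrl + -.)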