// File: umc_h264_me_new.cpp
////////////////////////////////////////////////////////////////////////////////
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2004 - 2007 Intel Corporation. All Rights Reserved.
//
#include <string.h>
#include <limits.h>
#include <math.h>
#include "umc_h264_tables.h"
#include "umc_h264_to_ipp.h"
#include "umc_h264_bme.h"
#include "umc_h264_video_encoder.h"
namespace UMC_H264_ENCODER
{
#ifdef H264_NEW_ME
// Block-size identifiers, used as the blockSize selector of SAD()/SATD() below.
// For the WxH block named in each macro the value equals W + H/4
// (integer division), e.g. 16x16 -> 16 + 4 = 20, 8x4 -> 8 + 1 = 9, 4x2 -> 4 + 0 = 4.
#define BS_16x16 20
#define BS_16x8 18
#define BS_8x16 12
#define BS_8x8 10
#define BS_8x4 9
#define BS_4x8 6
#define BS_4x4 5
#define BS_4x2 4
#define BS_2x4 3
#define BS_2x2 2
// Returns ptr advanced by (pitchPixels*y + x) elements; the offset is
// converted to Ipp32s before the pointer addition.
#define MVADJUST(ptr, pitchPixels, x, y) ((ptr) + Ipp32s((pitchPixels)*(y) + (x)))
// When sad is below m_EmptyThreshold[iQP] >> factor, ANDs
// cur_mb.LocalMacroblockInfo->cbp_luma with mask.
// The expansion relies on m_EmptyThreshold, iQP and cur_mb being visible at
// the point of use.
// NOTE(review): this expands to an unbraced `if` statement, so using it
// directly before an `else` would bind that `else` to the macro's `if`.
#define CHECK_CBP_EMPTY_THRESH(sad, mask, factor) \
if ((sad) < (m_EmptyThreshold[iQP] >> (factor))) \
cur_mb.LocalMacroblockInfo->cbp_luma &= (mask)
// NOTE(review): the use of TR_SHIFT is not visible in this part of the file.
#define TR_SHIFT 8
// Numeric identifiers for the motion-vector search strategies
// (the code that consumes them is not visible in this part of the file).
#define MV_SEARCH_TYPE_FULL 0
#define MV_SEARCH_TYPE_CLASSIC_LOG 1
#define MV_SEARCH_TYPE_LOG 2
#define MV_SEARCH_TYPE_EPZS 3
#define MV_SEARCH_TYPE_FULL_ORTHOGONAL 4
#define MV_SEARCH_TYPE_LOG_ORTHOGONAL 5
#define MV_SEARCH_TYPE_TTS 6
#define MV_SEARCH_TYPE_NEW_EPZS 7
#define MV_SEARCH_TYPE_UMH 8
// Selects which candidate CalcMVPredictor() uses as the motion-vector predictor.
enum PredType
{
    MVPRED_MEDIAN = 0,  // component-wise median of the A, B and C candidates
    MVPRED_A      = 1,  // copy the vector of neighbour A
    MVPRED_B      = 2,  // copy the vector of neighbour B
    MVPRED_C      = 3   // copy the vector of neighbour C
};
template <class PixType> void DirectB_PredictOneMB_Lu(
PixType *const pDirB, // pointer to current direct mode MB buffer
const PixType *const pPrev, // pointer to previous ref plane buffer
const PixType *const pFutr, // pointer to future ref plane buffer
const Ipp32s pitchPixels, // reference buffers pitch in pixels
const Ipp32u uInterpType,// 0 = Skip, 1 = Default, 2 = Implicit Weighted
const Ipp32s W1,
const Ipp32s W0,
const IppiSize & roiSize
)
{
if (!uInterpType) {
for (Ipp32s i = 0, k = 0; i < roiSize.height; i ++, k += pitchPixels)
memcpy(pDirB + i * 16, pPrev + k, roiSize.width * sizeof(PixType));
} else if (uInterpType == 2) {
for (Ipp32s i = 0, k = 0; i < roiSize.height; i ++, k += pitchPixels)
for (Ipp32s j = 0; j < roiSize.width; j ++)
pDirB[i * 16 + j] = (PixType) ((pPrev[k + j] * W0 + pFutr[k + j] * W1 + 32) >> 6);
} else {
ippiInterpolateBlock_H264(pPrev, pFutr, pDirB, roiSize.width, roiSize.height, pitchPixels*sizeof(PixType));
}
}
// Computes the motion-vector predictor for one luma partition of the current
// macroblock and stores it in pMVPred[0].
//
// Three neighbouring blocks supply candidates: A (left), B (top) and
// C (top-right, replaced by the top-left block when the top-right one is
// unavailable). A neighbour that is unavailable, intra coded, or has a
// reference index of -1 contributes null_mv. In MBAFF slices the vertical
// component and the reference index of a neighbour are rescaled when its
// field/frame decoding flag differs from the current macroblock's.
//
// Selection:
//   * exactly one neighbour shares the current reference index -> use it;
//   * 16x8 / 8x16 partitions prefer a specific neighbour when it shares the
//     reference index;
//   * B and C both unavailable -> use A;
//   * otherwise -> component-wise median of A, B and C.
template <class PixType, class CoeffsType>void H264CoreEncoder<PixType,CoeffsType>::CalcMVPredictor(
    H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice,
    Ipp32u block_idx,
    Ipp32u uList, // 0, 1
    Ipp32u uBlocksWide, // 1, 2, or 4
    Ipp32u uBlocksHigh, // 1, 2, or 4 (4x16 and 16x4 not permitted)
    H264MotionVector *pMVPred // resulting MV predictor
)
{
    H264CurrentMacroblockDescriptor<PixType, CoeffsType> &cur_mb = curr_slice->m_cur_mb;

    // ---- 1. Locate the three neighbouring blocks -------------------------
    H264BlockLocation nbLoc[MB_ALL_NEIGHBOURS];
    nbLoc[MB_A].block_num = block_idx;
    nbLoc[MB_B].block_num = block_idx;
    nbLoc[MB_C].block_num = block_idx + uBlocksWide - 1; // rightmost 4x4 column of the partition

    if (m_SliceHeader.MbaffFrameFlag) {
        GetLeftLocationForCurrentMBLumaMBAFF(cur_mb, &nbLoc[MB_A]);
        GetTopLocationForCurrentMBLumaMBAFF(cur_mb, &nbLoc[MB_B], false);
        GetTopRightLocationForCurrentMBLumaMBAFF(cur_mb, &nbLoc[MB_C]);
        if (nbLoc[MB_C].mb_num < 0) {
            // Top-right is missing: fall back to the top-left block.
            nbLoc[MB_C].block_num = block_idx;
            GetTopLeftLocationForCurrentMBLumaMBAFF(cur_mb, &nbLoc[MB_C]);
        }
    } else {
        GetLeftLocationForCurrentMBLumaNonMBAFF(cur_mb, &nbLoc[MB_A]);
        GetTopLocationForCurrentMBLumaNonMBAFF(cur_mb, &nbLoc[MB_B]);
        GetTopRightLocationForCurrentMBLumaNonMBAFF(cur_mb, &nbLoc[MB_C]);
        if (nbLoc[MB_C].mb_num < 0) {
            // Top-right is missing: fall back to the top-left block.
            nbLoc[MB_C].block_num = block_idx;
            GetTopLeftLocationForCurrentMBLumaNonMBAFF(cur_mb, &nbLoc[MB_C]);
        }
    }

    // ---- 2. Gather a candidate vector and a "different reference" flag ---
    const T_RefIdx &curRefIdx = cur_mb.RefIdxs[uList]->RefIdxs[block_idx];
    H264MacroblockGlobalInfo *mbGlobals = m_pCurrentFrame->m_mbinfo.mbs;
    const H264MotionVector *candMv[MB_ALL_NEIGHBOURS];
    H264MotionVector scaledMv[MB_ALL_NEIGHBOURS]; // storage for MBAFF-rescaled copies
    bool refDiffers[MB_ALL_NEIGHBOURS];

    for (Ipp32s n = MB_A; n <= MB_C; n++) {
        const H264BlockLocation &loc = nbLoc[n];

        if (loc.mb_num < 0) {
            // Unavailable neighbour: zero vector, never counts as a match.
            candMv[n] = &null_mv;
            refDiffers[n] = true;
            continue;
        }

        T_RefIdx nbRefIdx = m_pCurrentFrame->m_mbinfo.RefIdxs[uList][loc.mb_num].RefIdxs[loc.block_num];
        const bool noMotion = (nbRefIdx == -1)
            || IS_INTRA_MBTYPE(m_pCurrentFrame->m_mbinfo.mbs[loc.mb_num].mbtype);

        if (m_SliceHeader.MbaffFrameFlag) {
            // halveMv is set when the current MB's field flag is greater than the
            // neighbour's, doubleMv when it is smaller; at most one of them is 1.
            Ipp32u halveMv = (pGetMBFieldDecodingFlag(cur_mb.GlobalMacroblockInfo) - GetMBFieldDecodingFlag(mbGlobals[loc.mb_num])) > 0;
            Ipp32u doubleMv = (pGetMBFieldDecodingFlag(cur_mb.GlobalMacroblockInfo) - GetMBFieldDecodingFlag(mbGlobals[loc.mb_num])) < 0;

            if (noMotion) {
                candMv[n] = &null_mv;
            } else {
                H264MotionVector &tmp = scaledMv[n];
                tmp = m_pCurrentFrame->m_mbinfo.MV[uList][loc.mb_num].MotionVectors[loc.block_num];
                // Adding 1 to a negative value before halving makes the shift
                // round toward zero.
                tmp.mvy = ((tmp.mvy + ((tmp.mvy < 0)&&halveMv)) << doubleMv) >>halveMv;
                candMv[n] = &tmp;
            }
            // The reference index is scaled the opposite way to the vector.
            refDiffers[n] = ( ((nbRefIdx<<halveMv)>>doubleMv) != curRefIdx );
        } else {
            if (noMotion) {
                candMv[n] = &null_mv;
            } else {
                candMv[n] = &m_pCurrentFrame->m_mbinfo.MV[uList][loc.mb_num].MotionVectors[loc.block_num];
            }
            refDiffers[n] = (curRefIdx != nbRefIdx);
        }
    }

    // ---- 3. Choose the prediction mode ----------------------------------
    const bool sameA = !refDiffers[MB_A];
    const bool sameB = !refDiffers[MB_B];
    const bool sameC = !refDiffers[MB_C];

    PredType mode = MVPRED_MEDIAN;
    if (sameA + sameB + sameC == 1) {
        // Exactly one neighbour uses the same reference: take its vector.
        mode = sameA ? MVPRED_A : (sameB ? MVPRED_B : MVPRED_C);
    }

    if ((uBlocksHigh + uBlocksWide) == 6) { // 8x16 and 16x8 block sizes
        if (uBlocksHigh == 2) {
            // 16x8
            switch (block_idx) {
            case 0: // top block: predict from Above when it uses the same ref
                if (sameB)
                    mode = MVPRED_B;
                break;
            case 8: // bottom block: predict from Left when it uses the same ref
                if (sameA)
                    mode = MVPRED_A;
                break;
            default:
                break;
            }
        } else {
            // 8x16
            switch (block_idx) {
            case 0: // left block: predict from Left when it uses the same ref
                if (sameA)
                    mode = MVPRED_A;
                break;
            case 2: // right block: predict from Above-Right when it uses the same ref
                if (sameC)
                    mode = MVPRED_C;
                break;
            default:
                break;
            }
        }
    }

    // With neither B nor C available, A is the only possible source.
    if (nbLoc[MB_B].mb_num < 0 && nbLoc[MB_C].mb_num < 0)
        mode = MVPRED_A;

    // ---- 4. Produce the predictor ----------------------------------------
    const H264MotionVector *copyFrom = 0;
    switch (mode) {
    case MVPRED_MEDIAN:
        {
            const H264MotionVector &a = *candMv[MB_A];
            const H264MotionVector &b = *candMv[MB_B];
            const H264MotionVector &c = *candMv[MB_C];
            // XOR of the three pairwise minima yields the median: the smallest
            // value appears twice and cancels out.
            pMVPred[0].mvx = MIN(a.mvx, b.mvx) ^ MIN(b.mvx, c.mvx) ^ MIN(c.mvx, a.mvx);
            pMVPred[0].mvy = MIN(a.mvy, b.mvy) ^ MIN(b.mvy, c.mvy) ^ MIN(c.mvy, a.mvy);
        }
        break;
    case MVPRED_A:
        copyFrom = candMv[MB_A];
        break;
    case MVPRED_B:
        copyFrom = candMv[MB_B];
        break;
    case MVPRED_C:
        copyFrom = candMv[MB_C];
        break;
    default:
        VM_ASSERT(false);
    }

    if (copyFrom) {
        pMVPred[0].mvx = copyFrom->mvx;
        pMVPred[0].mvy = copyFrom->mvy;
    }
}
// Returns the sum of absolute differences between the block at pCur and the
// block at pRef. blockSize is one of the BS_* identifiers; pitches are given
// in pixels.
//
// Sizes from 16x16 down to 4x4 are forwarded to the matching SADnxm routine
// with the pitches converted to bytes. The 4x2, 2x4 and 2x2 sizes are summed
// directly here. Any other blockSize yields 0.
template <class PixType> Ipp32s SAD(PixType *pCur, Ipp32s pitchPixelsCur, PixType *pRef, Ipp32s pitchPixelsRef, Ipp32s blockSize)
{
    Ipp32s cols;
    Ipp32s rows;

    switch (blockSize) {
    case BS_16x16:
        return SAD16x16(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_16x8:
        return SAD16x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_8x16:
        return SAD8x16(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_8x8:
        return SAD8x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_8x4:
        return SAD8x4(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_4x8:
        return SAD4x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
    case BS_4x4:
        return SAD4x4(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));

    // The remaining sizes are handled by the loop below.
    case BS_4x2:
        cols = 4;
        rows = 2;
        break;
    case BS_2x4:
        cols = 2;
        rows = 4;
        break;
    case BS_2x2:
        cols = 2;
        rows = 2;
        break;

    default:
        // Unrecognized block size.
        return 0;
    }

    Ipp32s total = 0;
    for (Ipp32s y = 0; y < rows; ++y) {
        const PixType *curRow = pCur + y * pitchPixelsCur;
        const PixType *refRow = pRef + y * pitchPixelsRef;
        for (Ipp32s x = 0; x < cols; ++x) {
            Ipp32s diff = curRow[x] - refRow[x];
            total += ABS(diff);
        }
    }
    return total;
}
template <class PixType> Ipp32s SATD(PixType *pCur, Ipp32s pitchPixelsCur, PixType *pRef, Ipp32s pitchPixelsRef, Ipp32s blockSize)
{
Ipp32s sad;
switch (blockSize) {
case BS_16x16:
sad = SATD16x16(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_16x8:
sad = SATD16x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_8x16:
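// [Source-viewer page residue, not part of the program]
// Keyboard shortcuts: copy code - Ctrl + C; search code - Ctrl + F;
// full-screen mode - F11; toggle theme - Ctrl + Shift + D; show shortcuts - ?;
// increase font size - Ctrl + =; decrease font size - Ctrl + -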