// umc_h264_me.cpp
////////////////////////////////////////////////////////////////////////////////
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2004 - 2007 Intel Corporation. All Rights Reserved.
//
#include <string.h>
#include <limits.h>
#include "umc_h264_tables.h"
#include "umc_h264_to_ipp.h"
#include "umc_h264_bme.h"
#include "umc_h264_video_encoder.h"
// Assigns a copy of `val` to each of the `count` consecutive elements that
// start at `first`. A count of zero leaves the destination untouched.
template<typename T>
inline void fill_n(T *first, size_t count, const T& val)
{
    while (count != 0)
    {
        *first = val;
        ++first;
        --count;
    }
}
// "Empty block" threshold helpers. Both macros expand to a bare if-statement and
// rely on names that must be in scope at the expansion site: MVConstraint,
// pRDQM, m_EmptyThreshold, iQP and cur_mb.
// NOTE(review): the only (commented-out) "#define NO_EMPTY_THRESH" in this file
// appears after this #ifndef, so enabling it there would come too late to
// select the empty variants below -- confirm the intended ordering.
#ifndef NO_EMPTY_THRESH
// TODO add ref constraints ??
// EMPTY_THRESH: takes me_info.sad, subtracts MVConstraint() of the difference
// between the vector and its predictor, and when the result is below
// (m_EmptyThreshold[iQP] >> factor) ANDs cbp_luma of the current macroblock
// with `mask` (i.e. clears every cbp_luma bit that is not set in `mask`).
#define EMPTY_THRESH(me_info, mask, factor)\
if (me_info.sad -\
MVConstraint(me_info.mv.mvx - me_info.predicted_mv.mvx,\
me_info.mv.mvy - me_info.predicted_mv.mvy,\
pRDQM) < (m_EmptyThreshold[iQP] >> (factor)) )\
{\
cur_mb.LocalMacroblockInfo->cbp_luma &= (mask);\
}
// BI_EMPTY_THRESH: same test, but the MVConstraint() terms of both me_info and
// futr_me_info are subtracted. Only me_info.sad is read; futr_me_info.sad is
// not used by this macro.
#define BI_EMPTY_THRESH(me_info, futr_me_info, mask, factor)\
if (me_info.sad -\
MVConstraint(me_info.mv.mvx - me_info.predicted_mv.mvx,\
me_info.mv.mvy - me_info.predicted_mv.mvy,\
pRDQM) - \
MVConstraint(futr_me_info.mv.mvx - futr_me_info.predicted_mv.mvx,\
futr_me_info.mv.mvy - futr_me_info.predicted_mv.mvy,\
pRDQM)\
< (m_EmptyThreshold[iQP] >> (factor)) )\
{\
cur_mb.LocalMacroblockInfo->cbp_luma &= (mask);\
}
#else
// Threshold test disabled: both macros expand to nothing.
#define EMPTY_THRESH(me_info, mask, factor)
#define BI_EMPTY_THRESH(me_info, futr_me_info, mask, factor)
#endif
// Compile-time feature switches. Entries that are commented out are disabled.
//#define TRACE_MV_PRED
//#define NO_EMPTY_THRESH
#define B_EARLY_EXIT
#define P_EARLY_EXIT
#define BESTOF5_EARLY_EXIT // skip rest of search if current best is good enough
//#define PRINT_MVS
//#define BFRAME_FORCE_FUTURE_REFERENCE // for B frames
//#define BFRAME_FORCE_PREVIOUS_REFERENCE // for B frames
//#define BFRAME_NO_DIRECT_MODE // for B frames
//#define BFRAME_NO_BIPRED_MODE // for B frames
//#define BFRAME_NO_BIPRED_MODE_SUBDIV
//#define BFRAME_PRINT_MVS
// True when low <= x <= high (both bounds inclusive). `x` is expanded twice, so
// avoid passing an expression with side effects.
#define RANGECHECK(x, low, high) (((x) >= (low)) && ((x) <= (high)))
// Macro to verify that vectors will not exceed the size of an Ipp8s after
// conversion from integer to subpel. Theoretically this is possible even though
// the signature search only searches +/- 32 around 0 (thus a max vector of
// +/- 96 plus the subpel search of +/- 2), because the initial integer search
// positions include predictor vectors, which could already be at a +/- 32 point.
// Then the integer search goes a little further, the new larger vector is a
// future predictor, so the future search could go even a little further, ...
// The code currently converts the vectors to Ipp8s from Ipp32s without clipping on
// the assumption that the above scenario in practice will not occur. The
// macros are used in VM_ASSERTs to verify this assumption. Clipping may need
// to be added if the assumption proves false.
// 01/28/00 update: The asserts have been hit a few times, indicating vectors
// are sometimes exceeding Ipp8s size, so clipping has been added when calculating
// the search ranges (left, right, up, down).
#define VINTRANGE(v) RANGECHECK((v), -MAX_MV_INT-1, MAX_MV_INT)
// TRUNCATE_LO(val, lim): evaluates `lim` once into a local Ipp32s named tmp and,
// when tmp < val, overwrites val with (Ipp16s)tmp -- i.e. val becomes
// min(val, lim). The expansion is a plain brace block (not do/while(0)), so be
// careful when using it as the body of an if/else. `val` is expanded more than
// once and must be an assignable lvalue; do not pass an argument named tmp.
#define TRUNCATE_LO(val, lim) \
{ \
Ipp32s (tmp) = (lim); \
if ((tmp) < (val)) \
(val) = (Ipp16s) (tmp); \
}
// TRUNCATE_HI(val, lim): counterpart of TRUNCATE_LO; when tmp > val, val is
// overwritten with (Ipp16s)tmp -- i.e. val becomes max(val, lim). The same
// usage caveats apply.
#define TRUNCATE_HI(val, lim) \
{ \
Ipp32s (tmp) = (lim); \
if ((tmp) > (val)) \
(val) = (Ipp16s) (tmp); \
}
namespace UMC_H264_ENCODER
{
// 8-bit overload: forwards all arguments to ippiSAD16x16Blocks4x4_8u16u, which
// writes its 16-bit results into tempSADs (the caller in this file supplies a
// 16-element buffer). The primitive's return status is discarded.
// NOTE(review): the trailing 0 is the primitive's last argument -- presumably
// its interpolation/mcType selector; confirm against the IPP reference.
inline void SAD16x16Blocks4x4(const Ipp8u* pSource0, Ipp32s pitchBytes0, const Ipp8u* pSource1, Ipp32s pitchBytes1, Ipp16u* tempSADs)
{
ippiSAD16x16Blocks4x4_8u16u(pSource0, pitchBytes0, pSource1, pitchBytes1, tempSADs, 0);
}
#if defined BITDEPTH_9_12
// 16-bit sample overload (only built with BITDEPTH_9_12): forwards all arguments
// to ippiSAD16x16Blocks4x4_16u32u_C1R, which writes 32-bit results into
// tempSADs. The primitive's return status is discarded.
inline void SAD16x16Blocks4x4(const Ipp16u* pSource0, Ipp32s pitchBytes0, const Ipp16u* pSource1, Ipp32s pitchBytes1, Ipp32u* tempSADs)
{
ippiSAD16x16Blocks4x4_16u32u_C1R(pSource0, pitchBytes0, pSource1, pitchBytes1, tempSADs, 0);
}
#endif // BITDEPTH_9_12
// Composition table for the larger partitions. Row r lists the 4x4 SAD slots
// (indices 0..15 of the ThisSAD array) whose sum forms ThisSAD[r + 16]; each
// row is terminated by -1, which is why a row holds 17 entries. The number at
// the end of each group comment is the ThisSAD index of the group's first row.
static const Ipp8s BlockList4x4[25][17] = {
{0, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 8 4x8 Blocks - 16
{1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{12, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 8 8x4 Blocks - 24
{2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 4 8x8 Blocks - 32
{4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 2, 3, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 2 8x16 Blocks - 36
{4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 2 16x8 Blocks - 38
{8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1} // 1 16x16 Block - 40
};
////////////////////////////////////////////////////////////////////////////////
//
// SAD16x16Block_Sb16
//
// Computes the sums of absolute differences of two 16x16 blocks for every
// partition size and stores them in ThisSAD:
//   ThisSAD[0..15]  - the sixteen 4x4 SADs, reordered through
//                     block_subblock_mapping
//   ThisSAD[16..40] - SADs of the larger partitions, each built by summing the
//                     4x4 entries listed in the matching BlockList4x4 row
// ThisSAD must therefore have room for at least 41 entries.
//
// Both blocks have the same pitch: pitchBytes0 is used for both sources and
// pitchBytes1 is not read.
// NOTE(review): confirm that no caller relies on a distinct pitchBytes1.
//
// Always returns 0.
//
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class SADType>
Ipp32u SAD16x16Block_Sb16(
    const PixType *pSource0,
    Ipp32s pitchBytes0,
    const PixType *pSource1,
    Ipp32s pitchBytes1,
    SADType *ThisSAD)
{
    (void)pitchBytes1; // see header: same pitch assumed for both sources

    SADType tempSADs[16];
    SAD16x16Blocks4x4(pSource0, pitchBytes0, pSource1, pitchBytes0, tempSADs);

    // Scatter the 4x4 results into the slot order used by the rest of the table.
    for (Ipp32u block = 0; block < 16; ++block)
        ThisSAD[block_subblock_mapping[block]] = tempSADs[block];

    // Accumulate SADs for all other block sizes
    for (Ipp32u block = 16; block < 41; ++block) {
        const Ipp8s *pBlockList = BlockList4x4[block - 16];
        Ipp32u tempSAD = 0;
        while (*pBlockList >= 0)
            tempSAD += ThisSAD[*pBlockList++];
        // Store at the full width of SADType. The previous hard-coded Ipp16u
        // cast truncated large-partition sums when SADType is Ipp32u (high bit
        // depth); for Ipp16u the stored value is unchanged.
        ThisSAD[block] = (SADType)tempSAD;
    }
    return 0;
}
inline void SAD16x16Blocks8x8(const Ipp8u *pSource0, Ipp32s pitchBytes0, const Ipp8u *pSource1, Ipp32s pitchBytes1, Ipp32u *pResults)
{
Ipp16u sads[4];
ippiSAD16x16Blocks8x8_8u16u(pSource0, pitchBytes0, pSource1, pitchBytes1, sads, 0);
pResults[0] = sads[0];
pResults[1] = sads[1];
pResults[2] = sads[2];
pResults[3] = sads[3];
}
#if defined BITDEPTH_9_12
// 16-bit sample overload (only built with BITDEPTH_9_12): forwards all arguments
// to ippiSAD16x16Blocks8x8_16u32u_C1R, which writes directly into pResults.
// The primitive's return status is discarded.
inline void SAD16x16Blocks8x8(const Ipp16u *pSource0, Ipp32s pitchBytes0, const Ipp16u *pSource1, Ipp32s pitchBytes1, Ipp32u *pResults)
{
ippiSAD16x16Blocks8x8_16u32u_C1R(pSource0, pitchBytes0, pSource1, pitchBytes1, pResults, 0);
}
#endif // BITDEPTH_9_12
// SAD helpers for the case where the second block uses a fixed pitch of
// 16 * sizeof(PixType) bytes (PixType must be visible where the macro is
// expanded). Note the argument order: the macros take
// (pSource0, pSource1, pitchBytes0) but expand to
// SADnxn(pSource0, pitchBytes0, pSource1, <fixed pitch>).
#define SAD16x16Block_P16(pSource0, pSource1, pitchBytes0) \
SAD16x16(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))
#define SAD8x8Block_P16(pSource0, pSource1, pitchBytes0) \
SAD8x8(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))
#define SAD4x4Block_P16(pSource0, pSource1, pitchBytes0) \
SAD4x4(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))
// For every partition index in [start, end): when the candidate SAD in ThisSAD
// is strictly smaller than the SAD stored in me_info, replace the stored entry
// with the candidate -- its SAD, the vector (xvec, yvec) scaled by
// SubPelFactor, the reference index this_ref and the matching predictor from
// this_pred_MV. Entries that are not improved are left untouched.
inline void SADComp41(Ipp16u *ThisSAD, T_RefIdx this_ref, H264MotionVector *this_pred_MV, ME_Info me_info[41], Ipp32s xvec, Ipp32s yvec, Ipp32s start, Ipp32s end)
{
    for (Ipp32s idx = start; idx < end; ++idx) {
        ME_Info &best = me_info[idx];
        const Ipp32u candidateSAD = (Ipp32u)ThisSAD[idx];

        // Ties keep the existing entry.
        if (!(candidateSAD < best.sad))
            continue;

        best.sad = candidateSAD;
        best.mv.mvx = (Ipp16s)xvec*SubPelFactor;
        best.mv.mvy = (Ipp16s)yvec*SubPelFactor;
        best.ref_idx = this_ref;
        best.predicted_mv = this_pred_MV[idx];
    }
}
// 32-bit SAD overload. For every partition index in [start, end): when the
// candidate SAD in ThisSAD is strictly smaller than the SAD stored in me_info,
// replace the stored entry with the candidate -- its SAD, the vector
// (xvec, yvec) scaled by SubPelFactor, the reference index this_ref and the
// matching predictor from this_pred_MV. Other entries are left untouched.
inline void SADComp41(Ipp32u *ThisSAD, T_RefIdx this_ref, H264MotionVector *this_pred_MV, ME_Info me_info[41], Ipp32s xvec, Ipp32s yvec, Ipp32s start, Ipp32s end)
{
    for (Ipp32s idx = start; idx < end; ++idx) {
        ME_Info &best = me_info[idx];
        const Ipp32u candidateSAD = (Ipp32u)ThisSAD[idx];

        // Ties keep the existing entry.
        if (!(candidateSAD < best.sad))
            continue;

        best.sad = candidateSAD;
        best.mv.mvx = (Ipp16s)xvec*SubPelFactor;
        best.mv.mvy = (Ipp16s)yvec*SubPelFactor;
        best.ref_idx = this_ref;
        best.predicted_mv = this_pred_MV[idx];
    }
}
// Copies sad, mv and predicted_mv from temp_me_info into me_info for every
// index in [start, end). The destination's ref_idx is not copied from the
// source; it is set to this_ref instead.
inline void Move_ME_Info(T_RefIdx this_ref, ME_Info temp_me_info[41], ME_Info me_info[41], Ipp32s start, Ipp32s end)
{
    for (Ipp32s idx = start; idx < end; ++idx) {
        const ME_Info &src = temp_me_info[idx];
        ME_Info &dst = me_info[idx];

        dst.sad = src.sad;
        dst.mv = src.mv;
        dst.ref_idx = this_ref;
        dst.predicted_mv = src.predicted_mv;
    }
}
// Copies a Width x Height region of 8-bit samples from pSrc to pDst with
// ippiCopy_8u_C1R. Both buffers are addressed with a fixed step of 16; the
// primitive's return status is discarded.
inline void MemCopy(Ipp8u *pDst, const Ipp8u *pSrc, const Ipp32s Height, const Ipp32s Width)
{
    IppiSize roi;
    roi.width = Width;
    roi.height = Height;
    ippiCopy_8u_C1R(pSrc, 16, pDst, 16, roi);
}
#if defined BITDEPTH_9_12
// 16-bit sample overload (only built with BITDEPTH_9_12): copies a
// Width x Height region from pSrc to pDst with ippiCopy_16s_C1R. Both buffers
// are addressed with a fixed step of 16 * sizeof(Ipp16s); the pointers are
// cast to Ipp16s* to match the primitive, and its return status is discarded.
inline void MemCopy(Ipp16u *pDst, const Ipp16u *pSrc, const Ipp32s Height, const Ipp32s Width)
{
    IppiSize roi;
    roi.width = Width;
    roi.height = Height;
    ippiCopy_16s_C1R((Ipp16s*)pSrc, 16*sizeof(Ipp16s), (Ipp16s*)pDst, 16*sizeof(Ipp16s), roi);
}
#endif // BITDEPTH_9_12
#define TR_SHIFT 8
//MVSub2FullPels
// Splits the sub-pel vector component `mv` into an integer part (mv_intg) and a
// fractional remainder (mv_frac) such that
//     mv == mv_intg * SubPelFactor + mv_frac.
// C++ division truncates toward zero, so a negative `mv` can leave a negative
// remainder; in that case the integer part is lowered by one and the remainder
// raised by SubPelFactor so that mv_frac is never negative
// (assuming SubPelFactor is positive).
inline void
MVSub2FullPels(const Ipp32s mv, Ipp32s& mv_intg, Ipp32s& mv_frac)
{
    mv_intg = mv / SubPelFactor;
    mv_frac = mv - mv_intg * SubPelFactor;

    if (mv_frac >= 0)
        return;

    // Negative remainder: round the integer part down instead of toward zero.
    mv_intg -= 1;
    mv_frac += SubPelFactor;
}
// Mode identifiers for an 8x8 block. Values are assigned implicitly, starting
// at 0 in declaration order.
// NOTE(review): presumably these name the sub-partition shape selected for an
// 8x8 block -- confirm against the code that uses them.
enum BlockBestMode_8x8{
MODE_8x8,
MODE_8x4,
MODE_4x8,
MODE_4x4,
// Present only when direct mode has not been compiled out.
#ifndef BFRAME_NO_DIRECT_MODE
MODE_DIRECT_8x8,
#endif
};
////////////////////////////////////////////////////////////////////////////////
// MVConstraint
////////////////////////////////////////////////////////////////////////////////
// MVADJUST: returns `ptr` advanced by (pitchPixels * y + x) elements; the offset
// is converted to Ipp32s before it is added to the pointer.
#define MVADJUST(ptr, pitchPixels, x, y) ((ptr) + Ipp32s((pitchPixels)*(y) + (x)))
// Result codes. COPY_FROM_* and PREDICTION_FROM_* share the numeric values 1
// (top) and 2 (bottom); ALLOK is 0.
#define ALLOK 0
#define COPY_FROM_TOP 1
#define COPY_FROM_BOTTOM 2
#define PREDICTION_FROM_TOP 1
#define PREDICTION_FROM_BOTTOM 2
inline Ipp8s SelectPredictionMethod(Ipp32s MBYoffset, Ipp32s mvy, Ipp32s sbheight, Ipp32s height)
{
Ipp32s padded_y = (mvy&3) > 0 ? 3 : 0;
mvy >>= 2;
if (mvy - padded_y + MBYoffset < 0)
{
return PREDICTION_FROM_TOP;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -