// umc_h264_me.cpp
// Listing extracted from the "audio-video-codecs.rar" (speech/audio-video codec) archive.
// Language: C++. This is page 1 of 5 of the original paginated listing.
////////////////////////////////////////////////////////////////////////////////
//
//               INTEL CORPORATION PROPRIETARY INFORMATION
//  This software is supplied under the terms of a license agreement or
//  nondisclosure agreement with Intel Corporation and may not be copied
//  or disclosed except in accordance with the terms of that agreement.
//        Copyright (c) 2004 - 2007 Intel Corporation. All Rights Reserved.
//
#include <string.h>
#include <limits.h>

#include "umc_h264_tables.h"
#include "umc_h264_to_ipp.h"
#include "umc_h264_bme.h"
#include "umc_h264_video_encoder.h"

// Assigns `val` to each of the `count` consecutive elements starting at
// `first`, in ascending address order. A `count` of zero writes nothing.
template<typename T>
inline void fill_n(T *first, size_t count, const T& val)
{
    for (size_t i = 0; i != count; ++i)
        first[i] = val;
}


// "Empty threshold" helpers. Unless NO_EMPTY_THRESH is defined before this
// point, each macro expands to an `if` statement that masks bits out of
// cur_mb.LocalMacroblockInfo->cbp_luma when a cost falls below
// m_EmptyThreshold[iQP] >> factor. When NO_EMPTY_THRESH is defined, both
// macros expand to nothing.
//
// The expansions reference pRDQM, m_EmptyThreshold, iQP, cur_mb and
// MVConstraint by name, so all of these must be visible at every place the
// macros are used.
//
// Caveats for callers:
//  - The expansion is a bare `if (...) { ... }`, not a do { } while (0)
//    wrapper, so a following `else` would attach to the macro's `if`.
//  - The me_info / futr_me_info arguments are pasted without parentheses and
//    appear several times; pass a plain lvalue with no side effects.
#ifndef NO_EMPTY_THRESH
// TODO add ref constraints ??
// EMPTY_THRESH: the cost is me_info.sad minus MVConstraint() of the
// difference between me_info.mv and me_info.predicted_mv. If that cost is
// below the shifted threshold, cbp_luma is ANDed with `mask`.
#define EMPTY_THRESH(me_info, mask, factor)\
    if (me_info.sad -\
    MVConstraint(me_info.mv.mvx - me_info.predicted_mv.mvx,\
    me_info.mv.mvy - me_info.predicted_mv.mvy,\
    pRDQM) < (m_EmptyThreshold[iQP] >> (factor)) )\
{\
    cur_mb.LocalMacroblockInfo->cbp_luma &= (mask);\
}

// BI_EMPTY_THRESH: same test as EMPTY_THRESH, except that the MVConstraint()
// values of both me_info and futr_me_info are subtracted from me_info.sad
// (futr_me_info.sad itself is not used).
#define BI_EMPTY_THRESH(me_info, futr_me_info, mask, factor)\
    if (me_info.sad -\
    MVConstraint(me_info.mv.mvx - me_info.predicted_mv.mvx,\
    me_info.mv.mvy - me_info.predicted_mv.mvy,\
    pRDQM) - \
    MVConstraint(futr_me_info.mv.mvx - futr_me_info.predicted_mv.mvx,\
    futr_me_info.mv.mvy - futr_me_info.predicted_mv.mvy,\
    pRDQM)\
    < (m_EmptyThreshold[iQP] >> (factor)) )\
{\
    cur_mb.LocalMacroblockInfo->cbp_luma &= (mask);\
}
#else
// Thresholding disabled: both macros expand to nothing.
#define EMPTY_THRESH(me_info, mask, factor)
#define BI_EMPTY_THRESH(me_info, futr_me_info, mask, factor)
#endif

// Compile-time switches for this translation unit. Lines that are commented
// out are disabled; remove the leading `//` to enable them.
//#define TRACE_MV_PRED
// NOTE(review): the `#ifndef NO_EMPTY_THRESH` test that selects the
// EMPTY_THRESH / BI_EMPTY_THRESH definitions appears earlier in this file than
// the line below, so enabling the define here would come too late to affect
// that test -- confirm and move it (or define it on the command line) if it
// is ever needed.
//#define NO_EMPTY_THRESH
#define B_EARLY_EXIT
#define P_EARLY_EXIT
#define BESTOF5_EARLY_EXIT      // skip rest of search if current best is good enough

// Debug / experiment switches, all disabled here.
//#define PRINT_MVS
//#define BFRAME_FORCE_FUTURE_REFERENCE     // for B frames
//#define BFRAME_FORCE_PREVIOUS_REFERENCE   // for B frames
//#define BFRAME_NO_DIRECT_MODE             // for B frames
//#define BFRAME_NO_BIPRED_MODE             // for B frames
//#define BFRAME_NO_BIPRED_MODE_SUBDIV
//#define BFRAME_PRINT_MVS


// True when low <= x <= high (both bounds inclusive). `x` is evaluated twice
// when the lower-bound test passes, so pass an expression without side effects.
#define RANGECHECK(x, low, high) (((x) >= (low)) && ((x) <= (high)))
// Macro to verify that vectors will not exceed the size of an Ipp8s after
// conversion from integer to subpel. Theoretically this is possible even though
// the signature search only searches +/- 32 around 0 (thus a max vector of
// +/- 96 plus the subpel search of +/- 2), because the initial integer search
// positions include predictor vectors, which could already be at a +/- 32 point.
// Then the integer search goes a little further, the new larger vector is a
// future predictor, so the future search could go even a little further, ...
// The code currently converts the vectors to Ipp8s from Ipp32s without clipping on
// the assumption that the above scenario in practice will not occur. The
// macros are used in VM_ASSERTs to verify this assumption. Clipping may need
// to be added if the assumption proves false.
// 01/28/00 update: The asserts have been hit a few times, indicating vectors
// are sometimes exceeding Ipp8s size, so clipping has been added when calculating
// the search ranges (left, right, up, down).
// VINTRANGE(v) is true when v lies in [-MAX_MV_INT-1, MAX_MV_INT]; MAX_MV_INT
// is defined outside this part of the file.
#define VINTRANGE(v)    RANGECHECK((v), -MAX_MV_INT-1, MAX_MV_INT)

// TRUNCATE_LO(val, lim): if lim < val, overwrite val with lim (cast to
// Ipp16s). In effect val becomes min(val, lim), i.e. lim acts as an upper
// bound on val.
// `lim` is evaluated once into a block-local Ipp32s named `tmp`; `val` is
// evaluated more than once and must be an assignable expression. Because of
// the local, an argument that itself refers to a variable called `tmp` would
// be shadowed inside the expansion.
#define TRUNCATE_LO(val, lim) \
{ \
    Ipp32s (tmp) = (lim); \
    if ((tmp) < (val)) \
        (val) = (Ipp16s) (tmp); \
}

// TRUNCATE_HI(val, lim): if lim > val, overwrite val with lim (cast to
// Ipp16s). In effect val becomes max(val, lim), i.e. lim acts as a lower
// bound on val. The same evaluation and shadowing caveats as TRUNCATE_LO apply.
#define TRUNCATE_HI(val, lim) \
{ \
    Ipp32s (tmp) = (lim); \
    if ((tmp) > (val)) \
        (val) = (Ipp16s) (tmp); \
}

namespace UMC_H264_ENCODER
{

// 8-bit overload: forwards both pointers and both pitches unchanged to
// ippiSAD16x16Blocks4x4_8u16u, which writes its results into tempSADs.
// The caller in this file supplies a 16-element tempSADs array.
// The trailing 0 is passed through to the IPP primitive as-is.
// NOTE(review): presumably the IPP motion-compensation type selector --
// confirm against the IPP reference.
inline void SAD16x16Blocks4x4(const Ipp8u* pSource0, Ipp32s pitchBytes0, const Ipp8u* pSource1, Ipp32s pitchBytes1, Ipp16u* tempSADs)
{
    ippiSAD16x16Blocks4x4_8u16u(pSource0, pitchBytes0, pSource1, pitchBytes1, tempSADs, 0);
}

#if defined BITDEPTH_9_12
// 16-bit-sample overload, compiled only when BITDEPTH_9_12 is defined.
// Forwards to ippiSAD16x16Blocks4x4_16u32u_C1R, which writes 32-bit results
// into tempSADs. The trailing 0 is passed through to the IPP primitive as-is.
inline void SAD16x16Blocks4x4(const Ipp16u* pSource0, Ipp32s pitchBytes0, const Ipp16u* pSource1, Ipp32s pitchBytes1, Ipp32u* tempSADs)
{
    ippiSAD16x16Blocks4x4_16u32u_C1R(pSource0, pitchBytes0, pSource1, pitchBytes1, tempSADs, 0);
}
#endif // BITDEPTH_9_12


// Aggregation table used by SAD16x16Block_Sb16 to build the SADs of larger
// partitions from the sixteen 4x4 SADs stored in ThisSAD[0..15].
// Row r lists the ThisSAD indices (each in 0..15) that are summed to produce
// ThisSAD[r + 16]; every row is terminated by -1, which is why a row holds 17
// entries (up to 16 indices plus the terminator).
// The number after the dash in each group comment is the ThisSAD index of the
// first entry in that group.
static const Ipp8s BlockList4x4[25][17] = {
                {0, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},     // 8 4x8 Blocks - 16
                {1, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {4, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {8, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {9, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {12, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},     // 8 8x4 Blocks - 24
                {2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {12, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},       // 4 8x8 Blocks - 32
                {4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {0, 1, 2, 3, 8, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 2 8x16 Blocks - 36
                {4, 5, 6, 7, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1},   // 2 16x8 Blocks - 38
                {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1},
                {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1}      // 1 16x16 Block - 40
        };


////////////////////////////////////////////////////////////////////////////////
//
// SAD16x16Block_Sb16
//
// Computes the sums of absolute differences between two 16x16 blocks for
// every partition size and stores them in ThisSAD, which must have room for
// 41 entries:
//   ThisSAD[0..15]  - the sixteen 4x4 SADs, stored at the positions given by
//                     block_subblock_mapping[]
//   ThisSAD[16..40] - SADs of the larger partitions, each obtained by summing
//                     the 4x4 entries listed in the matching BlockList4x4 row
//
// Both blocks are read with the same pitch (pitchBytes0); pitchBytes1 is
// accepted for interface compatibility but is not used.
//
// Always returns 0; the results are delivered through ThisSAD only.
//
////////////////////////////////////////////////////////////////////////////////
template <class PixType, class SADType>
Ipp32u SAD16x16Block_Sb16(
        const PixType *pSource0,
              Ipp32s   pitchBytes0,
        const PixType *pSource1,
              Ipp32s   pitchBytes1,
              SADType *ThisSAD)
{
    Ipp32u block, tempBlock;
    Ipp32s tempSAD;
    SADType tempSADs[16];
    const Ipp8s *pBlockList;

    // NOTE(review): pitchBytes0 is deliberately passed for both sources here,
    // matching the long-standing behaviour ("both blocks have the same pitch").
    // Confirm with the callers before switching the second pitch to pitchBytes1.
    SAD16x16Blocks4x4(pSource0, pitchBytes0, pSource1, pitchBytes0, tempSADs);

    // Reorder the sixteen 4x4 SADs into the layout the rest of the table uses.
    for (block = 0; block < 16; block ++) {
        tempBlock = block_subblock_mapping[block];
        ThisSAD[tempBlock] = tempSADs[block];
    }
    // Accumulate SADs for all other block sizes
    for (block = 16; block < 41; ++block) {
        pBlockList = BlockList4x4[block-16];
        tempSAD = 0;
        // Each row is a -1 terminated list of 4x4 indices to add together.
        while (*pBlockList >= 0)
            tempSAD += ThisSAD[*pBlockList++];
        // Store at the destination's own width. A fixed 16-bit cast would
        // discard the high bits of large sums when SADType is wider than
        // 16 bits (the 16-bit-sample instantiation uses Ipp32u).
        ThisSAD[block] = (SADType)tempSAD;
    }
    return 0;
}

inline void SAD16x16Blocks8x8(const Ipp8u *pSource0, Ipp32s pitchBytes0, const Ipp8u *pSource1, Ipp32s pitchBytes1, Ipp32u *pResults)
{
    Ipp16u sads[4];
    ippiSAD16x16Blocks8x8_8u16u(pSource0, pitchBytes0, pSource1, pitchBytes1, sads, 0);
    pResults[0] = sads[0];
    pResults[1] = sads[1];
    pResults[2] = sads[2];
    pResults[3] = sads[3];
}

#if defined BITDEPTH_9_12
// 16-bit-sample overload, compiled only when BITDEPTH_9_12 is defined.
// Forwards directly to ippiSAD16x16Blocks8x8_16u32u_C1R, which writes its
// 32-bit results straight into pResults. The trailing 0 is passed through to
// the IPP primitive as-is.
inline void SAD16x16Blocks8x8(const Ipp16u *pSource0, Ipp32s pitchBytes0, const Ipp16u *pSource1, Ipp32s pitchBytes1, Ipp32u *pResults)
{
    ippiSAD16x16Blocks8x8_16u32u_C1R(pSource0, pitchBytes0, pSource1, pitchBytes1, pResults, 0);
}
#endif // BITDEPTH_9_12

// Convenience wrappers around SAD16x16 / SAD8x8 / SAD4x4 for the case where
// the second source has a fixed pitch of 16 samples (16 * sizeof(PixType)
// bytes). Note the argument order: the macros take
// (pSource0, pSource1, pitchBytes0) but call the underlying function as
// (pSource0, pitchBytes0, pSource1, <fixed pitch>).
// A type named PixType must be visible wherever these macros are expanded.
#define SAD16x16Block_P16(pSource0, pSource1, pitchBytes0) \
    SAD16x16(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))

#define SAD8x8Block_P16(pSource0, pSource1, pitchBytes0) \
    SAD8x8(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))

#define SAD4x4Block_P16(pSource0, pSource1, pitchBytes0) \
    SAD4x4(pSource0, pitchBytes0, pSource1, 16 * sizeof(PixType))

// Compares candidate SADs (16-bit variant) against the current best entries.
// For every index in [start, end): when ThisSAD[index] is strictly smaller
// than me_info[index].sad, that entry is replaced with the candidate's SAD,
// the vector (xvec, yvec) scaled by SubPelFactor, the reference index
// this_ref, and the predicted vector this_pred_MV[index].
inline void SADComp41(Ipp16u *ThisSAD, T_RefIdx this_ref, H264MotionVector *this_pred_MV, ME_Info me_info[41], Ipp32s xvec, Ipp32s yvec, Ipp32s start, Ipp32s end)
{
    Ipp32s idx = start;
    while (idx < end) {
        const Ipp32u candidate = (Ipp32u)ThisSAD[idx];
        ME_Info &best = me_info[idx];

        // Only a strictly better SAD displaces the stored result.
        if (candidate < best.sad) {
            best.sad = candidate;
            best.mv.mvx = (Ipp16s)xvec*SubPelFactor;
            best.mv.mvy = (Ipp16s)yvec*SubPelFactor;
            best.ref_idx = this_ref;
            best.predicted_mv = this_pred_MV[idx];
        }
        ++idx;
    }
}

// Compares candidate SADs (32-bit variant) against the current best entries.
// For every index in [start, end): when ThisSAD[index] is strictly smaller
// than me_info[index].sad, that entry is replaced with the candidate's SAD,
// the vector (xvec, yvec) scaled by SubPelFactor, the reference index
// this_ref, and the predicted vector this_pred_MV[index].
inline void SADComp41(Ipp32u *ThisSAD, T_RefIdx this_ref, H264MotionVector *this_pred_MV, ME_Info me_info[41], Ipp32s xvec, Ipp32s yvec, Ipp32s start, Ipp32s end)
{
    Ipp32s idx = start;
    while (idx < end) {
        const Ipp32u candidate = (Ipp32u)ThisSAD[idx];
        ME_Info &best = me_info[idx];

        // Only a strictly better SAD displaces the stored result.
        if (candidate < best.sad) {
            best.sad = candidate;
            best.mv.mvx = (Ipp16s)xvec*SubPelFactor;
            best.mv.mvy = (Ipp16s)yvec*SubPelFactor;
            best.ref_idx = this_ref;
            best.predicted_mv = this_pred_MV[idx];
        }
        ++idx;
    }
}

// Copies motion-estimation results for the indices in [start, end) from
// temp_me_info into me_info. The sad, mv and predicted_mv fields are taken
// from the source entry; ref_idx is set to this_ref rather than copied.
inline void Move_ME_Info(T_RefIdx this_ref, ME_Info temp_me_info[41], ME_Info me_info[41], Ipp32s start, Ipp32s end)
{
    Ipp32s idx = start;
    while (idx < end) {
        ME_Info &from = temp_me_info[idx];
        ME_Info &to = me_info[idx];

        to.sad = from.sad;
        to.mv = from.mv;
        to.ref_idx = this_ref;
        to.predicted_mv = from.predicted_mv;
        ++idx;
    }
}

// Copies a Width x Height region of 8-bit samples from pSrc to pDst via
// ippiCopy_8u_C1R. Both buffers are addressed with a row step of 16 bytes.
// Note that Height precedes Width in the parameter list.
inline void MemCopy(Ipp8u *pDst, const Ipp8u *pSrc, const Ipp32s Height, const Ipp32s Width)
{
    // Region of interest handed to the IPP copy primitive.
    IppiSize roi;
    roi.width = Width;
    roi.height = Height;

    ippiCopy_8u_C1R(pSrc, 16, pDst, 16, roi);
}

#if defined BITDEPTH_9_12
// 16-bit-sample overload, compiled only when BITDEPTH_9_12 is defined.
// Copies a Width x Height region from pSrc to pDst via ippiCopy_16s_C1R,
// reinterpreting both buffers as Ipp16s. Both buffers use a row step of
// 16 * sizeof(Ipp16s) bytes. Note that Height precedes Width in the
// parameter list.
inline void MemCopy(Ipp16u *pDst, const Ipp16u *pSrc, const Ipp32s Height, const Ipp32s Width)
{
    // Region of interest handed to the IPP copy primitive.
    IppiSize roi;
    roi.width = Width;
    roi.height = Height;

    ippiCopy_16s_C1R((Ipp16s*)pSrc, 16*sizeof(Ipp16s), (Ipp16s*)pDst, 16*sizeof(Ipp16s), roi);
}
#endif // BITDEPTH_9_12


// Shift constant (8). It is not referenced in this part of the file.
// NOTE(review): its purpose cannot be determined from here -- check its uses
// further down before changing the value.
#define TR_SHIFT 8

// MVSub2FullPels
// Splits a motion-vector component given in sub-pel units into a full-pel
// part (mv_intg) and a sub-pel remainder (mv_frac), such that
// mv == mv_intg * SubPelFactor + mv_frac. A negative remainder is folded
// into the full-pel part, so the split rounds toward negative infinity and
// mv_frac is never left negative.
void inline
MVSub2FullPels(const Ipp32s mv, Ipp32s& mv_intg, Ipp32s& mv_frac)
{
    // Integer division truncates toward zero; the remainder keeps mv's sign.
    mv_intg = mv / SubPelFactor;
    mv_frac = mv - mv_intg * SubPelFactor;

    // Non-negative remainder: the truncated quotient is already the floor.
    if (mv_frac >= 0)
        return;

    // Negative remainder: borrow one full pel to make the fraction positive.
    --mv_intg;
    mv_frac += SubPelFactor;
}


// Candidate modes for an 8x8 block. The enumerators take the values 0..3 in
// declaration order; MODE_DIRECT_8x8 (value 4) exists only when
// BFRAME_NO_DIRECT_MODE is not defined.
enum BlockBestMode_8x8{
    MODE_8x8,
    MODE_8x4,
    MODE_4x8,
    MODE_4x4,
#ifndef BFRAME_NO_DIRECT_MODE
    MODE_DIRECT_8x8,
#endif

};

////////////////////////////////////////////////////////////////////////////////
// MVConstraint
////////////////////////////////////////////////////////////////////////////////
// NOTE(review): the banner above names MVConstraint, but no definition of it
// follows in this part of the file; what follows is MVADJUST and a set of
// status constants.

// MVADJUST: returns `ptr` advanced by (pitchPixels * y + x) elements; the
// offset is converted to Ipp32s before being added to the pointer.
#define MVADJUST(ptr, pitchPixels, x, y)  ((ptr) + Ipp32s((pitchPixels)*(y) + (x)))

// Status codes. COPY_FROM_* and PREDICTION_FROM_* share the numeric values
// 1 (top) and 2 (bottom); ALLOK is 0. SelectPredictionMethod below returns
// one of the PREDICTION_FROM_* values.
#define ALLOK 0
#define COPY_FROM_TOP 1
#define COPY_FROM_BOTTOM 2
#define PREDICTION_FROM_TOP 1
#define PREDICTION_FROM_BOTTOM 2

inline Ipp8s SelectPredictionMethod(Ipp32s MBYoffset, Ipp32s mvy, Ipp32s sbheight, Ipp32s height)
{
    Ipp32s padded_y = (mvy&3) > 0 ? 3 : 0;
    mvy >>= 2;

    if (mvy - padded_y + MBYoffset < 0)
    {
        return PREDICTION_FROM_TOP;
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -