// File: umc_me.cpp
/* /////////////////////////////////////////////////////////////////////////////
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright(c) 2007 Intel Corporation. All Rights Reserved.
//
//
// VC-1 (VC1) encoder, motion estimation
//
*/
#include "umc_defs.h"
#if defined (UMC_ENABLE_VC1_VIDEO_ENCODER)
#include <memory.h>
#include "umc_me.h"
#include "me_sadt.h"
#include "assert.h"
#include "ippcv.h"
//*******************************************************************************************************
namespace UMC
{
//future IPP functions
// Computes an 8x8 sum of absolute differences by adding up the SADs of the
// four 4x4 quadrants, each obtained from ippiSAD4x4_8u32s.
//   pSrc/srcStep - source block and its row step
//   pRef/refStep - reference block and its row step
//   pSAD         - receives the accumulated 8x8 SAD
//   mcType       - forwarded unchanged to ippiSAD4x4_8u32s
// Always returns ippStsNoErr; the status of the 4x4 calls is not inspected.
IppStatus ippiSAD8x8_8u32s(const Ipp8u* pSrc, Ipp32s srcStep, const Ipp8u* pRef, Ipp32s refStep, Ipp32s* pSAD, Ipp32s mcType)
{
    Ipp32s quadSad;
    Ipp32s total = 0;

    for(int row = 0; row < 8; row += 4){
        const Ipp8u* srcRow = pSrc + row*srcStep;
        const Ipp8u* refRow = pRef + row*refStep;
        for(int col = 0; col < 8; col += 4){
            ippiSAD4x4_8u32s(srcRow + col, srcStep, refRow + col, refStep, &quadSad, mcType);
            total += quadSad;
        }
    }

    *pSAD = total;
    return ippStsNoErr;
}
// Computes four SADs, one per 2x2 sub-block of a 4x4 block.
// Results are stored in raster order of the sub-blocks:
//   pDstSAD[0] top-left, [1] top-right, [2] bottom-left, [3] bottom-right.
//   pSrc/srcStep - source block and its row step
//   pRef/refStep - reference block and its row step
//   mcType       - present in the signature but not used by this function
// Always returns ippStsNoErr.
IppStatus ippiSAD4x4Blocks2x2_8u16u(const Ipp8u* pSrc, Ipp32s srcStep, const Ipp8u* pRef, Ipp32s refStep, Ipp16u* pDstSAD, Ipp32s mcType )
{
    pDstSAD[0] = 0;
    pDstSAD[1] = 0;
    pDstSAD[2] = 0;
    pDstSAD[3] = 0;

    for(int row = 0; row < 4; row++){
        const Ipp8u* srcRow = pSrc + row*srcStep;
        const Ipp8u* refRow = pRef + row*refStep;
        const int rowBase = row & 2;            // 0 for rows 0-1, 2 for rows 2-3
        for(int col = 0; col < 4; col++){
            const int diff = (int)srcRow[col] - (int)refRow[col];
            const int magnitude = (diff < 0) ? -diff : diff;
            pDstSAD[rowBase + (col >> 1)] += (Ipp16u)magnitude;   // col>>1: 0 for cols 0-1, 1 for cols 2-3
        }
    }
    return ippStsNoErr;
}
template<typename T, Ipp32s size> void HadamardFwd(const T* pSrc, Ipp32s srcStep, Ipp16s* pDst)
{
Ipp32s Hadamard8x8[64] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
1, -1, 1, -1, 1, -1, 1, -1,
1, 1,-1, -1, 1, 1, -1,-1,
1, -1,-1, 1, 1, -1,-1, 1,
1, 1, 1, 1, -1, -1, -1, -1,
1, -1, 1, -1, -1, 1, -1, 1,
1, 1, -1, -1, -1, -1, 1, 1,
1, -1, -1, 1, -1, 1, 1, -1};
Ipp32s Hadamard4x4[16] =
{1, 1, 1, 1,
1, -1, 1,-1,
1, 1, -1,-1,
1, -1,-1, 1};
Ipp32s *HadamardTbl;
Ipp32s tmp[size*size];
if(size == 4) HadamardTbl = Hadamard4x4;
else if(size == 8) HadamardTbl = Hadamard8x8;
else assert(0);
//D*T
for(int i=0; i<size; i++)
for(int j=0; j<size; j++){
tmp[size*i + j]=0;
for(int ii=0; ii<size; ii++){
tmp[size*i + j]+= ((Ipp32s)*(pSrc + i*(srcStep/sizeof(T)) + ii)) * HadamardTbl[size*ii + j];
}
}
//T'*(D*T)
for(int i=0; i<size; i++)
for(int j=0; j<size; j++){
pDst[size*i + j]=0;
for(int ii=0; ii<size; ii++){
pDst[size*i + j] += HadamardTbl[ii*size + i] * tmp[size*ii + j];
}
}
}
// Translates inPars.SearchSpeed into the four search option flags.
//   out of [-1, 127] : returns false, nothing is changed
//   127              : MV prediction, down-sampled search, fast intra/inter decision
//   64..126          : MV prediction with low threshold, down-sampled search
//   32..63           : down-sampled search only
//   0                : all four flags cleared
//   -1 and 1..31     : flags are left exactly as the caller set them
// Returns true for every value inside the accepted range.
bool MeParams::SetSpeedQualityParams()
{
    //parse Speed parameter
    if(inPars.SearchSpeed < -1 || inPars.SearchSpeed > 127)
        return false;

    // No preset is defined for -1 and 1..31: keep the caller's flags.
    if(inPars.SearchSpeed != 0 && inPars.SearchSpeed <= 31)
        return true;

    const bool fastest  = (inPars.SearchSpeed == 127);
    const bool balanced = !fastest && (inPars.SearchSpeed > 63);   //best speed/quality ratio
    const bool fullSearch = (inPars.SearchSpeed == 0);

    inPars.UseMvPrediction                = (fastest || balanced) ? 1 : 0;
    inPars.UseLowThresholdForMvPrediction = balanced ? 1 : 0;
    inPars.UseDownSampledImageForSearch   = fullSearch ? 0 : 1;
    inPars.UseFastIntraInterDecision      = fastest ? 1 : 0;

    return true;
}
// Puts the object into an "nothing allocated" state: every buffer pointer is
// NULL, and the block / reference-frame limits and stream type get their
// defaults.
MeBase::MeBase()
{
    // defaults
    m_stream_type = VC1_VIDEO;
    MaxRefFrame   = 1;
    MaxBlock      = 5;

    // full resolution luma planes
    m_pSrcFrameY = NULL;
    m_pRefFrameY = NULL;

    // down-sampled luma planes
    m_pSrcFrameYDwn4  = NULL;
    m_pRefFrameYDwn4  = NULL;
    m_pRefFrameFYDwn4 = NULL;
    m_pRefFrameBYDwn4 = NULL;

    // scratch buffers
    m_16x16buf  = NULL;
    m_16x16bufB = NULL;
    m_bufAvrg   = NULL;

    // per-macroblock results
    m_ResMB = NULL;

    // best cost / MV storage
    m_BestCost  = NULL;
    m_BestMV    = NULL;
    m_BestBCost = NULL;
    m_BestBMV   = NULL;

    // direct-mode MVs
    m_MVDirectFW = NULL;
    m_MVDirectBW = NULL;
}
// Tears the object down by delegating to Close().
MeBase::~MeBase()
{
    Close();
}
// Stores the init parameters, validates them through CheckParams(true) and
// allocates every working buffer of the motion estimator.
//   par - initialization parameters; the pointer is kept in m_MeParInit
// Returns true when all allocations succeeded, false when CheckParams rejects
// the parameters or any allocation fails. On failure, buffers allocated so far
// stay attached to the object; nothing is released here.
// NOTE(review): the destructor calls Close(), which presumably frees them - confirm.
// m_WidthMB, m_HeightMB, m_NumOfMVs and m_Padding are read but not set here;
// presumably CheckParams fills them in - confirm.
bool MeBase::Init(MeInitParams *par)
{
    Ipp32s i;

    m_MeParInit = par;
    if(!CheckParams(true))
        return false;
    // m_stream_type = par->m_stream_type;

    //allocate memory
    // three 256-byte scratch buffers
    m_16x16buf  = (Ipp8u*)malloc(256);
    m_16x16bufB = (Ipp8u*)malloc(256);
    m_bufAvrg   = (Ipp8u*)malloc(256);
    if(m_16x16buf == NULL || m_16x16bufB == NULL || m_bufAvrg == NULL)
        return false;

    // best cost / MV per block
    m_BestCost = (Ipp32s*)malloc(MaxBlock * sizeof(Ipp32s));
    if(m_BestCost == NULL) return false;
    m_BestMV = (MeMV*)malloc(MaxBlock * sizeof(MeMV));
    if(m_BestMV == NULL) return false;

    // Per-reference-frame tables. Their entries are cleared immediately so
    // that a failure part way through never leaves indeterminate pointers
    // behind for later cleanup.
    m_BestBCost = (Ipp32s**)malloc(MaxRefFrame * sizeof(Ipp32s*));
    if(m_BestBCost == NULL) return false;
    for(i = 0; i < MaxRefFrame; i++)
        m_BestBCost[i] = NULL;

    m_BestBMV = (MeMV**)malloc(MaxRefFrame * sizeof(MeMV*));
    if(m_BestBMV == NULL) return false;
    for(i = 0; i < MaxRefFrame; i++)
        m_BestBMV[i] = NULL;

    for(i = 0; i < MaxRefFrame; i++)
    {
        m_BestBCost[i] = (Ipp32s*)malloc(MaxBlock * sizeof(Ipp32s));
        if(m_BestBCost[i] == NULL) return false;
        m_BestBMV[i] = (MeMV*)malloc(MaxBlock * sizeof(MeMV));
        if(m_BestBMV[i] == NULL) return false;
    }

    Ipp32s NumOfMBs = m_WidthMB*m_HeightMB;

    // one forward and one backward direct MV per macroblock, bidirectional search only
    if(m_MeParInit->SearchDirection == bidir_search)
    {
        Ipp32s size_directMV = NumOfMBs*sizeof(MeMV);
        m_MVDirectFW = (MeMV*)malloc(size_directMV);
        m_MVDirectBW = (MeMV*)malloc(size_directMV);
        // Check before clearing: memset on a failed allocation would write
        // through a NULL pointer.
        if(m_MVDirectFW == NULL || m_MVDirectBW == NULL) return false;
        memset(m_MVDirectFW,0,size_directMV);
        memset(m_MVDirectBW,0,size_directMV);
    }

    // Result storage is a single allocation:
    //   [ NumOfMBs x MeMB ][ per MB: m_NumOfMVs x MeMV, then m_NumOfMVs x Ipp32s ]
    const size_t resSize = NumOfMBs*(sizeof(MeMB)+m_NumOfMVs*(sizeof(MeMV)+sizeof(Ipp32s)));
    m_ResMB = (MeMB*)malloc(resSize);
    if(m_ResMB == NULL) return false;
    memset(m_ResMB, 0, resSize);
    for(i=0; i<NumOfMBs; i++){
        // point each MB at its own MV / cost slice in the tail of the allocation
        m_ResMB[i].MVs = (MeMV*)((Ipp8u*)m_ResMB + NumOfMBs*sizeof(MeMB) + i*m_NumOfMVs*(sizeof(MeMV)+sizeof(Ipp32s)));
        m_ResMB[i].MbCosts = (Ipp32s*)((Ipp8u*)(m_ResMB[i].MVs)+ m_NumOfMVs*sizeof(MeMV));
    }

    // Down-sampled luma planes.
    // NOTE(review): buffer sizes are 1/16 of the full (padded) frame area while
    // the steps below equal the full-resolution row width - confirm this matches
    // how the down-sampling code addresses these buffers.
    Ipp32s src_size = m_HeightMB*16*m_WidthMB*sizeof(Ipp8u);
    Ipp32s ref_size = (16*m_HeightMB+2*m_Padding)*(16*m_WidthMB+2*m_Padding)*sizeof(Ipp8u)/16;
    m_pSrcFrameYDwn4=(Ipp8u*)malloc(src_size);
    m_pRefFrameFYDwn4=(Ipp8u*)malloc(ref_size);
    m_SrcStepDwn4 = 16*m_WidthMB;
    m_RefStepDwn4 = 16*m_WidthMB+2*m_Padding;
    if(m_pSrcFrameYDwn4==NULL || m_pRefFrameFYDwn4==NULL) return false;
    memset(m_pSrcFrameYDwn4, 0, src_size);
    memset(m_pRefFrameFYDwn4, 0, ref_size);

    // backward reference plane, bidirectional search only
    if(m_MeParInit->SearchDirection == bidir_search){
        m_pRefFrameBYDwn4=(Ipp8u*)malloc(ref_size);
        if(m_pRefFrameBYDwn4==NULL) return false;
        memset(m_pRefFrameBYDwn4, 0, ref_size);
    }
    return true;
}
// TODO: rewrite GetxMV and GEtT2()
// Returns predictor A: the motion vector (index m_predictor_index) of the
// macroblock directly above the current one, or a zero MV on the top row.
// Neighbour layout used by the Get?MV helpers ('.' is the current MB):
//   x A B
//   C . x
// The MbIntra check on the neighbour is not applied here.
MeMV MeBase::GetAMV()
{
    if(m_CurMB.y != 0)
        return m_ResMB[m_adr-m_WidthMB].MVs[m_predictor_index];

    // top row: there is no macroblock above
    return MeMV(0);
}
// Same as GetAMV, but additionally treats an intra-coded upper neighbour as
// unavailable and records the availability in m_ResMB[m_adr].is_A_active.
// Returns the upper neighbour's MV when it is usable, a zero MV otherwise.
MeMV MeBase::GetAMVHibrid()
{
    // The row test comes first so the neighbour is never read on the top row.
    const bool unavailable = (m_CurMB.y == 0) || (m_ResMB[m_adr-m_WidthMB].MbType == MbIntra);

    m_ResMB[m_adr].is_A_active = !unavailable;
    if(unavailable)
        return MeMV(0);

    return m_ResMB[m_adr-m_WidthMB].MVs[m_predictor_index];
}
// Returns predictor B: the motion vector (index m_predictor_index) of the
// macroblock above and to the right of the current one. A zero MV is returned
// on the top row and in the last column, where that neighbour does not exist.
// The MbIntra check on the neighbour is not applied here.
MeMV MeBase::GetBMV()
{
    const bool onTopRow     = (m_CurMB.y == 0);
    const bool inLastColumn = (m_CurMB.x >= m_WidthMB-1);

    if(onTopRow || inLastColumn)
    {
        return MeMV(0);
    }
    return m_ResMB[m_adr-m_WidthMB+1].MVs[m_predictor_index];
}
// Returns predictor C: the motion vector (index m_predictor_index) of the
// macroblock to the left of the current one, or a zero MV in the first column.
// The MbIntra check on the neighbour is not applied here.
MeMV MeBase::GetCMV()
{
    if(m_CurMB.x != 0)
        return m_ResMB[m_adr-1].MVs[m_predictor_index];

    // first column: there is no macroblock to the left
    return MeMV(0);
}
// Same as GetCMV, but additionally treats an intra-coded left neighbour as
// unavailable and records the availability in m_ResMB[m_adr].is_C_active.
// Returns the left neighbour's MV when it is usable, a zero MV otherwise.
MeMV MeBase::GetCMVHibrid()
{
    // The column test comes first so the neighbour is never read in column 0.
    const bool unavailable = (m_CurMB.x == 0) || (m_ResMB[m_adr-1].MbType == MbIntra);

    m_ResMB[m_adr].is_C_active = !unavailable;
    if(unavailable)
        return MeMV(0);

    return m_ResMB[m_adr-1].MVs[m_predictor_index];
}
// MPEG-2 style predictor: simply the left neighbour's motion vector as
// returned by GetCMV().
MeMV MeBase::GetPredictorMPEG2()
{
    return GetCMV();
}
// Median predictor: each component of the result is median3() of the
// corresponding components of predictors A, B and C (GetAMV, GetBMV, GetCMV).
MeMV MeBase::GetMedian()
{
    const MeMV above      = GetAMV();
    const MeMV aboveRight = GetBMV();
    const MeMV left       = GetCMV();

    MeMV predictor;
    predictor.x = median3(above.x, aboveRight.x, left.x);
    predictor.y = median3(above.y, aboveRight.y, left.y);
    return predictor;
}
MeMV MeBase::GetMedianHibrid()
{
MeMV tmp;
Ipp32u sumA, sumC;
MeMV A=GetAMVHibrid();
MeMV B=GetBMV();
MeMV C=GetCMVHibrid();
tmp.x=median3(A.x, B.x, C.x);
tmp.y=median3(A.y, B.y, C.y);
if(m_ResMB[m_adr].is_A_active && m_ResMB[m_adr].is_C_active)
{
sumA = VC1ABS(A.x - tmp.x) + VC1ABS(A.y - tmp.y);
sumC = VC1ABS(C.x - tmp.x) + VC1ABS(C.y - tmp.y);
if (sumA > 32 || sumC>32)
{
sumA < sumC ? tmp = A : tmp = C;
}
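// [code-viewer residue, not part of the source] Keyboard shortcuts:
//   copy code: Ctrl + C
//   search code: Ctrl + F
//   full screen: F11
//   toggle theme: Ctrl + Shift + D
//   show shortcuts: ?
//   increase font size: Ctrl + =
//   decrease font size: Ctrl + -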