📄 umc_me.cpp
字号:
IntraCost = 0;
InterCost = 0;
//for interpolation
const Ipp8u *pSrc, *pRefF, *pRefB;
pSrc = m_pSrcFrameY+16*m_CurMB.y*m_SrcStep+16*m_CurMB.x;
pRefF = m_MePar->pRefFY[0]+16*m_CurMB.y*m_RefStep+16*m_CurMB.x;
//1 choose best Inter mode
if(m_MePar->SearchDirection == forward_search){
//FORWARD
//interpolate
// TODO: skip for integer pixel and for fast intra decision?
Interpolate(pRefF,m_RefStep,m_16x16buf,16, m_BestMV[0]);
//chose best cost
m_ResMB[m_adr].MbType = MbFrw;
m_ResMB[m_adr].MbCosts[0] = m_BestCost[0];
m_ResMB[m_adr].MVs[0] = m_BestMV[0];
}else if(m_MePar->SearchDirection == bidir_search){
//BIDIR
//interpolate
// TODO: skip for integer pixel?
pRefB = m_MePar->pRefBY[0]+16*m_CurMB.y*m_RefStep+16*m_CurMB.x;
Interpolate(pRefF,m_RefStep,m_16x16buf,16, m_BestMV[0]);
Interpolate(pRefB,m_RefStep,m_16x16bufB,16, m_BestBMV[0][0]);
ippiAverage16x16_8u_C1R(m_16x16buf, 16, m_16x16bufB, 16, m_bufAvrg, 16);
//calculate bidir SAD to compare with frw, bkw SADs
MeCost cost;
//GetCost<Mb16x16,IntegerPixIn,SAD>(pSrc, m_SrcStep, m_bufAvrg, 16, &cost, 0);
CalcCost<Mb16x16,IntegerPixIn>(pSrc, m_SrcStep, m_bufAvrg, 16, &cost,0,m_CostMetrics);
BidirCost = cost.sum+WeightMV(m_BestMV[0])+WeightMV(m_BestBMV[0][0])+32; // TODO: replace 32 by something meaningful.
//chose best cost
Ipp32s MinCost = IPP_MIN(m_BestCost[0], IPP_MIN(m_BestBCost[0][0], BidirCost));
if(MinCost == m_BestCost[0]){
m_ResMB[m_adr].MbType = MbFrw;
m_ResMB[m_adr].MbCosts[0] = m_BestCost[0];
m_ResMB[m_adr].MVs[0] = m_BestMV[0];
}else if(MinCost == m_BestBCost[0][0]){
m_ResMB[m_adr].MbType = MbBkw;
m_ResMB[m_adr].MbCosts[0] = m_BestBCost[0][0];
m_ResMB[m_adr].MVs[0] = m_BestBMV[0][0];
}else if(MinCost == BidirCost){
m_ResMB[m_adr].MbType = MbBidir;
m_ResMB[m_adr].MbCosts[0] = BidirCost;
m_ResMB[m_adr].MVs[0] = m_BestMV[0];
m_ResMB[m_adr].MVs[1] = m_BestBMV[0][0];;
}else assert(0);
} else assert(0);
//1 choose Intra mode
MeInnerParams* inPars = m_MePar->GetMeInnerParams();
if(inPars->UseFastIntraInterDecision == 1){
//fast intra/inter decision
InterCost = m_ResMB[m_adr].MbCosts[0];
ippiMeanAbsDev16x16_8u32s_C1R(pSrc, m_SrcStep, &IntraCost);
IntraCost += 256; // TODO: should be qp related!
}
else{
//accurate decision, calculate difference
if(m_ResMB[m_adr].MbType == MbFrw){
ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_16x16buf, 16, bufDiff, 32);
}else if(m_ResMB[m_adr].MbType == MbBkw){
ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_16x16bufB, 16, bufDiff, 32);
}else if(m_ResMB[m_adr].MbType == MbBidir){
ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_bufAvrg, 16, bufDiff, 32);
}else assert(0);
//calculate Intra cost
for(Ipp32s i=0; i<4; i++){
HadamardFwd<Ipp8u,8>(pSrc+8*(i/2)*m_SrcStep+8*(i&1), m_SrcStep, bufHDMR);
for(Ipp32s j=(i==0?0:1)/*DC predicted!*/; j<64; j++) // TODO: if one of the neighbor is I then DC is predicted, if not first block DC should be transmited
IntraCost+=abs((Ipp32s)bufHDMR[j]);
HadamardFwd<Ipp16s,8>(bufDiff+16*8*(i/2)+8*(i&1), 32, bufHDMR);
for(Ipp32s j=0; j<64; j++)
InterCost+=abs((Ipp32s)bufHDMR[j]);
}
}
if(InterCost>IntraCost)
{
m_ResMB[m_adr].MbType = MbIntra;
memset(m_ResMB[m_adr].MVs,0,m_NumOfMVs * sizeof(MeMV)); //to simplify prediction calculation
}
}
// Evaluates one candidate motion vector and records it as the best candidate
// when its cost beats the best cost stored so far.
//
// Template parameters:
//   mt  - partition being estimated (Mb16x16, Mb8x8 or Mb4x8x8).
//   pix - precision of the candidate (QuadPixIn, IntegerPixIn, HalfPixIn or
//         QuarterPixIn).
// Parameter:
//   mv  - candidate vector. It is ignored when it lies outside
//         [-m_SearchRangeX, m_SearchRangeX) x [-m_SearchRangeY, m_SearchRangeY).
//         NOTE(review): mv appears to be in quarter-pel units (a shift by 2
//         yields the integer-pel part) - confirm against MeMV's definition.
//
// Side effects: may update m_BestCost[] / m_BestMV[]; the QuarterPixIn path
// overwrites m_16x16buf with the interpolated reference block.
template<MeMbPart mt, MeInPixType pix> void MeBase::EstimatePoint(MeMV mv)
{
    // Reject candidates outside the allowed search window.
    if( mv.x < -m_SearchRangeX || mv.x >= m_SearchRangeX ||
        mv.y < -m_SearchRangeY || mv.y >= m_SearchRangeY)
        return;

    // Block geometry and picture limits for the selected precision. Everything
    // is initialised so that no path can read an indeterminate value.
    Ipp32s MbWidth = 0;
    Ipp32s MvShift = 0;
    Ipp32s XboardSub = 0;
    Ipp32s YboardSub = 0;
    Ipp32s LeftLimit = 0;
    Ipp32s TopLimit = 0;
    Ipp32s RightLimit = 0;
    Ipp32s BottomLimit = 0;
    switch(pix)
    {
    case QuadPixIn:
        // Search on the 4x down-sampled frames: 4x4 blocks, limits scaled by 4.
        MbWidth = 4;
        MvShift = 4;
        LeftLimit = m_PicRange.top_left.x >> 2;
        TopLimit = m_PicRange.top_left.y >> 2;
        RightLimit = m_PicRange.bottom_right.x >> 2;
        BottomLimit = m_PicRange.bottom_right.y >> 2;
        break;
    case IntegerPixIn:
        MbWidth = 16;
        MvShift = 2;
        LeftLimit = m_PicRange.top_left.x;
        TopLimit = m_PicRange.top_left.y;
        RightLimit = m_PicRange.bottom_right.x;
        BottomLimit = m_PicRange.bottom_right.y;
        break;
    case HalfPixIn:
        MbWidth = 16;
        MvShift = 2;
        // A half-pel offset needs one more reference column/row, so the
        // right/bottom limit is pulled in by one pixel in that direction.
        XboardSub = ((mv.x & 2) != 0) ? 1 : 0;
        YboardSub = ((mv.y & 2) != 0) ? 1 : 0;
        LeftLimit = m_PicRange.top_left.x;
        TopLimit = m_PicRange.top_left.y;
        RightLimit = m_PicRange.bottom_right.x - XboardSub;
        BottomLimit = m_PicRange.bottom_right.y - YboardSub;
        break;
    case QuarterPixIn:
        MbWidth = 16;
        MvShift = 2;
        // Same as above, for any non-zero fractional part.
        XboardSub = ((mv.x & 3) != 0) ? 1 : 0;
        YboardSub = ((mv.y & 3) != 0) ? 1 : 0;
        LeftLimit = m_PicRange.top_left.x;
        TopLimit = m_PicRange.top_left.y;
        RightLimit = m_PicRange.bottom_right.x - XboardSub;
        BottomLimit = m_PicRange.bottom_right.y - YboardSub;
        break;
    default:
        // Unknown precision: nothing sensible can be computed.
        assert(false);
        return;
    }

    // Top-left corner of the reference block, clamped to the picture limits.
    Ipp32s x = MbWidth*m_CurMB.x + (mv.x>>MvShift);
    Ipp32s y = MbWidth*m_CurMB.y + (mv.y>>MvShift);
    if(x < LeftLimit) x = LeftLimit;
    else if(x > RightLimit - MbWidth) x = RightLimit - MbWidth;
    if(y < TopLimit) y = TopLimit;
    else if(y > BottomLimit - MbWidth) y = BottomLimit - MbWidth;

    MeCost cost;
    const Ipp8u *pSrc=m_pSrcFrameY+16*m_CurMB.y*m_SrcStep+16*m_CurMB.x;
    const Ipp8u *pRef=m_pRefFrameY+y*m_RefStep+x;
    Ipp32s srcStep=m_SrcStep;
    Ipp32s refStep=m_RefStep;
    Ipp32s mc_type=0;
    switch(pix){
    case QuadPixIn:
        // Switch both pointers and steps over to the down-sampled planes.
        srcStep=m_SrcStepDwn4/4;
        refStep=m_RefStepDwn4/4;
        pSrc = m_pSrcFrameYDwn4+4*m_CurMB.y*m_SrcStepDwn4/4+4*m_CurMB.x;
        pRef= m_pRefFrameYDwn4+(y+(m_Padding >> 2))*m_RefStepDwn4/4+(x+(m_Padding >> 2));
        break;
    case IntegerPixIn:
        if(mt == Mb8x8){
            // Move to the 8x8 sub-block selected by m_BlockIdx (1..4).
            // NOTE(review): this offset is applied for integer precision only;
            // confirm whether Mb8x8 is ever used with sub-pel precision.
            Ipp32s dx = 8*((m_BlockIdx-1)&1);
            Ipp32s dy = 8*((m_BlockIdx-1)/2);
            pSrc += m_SrcStep*dy+dx;
            pRef += m_RefStep*dy+dx;
        }
        break;
    case HalfPixIn:
        // TODO: interpolate for SATD here
        assert(m_MePar->Interpolation == Bilinear);
        // The half-pel phase comes from the fractional bits of the vector
        // (bit 1 of mv.x / mv.y, the same test used for XboardSub/YboardSub
        // above), not from the clamped integer-pel coordinates x / y.
        // Resulting values are 0, 4, 8 or 12 - presumably the IPP
        // IPPVC_MC_APX_FF/FH/HF/HH constants; confirm against ippvc.h.
        mc_type = ((mv.x & 0x2)<< 2) | ((mv.y & 0x2)<< 1);
        break;
    case QuarterPixIn:
        // Build the quarter-pel reference block in m_16x16buf and compare
        // against that buffer instead of the reference frame.
        Interpolate(pRef,refStep,m_16x16buf,16, MeMV(3&mv.x,3&mv.y));
        pRef=m_16x16buf;
        refStep=16;
        break;
    }

    //GetCost<mt,pix,SAD>(pSrc, srcStep, pRef, refStep, &cost, mc_type);
    CalcCost<mt,pix>(pSrc, srcStep, pRef, refStep, &cost,mc_type,m_CostMetrics);

    Ipp32s sum = cost.sum;
    switch(mt){
    case Mb4x8x8:
        // Track the best vector of each 8x8 block (slots 1..4) separately and
        // rebuild the 16x16 cost as the sum of the four block costs.
        sum = 0;
        for(Ipp32s i=0; i<4; i++){
            sum+=cost.sum8x8[i];
            if((Ipp32s)cost.sum8x8[i]<m_BestCost[i+1]){
                m_BestCost[i+1]=cost.sum8x8[i];
                m_BestMV[i+1] = mv;
            }
        }
        assert(m_BlockIdx == 0);
        // intentional fall-through: also update the 16x16 result in slot 0
    case Mb16x16:
    case Mb8x8:
        // Add the vector's weight to the cost before comparing.
        sum += WeightMV(mv);
        if(sum<m_BestCost[m_BlockIdx]){
            m_BestCost[m_BlockIdx] = sum;
            m_BestMV[m_BlockIdx] = mv;
        }
        break;
    default:
        assert(false);
    }
}
// Exhaustively evaluates every candidate on a regular grid centred on the
// current best vector of block m_BlockIdx.
//
// Template parameters:
//   mt  - partition being estimated; forwarded to EstimatePoint.
//   pix - precision of the search; selects the grid spacing.
// Parameters:
//   RangeX, RangeY - half-extent of the grid; offsets run over
//                    [-RangeX, RangeX) x [-RangeY, RangeY) in increments of
//                    the precision-dependent step.
//
// The centre is sampled once before the loop, so updates that EstimatePoint
// makes to m_BestMV do not move the grid while it is being scanned.
template<MeMbPart mt, MeInPixType pix> void MeBase::FullSearch(Ipp32s RangeX, Ipp32s RangeY)
{
    // Grid spacing for each precision. An unhandled precision must not reach
    // the loops below: the step would otherwise be indeterminate.
    Ipp32s Step = 0;
    switch(pix){
    case QuadPixIn:
        Step = 16;
        break;
    case IntegerPixIn:
        Step = 4;
        break;
    case HalfPixIn:
        Step = 2;
        break;
    case QuarterPixIn:
        Step = 1;
        break;
    default:
        assert(false);
        return;
    }

    MeMV CenterMV = m_BestMV[m_BlockIdx];
    for(int y = -RangeY; y<RangeY; y+=Step ){
        for(int x = -RangeX; x<RangeX; x+=Step ){
            EstimatePoint<mt, pix>(CenterMV+MeMV(x,y));
        }
    }
}
template<MeMbPart mt, MeInPixType pix, MeDiamondType dm> void MeBase::DiamondSearch()
{
const Ipp32s BigDiamondTable[][2] = { {-3,0}, {-2,2}, {0,3}, {2,2}, {3,0}, {2,-2}, {0,-3}, {-2,-2} };
const Ipp32s MediumDiamondTable[][2] = { {-2,0}, {-1,1}, {0,2}, {1,1}, {2,0}, {1,-1}, {0,-2}, {-1,-1} };
const Ipp32s SmallDiamondTable[][2] = {{-1,0}, {0,1}, {1,0}, {0,-1}};
const Ipp32s* Table;
Ipp32s TableSize;
Ipp32s Step;
if(dm == Small){
Table = (const Ipp32s*)SmallDiamondTable;
TableSize = sizeof(SmallDiamondTable)/(2*sizeof(Ipp32s));
}
if(dm == Medium){
Table = (const Ipp32s*)MediumDiamondTable;
TableSize = sizeof(MediumDiamondTable)/(2*sizeof(Ipp32s));
}
if(dm == Big){
Table = (const Ipp32s*)BigDiamondTable;
TableSize = sizeof(BigDiamondTable)/(2*sizeof(Ipp32s));
}
if(pix == QuadPixIn) Step = 16;
if(pix == IntegerPixIn) Step = 4;
if(pix == HalfPixIn) Step = 2;
if(pix == QuarterPixIn) Step = 1;
MeMV CenterMV = m_BestMV[m_BlockIdx];
for(Ipp32s i=0; i<TableSize; i++){
EstimatePoint<mt, pix>(CenterMV + MeMV(Step*Table[2*i + 0], Step*Table[2*i + 1]));
}
}
template<MeMbPart mt, MeInPixType pix> bool MeBase::RefineSearch()
{
if(mt != Mb8x8 && mt != Mb16x16) assert(false);
switch(pix){
case QuadPixIn: //this means refinement after quard search, pixel type is integer here!
EstimatePoint<mt, IntegerPixIn>(m_BestMV[m_BlockIdx]);
//FullSearch<mt, IntegerPixel>(16,16); //16 = 4 pixel
DiamondSearch<mt, IntegerPixIn, Big>();
DiamondSearch<mt, IntegerPixIn, Medium>();
DiamondSearch<mt, IntegerPixIn, Small>();
return true;
case IntegerPixIn: //this means refinement after prediction.
for(int j=0; j<2; j++){ //maximum number of big steps that are allowed
MeMV CenterMV = m_BestMV[m_BlockIdx];
DiamondSearch<mt, IntegerPixIn, Medium>();
//check if best MV in center
if(CenterMV == m_BestMV[m_BlockIdx]){
//refine by small diamond
DiamondSearch<mt, IntegerPixIn, Small>();
return true;
}
}
return false;
case QuarterPixIn:
//FullSearch<mt, QuarterPixel>(4,4); // 4 = 1 pixel
DiamondSearch<mt, QuarterPixIn, Big>();
DiamondSearch<mt, QuarterPixIn, Medium>();
DiamondSearch<mt, QuarterPixIn, Small>();
return true;
case HalfPixIn: // TODO: rewrite this! Big diamond is excessive here!
DiamondSearch<mt, HalfPixIn, Big>();
DiamondSearch<mt, HalfPixIn, Medium>();
DiamondSearch<mt, HalfPixIn, Small>();
return true;
default:
assert(false);
}
return false;
}
}
#endif // defined (UMC_ENABLE_VC1_VIDEO_ENCODER)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -