📄 umc_me.cpp

📁 audio-video-codecs.rar语音编解码器
💻 CPP
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34

    IntraCost = 0;
    InterCost = 0;

    //for interpolation
    const   Ipp8u *pSrc, *pRefF, *pRefB;
    pSrc = m_pSrcFrameY+16*m_CurMB.y*m_SrcStep+16*m_CurMB.x;
    pRefF = m_MePar->pRefFY[0]+16*m_CurMB.y*m_RefStep+16*m_CurMB.x;

    //1 choose best Inter mode
    if(m_MePar->SearchDirection == forward_search){
        //FORWARD
        //interpolate
        // TODO: skip for integer pixel and for fast intra decision?
        Interpolate(pRefF,m_RefStep,m_16x16buf,16, m_BestMV[0]);

        //chose best cost
        m_ResMB[m_adr].MbType = MbFrw;
        m_ResMB[m_adr].MbCosts[0] = m_BestCost[0];
        m_ResMB[m_adr].MVs[0] = m_BestMV[0];
    }else if(m_MePar->SearchDirection == bidir_search){
        //BIDIR
        //interpolate
        // TODO: skip for integer pixel?
        pRefB = m_MePar->pRefBY[0]+16*m_CurMB.y*m_RefStep+16*m_CurMB.x;
        Interpolate(pRefF,m_RefStep,m_16x16buf,16, m_BestMV[0]);
        Interpolate(pRefB,m_RefStep,m_16x16bufB,16, m_BestBMV[0][0]);
        ippiAverage16x16_8u_C1R(m_16x16buf, 16, m_16x16bufB, 16, m_bufAvrg, 16);

        //calculate bidir SAD to compare with frw, bkw SADs
        MeCost cost;
        //GetCost<Mb16x16,IntegerPixIn,SAD>(pSrc, m_SrcStep, m_bufAvrg, 16, &cost, 0);
        CalcCost<Mb16x16,IntegerPixIn>(pSrc, m_SrcStep, m_bufAvrg, 16, &cost,0,m_CostMetrics);
        BidirCost = cost.sum+WeightMV(m_BestMV[0])+WeightMV(m_BestBMV[0][0])+32; // TODO: replace 32 by something meaningful.

        //chose best cost
        Ipp32s MinCost = IPP_MIN(m_BestCost[0], IPP_MIN(m_BestBCost[0][0], BidirCost));
        if(MinCost == m_BestCost[0]){
            m_ResMB[m_adr].MbType = MbFrw;
            m_ResMB[m_adr].MbCosts[0] = m_BestCost[0];
            m_ResMB[m_adr].MVs[0] = m_BestMV[0];
        }else if(MinCost == m_BestBCost[0][0]){
            m_ResMB[m_adr].MbType = MbBkw;
            m_ResMB[m_adr].MbCosts[0] = m_BestBCost[0][0];
            m_ResMB[m_adr].MVs[0] = m_BestBMV[0][0];
        }else if(MinCost == BidirCost){
            m_ResMB[m_adr].MbType = MbBidir;
            m_ResMB[m_adr].MbCosts[0] = BidirCost;
            m_ResMB[m_adr].MVs[0] = m_BestMV[0];
            m_ResMB[m_adr].MVs[1] = m_BestBMV[0][0];;
        }else assert(0);
    } else assert(0);


    //1 choose Intra mode
    MeInnerParams* inPars = m_MePar->GetMeInnerParams();
    if(inPars->UseFastIntraInterDecision == 1){
        //fast intra/inter decision
        InterCost = m_ResMB[m_adr].MbCosts[0];
        ippiMeanAbsDev16x16_8u32s_C1R(pSrc, m_SrcStep, &IntraCost);
        IntraCost += 256; // TODO: should be qp related!
     }
    else{
        //accurate decision, calculate difference
        if(m_ResMB[m_adr].MbType == MbFrw){
            ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_16x16buf, 16, bufDiff, 32);
        }else if(m_ResMB[m_adr].MbType == MbBkw){
            ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_16x16bufB, 16, bufDiff, 32);
        }else if(m_ResMB[m_adr].MbType == MbBidir){
            ippiSub16x16_8u16s_C1R(pSrc, m_SrcStep, m_bufAvrg, 16, bufDiff, 32);
        }else assert(0);

        //calculate Intra cost
        for(Ipp32s i=0; i<4; i++){
            HadamardFwd<Ipp8u,8>(pSrc+8*(i/2)*m_SrcStep+8*(i&1), m_SrcStep, bufHDMR);
            for(Ipp32s j=(i==0?0:1)/*DC predicted!*/; j<64; j++)   // TODO: if one of the neighbor is I then DC is predicted, if not first block DC should be transmited
                IntraCost+=abs((Ipp32s)bufHDMR[j]);

            HadamardFwd<Ipp16s,8>(bufDiff+16*8*(i/2)+8*(i&1), 32, bufHDMR);
            for(Ipp32s j=0; j<64; j++)
                InterCost+=abs((Ipp32s)bufHDMR[j]);
        }
    }

    if(InterCost>IntraCost)
    {
        m_ResMB[m_adr].MbType = MbIntra;
        memset(m_ResMB[m_adr].MVs,0,m_NumOfMVs * sizeof(MeMV)); //to simplify prediction calculation
    }

}


// Evaluates a single candidate motion vector for the current macroblock and
// records it in m_BestCost / m_BestMV if it is cheaper than what is stored.
//
// Template parameters:
//   mt  - partition being estimated; Mb16x16, Mb8x8 and Mb4x8x8 are handled.
//   pix - search precision: QuadPixIn (4x down-sampled planes), IntegerPixIn,
//         HalfPixIn or QuarterPixIn.
// Parameter:
//   mv  - candidate vector. Its components are shifted right by 2 (by 4 for
//         QuadPixIn) to get the sample offset, and the low two bits are used
//         as the fractional part, i.e. it is handled as quarter-sample units.
//
// Candidates outside [-m_SearchRange, m_SearchRange) are silently ignored.
// The reference position is clamped so the block stays inside m_PicRange.
template<MeMbPart mt, MeInPixType pix> void MeBase::EstimatePoint(MeMV mv)
{
    if( mv.x<-m_SearchRangeX || mv.x>=m_SearchRangeX || mv.y<-m_SearchRangeY || mv.y>=m_SearchRangeY)
        return;

    Ipp32s MbWidth, MvShift, XboardSub, YboardSub;
    Ipp32s LeftLimit, TopLimit, RightLimit, BottomLimit;

    // Block size, mv-to-sample shift and picture limits for the chosen precision.
    switch(pix)
    {
    case QuadPixIn:
        // Down-sampled planes: everything is expressed in 1/4-size coordinates.
        MbWidth = 4;
        MvShift = 4;
        LeftLimit   = m_PicRange.top_left.x >> 2;
        TopLimit    = m_PicRange.top_left.y >> 2;
        RightLimit  = m_PicRange.bottom_right.x >> 2;
        BottomLimit = m_PicRange.bottom_right.y >> 2;
        break;
    case IntegerPixIn:
        MbWidth = 16;
        MvShift = 2;
        LeftLimit   = m_PicRange.top_left.x;
        TopLimit    = m_PicRange.top_left.y;
        RightLimit  = m_PicRange.bottom_right.x;
        BottomLimit = m_PicRange.bottom_right.y;
        break;
    case HalfPixIn:
        MbWidth = 16;
        MvShift = 2;
        // A half-sample component shrinks the right/bottom limit by one sample.
        XboardSub = ((mv.x & 2) != 0) ? 1 : 0;
        YboardSub = ((mv.y & 2) != 0) ? 1 : 0;
        LeftLimit   = m_PicRange.top_left.x;
        TopLimit    = m_PicRange.top_left.y;
        RightLimit  = m_PicRange.bottom_right.x - XboardSub;
        BottomLimit = m_PicRange.bottom_right.y - YboardSub;
        break;
    case QuarterPixIn:
        MbWidth = 16;
        MvShift = 2;
        // Any fractional component shrinks the right/bottom limit by one sample.
        XboardSub = ((mv.x & 3) != 0) ? 1 : 0;
        YboardSub = ((mv.y & 3) != 0) ? 1 : 0;
        LeftLimit   = m_PicRange.top_left.x;
        TopLimit    = m_PicRange.top_left.y;
        RightLimit  = m_PicRange.bottom_right.x - XboardSub;
        BottomLimit = m_PicRange.bottom_right.y - YboardSub;
        break;
    default:
        // Unknown precision: the limits above would be left uninitialised.
        assert(false);
        return;
    }

    // Top-left corner of the candidate block in the reference plane.
    Ipp32s x = MbWidth*m_CurMB.x + (mv.x>>MvShift);
    Ipp32s y = MbWidth*m_CurMB.y + (mv.y>>MvShift);

    // Clamp so that the whole block lies inside the allowed picture range.
    if(x < LeftLimit) x = LeftLimit;
    else if(x > RightLimit - MbWidth) x = RightLimit - MbWidth;
    if(y < TopLimit) y = TopLimit;
    else if(y > BottomLimit - MbWidth) y = BottomLimit - MbWidth;

    MeCost cost;
    const Ipp8u *pSrc=m_pSrcFrameY+16*m_CurMB.y*m_SrcStep+16*m_CurMB.x;
    const Ipp8u *pRef=m_pRefFrameY+y*m_RefStep+x;
    Ipp32s srcStep=m_SrcStep;
    Ipp32s refStep=m_RefStep;
    Ipp32s mc_type=0;

    switch(pix){
        case QuadPixIn:
            // Switch both pointers and steps over to the down-sampled planes.
            srcStep=m_SrcStepDwn4/4;
            refStep=m_RefStepDwn4/4;
            pSrc = m_pSrcFrameYDwn4+4*m_CurMB.y*m_SrcStepDwn4/4+4*m_CurMB.x;
            pRef= m_pRefFrameYDwn4+(y+(m_Padding >> 2))*m_RefStepDwn4/4+(x+(m_Padding >> 2));
            break;

        case IntegerPixIn:
            if(mt == Mb8x8){
                // m_BlockIdx 1..4 selects one 8x8 quadrant of the macroblock.
                Ipp32s dx = 8*((m_BlockIdx-1)&1);
                Ipp32s dy = 8*((m_BlockIdx-1)/2);
                pSrc += m_SrcStep*dy+dx;
                pRef += m_RefStep*dy+dx;
            }
            break;

        case HalfPixIn:
            // TODO: interpolate for SATD here
            assert(m_MePar->Interpolation == Bilinear);
            // The half-sample flags come from the vector's half-sample bit
            // (bit 1 in quarter-sample units). The integer position x/y holds
            // no sub-sample information, so it must not be used here.
            // Result: 8 for a horizontal half step, 4 for a vertical one
            // (presumably the IPPVC_MC_APX_* encoding CalcCost forwards - confirm).
            mc_type = ((mv.x & 0x2)<< 2) | ((mv.y & 0x2)<< 1);
            break;

        case QuarterPixIn:
            // Build the fractional-position prediction in a scratch buffer and
            // compare against that instead of the reference plane.
            Interpolate(pRef,refStep,m_16x16buf,16, MeMV(3&mv.x,3&mv.y));
            pRef=m_16x16buf;
            refStep=16;
            break;
    }
    //GetCost<mt,pix,SAD>(pSrc, srcStep, pRef, refStep, &cost, mc_type);
    CalcCost<mt,pix>(pSrc, srcStep, pRef, refStep, &cost,mc_type,m_CostMetrics);

    Ipp32s sum = cost.sum;
    switch(mt){
        case Mb4x8x8:
            // Track the best vector of each 8x8 block (slots 1..4) and rebuild
            // the 16x16 total from the four partial costs.
            sum = 0;
            for(Ipp32s i=0; i<4; i++){
                sum+=cost.sum8x8[i];
                if((Ipp32s)cost.sum8x8[i]<m_BestCost[i+1]){
                    m_BestCost[i+1]=cost.sum8x8[i];
                    m_BestMV[i+1] = mv;
                }
            }
            //no break to compute 16x16 SAD
            assert(m_BlockIdx == 0);
            // intentional fall through

        case Mb16x16:
        case Mb8x8:
            // Add the vector cost and keep the candidate if it wins.
            sum += WeightMV(mv);
            if(sum<m_BestCost[m_BlockIdx]){
                m_BestCost[m_BlockIdx] = sum;
                m_BestMV[m_BlockIdx] = mv;
            }
            break;

        default:
            assert(false);
    }

}

// Exhaustive grid search around the current best vector of block m_BlockIdx.
//
// RangeX / RangeY are half-extents added to the centre vector; offsets cover
// [-Range, Range) in both directions. The grid spacing depends on pix:
// 16 for QuadPixIn, 4 for IntegerPixIn, 2 for HalfPixIn, 1 for QuarterPixIn.
// The centre is read once up front, so a better vector found during the scan
// does not move the window.
template<MeMbPart mt, MeInPixType pix> void MeBase::FullSearch(Ipp32s RangeX, Ipp32s RangeY)
{
    Ipp32s stride;
    switch(pix){
        case QuadPixIn:    stride = 16; break;
        case IntegerPixIn: stride = 4;  break;
        case HalfPixIn:    stride = 2;  break;
        case QuarterPixIn: stride = 1;  break;
    }

    MeMV origin = m_BestMV[m_BlockIdx];

    int offY = -RangeY;
    while(offY < RangeY){
        int offX = -RangeX;
        while(offX < RangeX){
            EstimatePoint<mt, pix>(origin+MeMV(offX,offY));
            offX += stride;
        }
        offY += stride;
    }
}


template<MeMbPart mt, MeInPixType pix, MeDiamondType dm> void MeBase::DiamondSearch()
{
    const Ipp32s BigDiamondTable[][2] = { {-3,0}, {-2,2}, {0,3}, {2,2}, {3,0}, {2,-2}, {0,-3}, {-2,-2} };
    const Ipp32s MediumDiamondTable[][2] = { {-2,0}, {-1,1}, {0,2}, {1,1}, {2,0}, {1,-1}, {0,-2}, {-1,-1} };
    const Ipp32s SmallDiamondTable[][2] = {{-1,0}, {0,1}, {1,0}, {0,-1}};

    const Ipp32s* Table;
    Ipp32s TableSize;
    Ipp32s Step;
    if(dm == Small){
        Table = (const Ipp32s*)SmallDiamondTable;
        TableSize = sizeof(SmallDiamondTable)/(2*sizeof(Ipp32s));
    }
    if(dm == Medium){
        Table = (const Ipp32s*)MediumDiamondTable;
        TableSize = sizeof(MediumDiamondTable)/(2*sizeof(Ipp32s));
    }
    if(dm == Big){
        Table = (const Ipp32s*)BigDiamondTable;
        TableSize = sizeof(BigDiamondTable)/(2*sizeof(Ipp32s));
    }

    if(pix == QuadPixIn) Step = 16;
    if(pix == IntegerPixIn) Step = 4;
    if(pix == HalfPixIn) Step = 2;
    if(pix == QuarterPixIn) Step = 1;

    MeMV   CenterMV = m_BestMV[m_BlockIdx];
    for(Ipp32s i=0; i<TableSize; i++){
        EstimatePoint<mt, pix>(CenterMV + MeMV(Step*Table[2*i + 0], Step*Table[2*i + 1]));
    }
}


template<MeMbPart mt, MeInPixType pix> bool MeBase::RefineSearch()
{
    if(mt != Mb8x8 && mt != Mb16x16) assert(false);

    switch(pix){
        case QuadPixIn: //this means refinement after quard search, pixel type is integer here!
            EstimatePoint<mt, IntegerPixIn>(m_BestMV[m_BlockIdx]);
            //FullSearch<mt, IntegerPixel>(16,16); //16 = 4 pixel
            DiamondSearch<mt, IntegerPixIn, Big>();
            DiamondSearch<mt, IntegerPixIn, Medium>();
            DiamondSearch<mt, IntegerPixIn, Small>();
            return true;

        case IntegerPixIn:  //this means refinement after prediction.
            for(int j=0; j<2; j++){ //maximum number of big steps that are allowed
                MeMV CenterMV = m_BestMV[m_BlockIdx];
                DiamondSearch<mt, IntegerPixIn, Medium>();

                //check if best MV in center
                if(CenterMV == m_BestMV[m_BlockIdx]){
                    //refine by small diamond
                    DiamondSearch<mt, IntegerPixIn, Small>();
                    return true;
                }
            }
            return false;

        case QuarterPixIn:
            //FullSearch<mt, QuarterPixel>(4,4); // 4 = 1 pixel
            DiamondSearch<mt, QuarterPixIn, Big>();
            DiamondSearch<mt, QuarterPixIn, Medium>();
            DiamondSearch<mt, QuarterPixIn, Small>();
            return true;

        case HalfPixIn:  // TODO: rewrite this! Big diamond is excessive here!
            DiamondSearch<mt, HalfPixIn, Big>();
            DiamondSearch<mt, HalfPixIn, Medium>();
            DiamondSearch<mt, HalfPixIn, Small>();
            return true;

        default:
            assert(false);
    }
    return false;
}

}

#endif // defined (UMC_ENABLE_VC1_VIDEO_ENCODER)
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -