⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mp4_enc_vop.cpp

📁 audio-video-codecs.rar语音编解码器
💻 CPP
📖 第 1 页 / 共 5 页
字号:
        } else {
            ippiDCT8x8Fwd_16s_C1I(coeffMB+3*64);
            ippiQuantInter_H263_16s_C1I(coeffMB+3*64, quant, &nzCount[3], 0);
        }
    }
    ippiSubSAD8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16, &sU);
    ippiSubSAD8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16, &sV);
    if (sU < lim) {
        nzCount[4] = 0;
        coeffMB[4*64] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1I(coeffMB+4*64);
        ippiQuantInter_H263_16s_C1I(coeffMB+4*64, quant, &nzCount[4], 0);
    }
    if (sV < lim) {
        nzCount[5] = 0;
        coeffMB[5*64] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1I(coeffMB+5*64);
        ippiQuantInter_H263_16s_C1I(coeffMB+5*64, quant, &nzCount[5], 0);
    }
    mp4_SetPatternInter(pattern, nzCount);
    return pattern;
}


// Forward-DCT, quantize and DC/AC-predict the six 8x8 blocks of one intra
// macroblock (four luma blocks read from pY, then pU and pV).
//
// Results:
//   coeffMB   - receives the 6*64 coefficients (see the note on costRD below)
//   nzCount   - per-block counts written by the quantizer / mp4_NonZeroCount
//   *dct_type - 1 if the frame/field SAD test picked field DCT, otherwise 0
//   *ac_pred  - final AC-prediction flag
//   *pat      - pattern computed right after quantization, before prediction
//   MBcurr    - validPredIntra and quant are set for all six blocks
//   predDir   - filled by PredictIntraDCAC; reset to IPPVC_SCAN_ZIGZAG when
//               AC prediction is rejected
//   return    - pattern computed after prediction using use_intra_dc_vlc
//
// When costRD is non-NULL, *costRD is read as the cost to compare against
// (called costInter in this routine) and is overwritten with the intra cost
// accumulated until the loop finished or the intra cost reached that value.
// NOTE(review): in that mode the blocks visited by the cost loop are
// inverse-quantized through coeffMB itself - confirm callers expect this.
int ippVideoEncoderMPEG4::TransMacroBlockIntra_MPEG4(Ipp8u *pY, Ipp8u *pU, Ipp8u *pV, Ipp16s *coeffMB, Ipp32s *nzCount, int quant, int row, int col, int *dct_type, int use_intra_dc_vlc, mp4_MacroBlock *MBcurr, int *predDir, int startRow, int *ac_pred, int *pat, int *costRD)
{
    int  cbp, cbpBeforePred;
    int  acPredOn, acSum0, acSum1;
    int  fieldDct = 0;
    __ALIGN16(Ipp16s, dctSaved, 64*6);

    // Field DCT is only evaluated for interlaced content, and boundary
    // macroblocks are always frame-DCT coded.
    const bool onBorder = (row == 0) || (col == 0) || (row == (mNumMacroBlockPerCol-1)) || (col == (mNumMacroBlockPerRow-1));
    if (VOL.interlaced && !onBorder) {
        Ipp32s  sadFrame, sadField;
        ippiFrameFieldSAD16x16_8u32s_C1R(pY, mStepLuma, &sadFrame, &sadField);
        fieldDct = (sadField + 999) < sadFrame;
    }
    *dct_type = fieldDct;

    // Frame DCT: consecutive rows, lower luma blocks start 8 rows down.
    // Field DCT: every second row, lower luma blocks start 1 row down.
    const int  lumaStep = fieldDct ? mStepLuma * 2 : mStepLuma;
    const int  lowerOff = fieldDct ? mStepLuma : mStepLuma * 8;
    Ipp8u     *blkSrc[6] = { pY, pY + 8, pY + lowerOff, pY + lowerOff + 8, pU, pV };

    for (int blk = 0; blk < 6; blk ++)
        ippiDCT8x8Fwd_8u16s_C1R(blkSrc[blk], (blk < 4) ? lumaStep : mStepChroma, coeffMB + blk*64);

    // keep the unquantized DCT coefficients for the RD mode decision
    if (costRD != NULL) {
        ippsCopy_16s(coeffMB, dctSaved, 64 * 6);
    }

    for (int blk = 0; blk < 6; blk ++)
        ippiQuantIntra_MPEG4_16s_C1I(coeffMB + blk*64, mQuantIntraSpec, quant, &nzCount[blk], (blk < 4) ? IPPVC_BLOCK_LUMA : IPPVC_BLOCK_CHROMA);

    mp4_SetPatternIntra(cbpBeforePred, nzCount, coeffMB, 1);
    acPredOn = (cbpBeforePred != 0);

    for (int blk = 0; blk < 6; blk ++) {
        MBcurr->block[blk].validPredIntra = 1;
        MBcurr->block[blk].quant = (Ipp8u)quant;
    }
    PredictIntraDCAC(MBcurr, coeffMB, quant, predDir, acPredOn, &acSum0, &acSum1, nzCount, row - startRow);

    if (acPredOn) {
        // AC prediction is kept only when the first sum reported by
        // PredictIntraDCAC is strictly greater than the second one.
        if (acSum0 > acSum1) {
            // nzCount may have changed during AC prediction, so recount it
            // (correcting it inside the prediction itself would be cheaper)
            mp4_NonZeroCount(coeffMB, nzCount);
        } else {
            acPredOn = 0;
            mp4_RestoreIntraAC(MBcurr, coeffMB, predDir);
            for (int blk = 0; blk < 6; blk ++)
                predDir[blk] = IPPVC_SCAN_ZIGZAG;
        }
    }

    mp4_SetPatternIntra(cbp, nzCount, coeffMB, use_intra_dc_vlc);
    *ac_pred = acPredOn;
    *pat = cbpBeforePred;

    // No RD mode decision requested: nothing more to do.
    if (costRD == NULL)
        return cbp;

    const int costInter = *costRD;
    int       costIntra = 0;
    for (int b = 0; b < 6; b ++) {
        const Ipp8u *scan;
        if (VOP.alternate_vertical_scan_flag)
            scan = mp4_AltVertScan;
        else if (predDir[b] == IPPVC_SCAN_HORIZONTAL)
            scan = mp4_HorScan;
        else if (predDir[b] == IPPVC_SCAN_VERTICAL)
            scan = mp4_AltVertScan;
        else
            scan = mp4_ZigZagScan;

        // rate term: value returned by mp4_CalcBitsCoeffsIntra scaled by RD_MUL
        costIntra += RD_MUL * mp4_CalcBitsCoeffsIntra(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, use_intra_dc_vlc, scan, b);

        // put back the DC value stored in the macroblock info, then
        // inverse-quantize the block before measuring the error
        coeffMB[b*64] = MBcurr->block[b].dct_dcq;
        ippiQuantInvIntra_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvIntraSpec, quant, (b < 4) ? IPPVC_BLOCK_LUMA : IPPVC_BLOCK_CHROMA);

        // distortion term: sum of squared differences between the
        // reconstructed coefficients and the saved forward-DCT output
        Ipp64s sqDiff;
        ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, dctSaved+b*64, 64, &sqDiff, 0);
        const int ssd = (int)sqDiff;
        costIntra += RD_MUL * ssd / (quant * quant);

        // stop as soon as intra is no cheaper than the reference cost
        if (costIntra >= costInter)
            break;
    }
    *costRD = costIntra;
    return cbp;
}


int ippVideoEncoderMPEG4::TransMacroBlockInter_MPEG4(Ipp8u *pYc, Ipp8u *pUc, Ipp8u *pVc, Ipp16s *coeffMB, Ipp32s *nzCount, int quant, Ipp8u *mcPred, int row, int col, int *dct_type, int trellis, int *costRD)
{
    int   pattern, sE[6], lim, b, costInter;
    Ipp8u *qmat = VOL.quant_type ? VOL.nonintra_quant_mat : NULL;
    const Ipp8u *scan = VOP.alternate_vertical_scan_flag ? mp4_AltVertScan : mp4_ZigZagScan;
    __ALIGN16(Ipp16s, coeffFDCT, 64);

    costInter = 0;
    lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
    if (VOL.interlaced) {
        __ALIGN16(Ipp16s, coeff, 64*4);
        int   off23, s, dctt = 0, off[4];

        ippiSub16x16_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeff, 32);
        // boundary MBs should be frame-DCT coded
        if (row != 0 && col != 0 && row != (mNumMacroBlockPerCol-1) && col != (mNumMacroBlockPerRow-1)) {
            Ipp32s  sfr, sfi;
            ippiFrameFieldSAD16x16_16s32s_C1R(coeff, 32, &sfr, &sfi);
            dctt = (sfi + 999) < sfr;
        }
        *dct_type = dctt;
        if (dctt) {
            ippiSAD8x8_8u32s_C1R(pYc, 2*mStepLuma, mcPred, 32, &sE[0], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+8, 2*mStepLuma, mcPred+8, 32, &sE[1], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+mStepLuma, 2*mStepLuma, mcPred+16, 32, &sE[2], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+mStepLuma+8, 2*mStepLuma, mcPred+24, 32, &sE[3], IPPVC_MC_APX_FF);
            off23 = 16;
            s = 64;
        } else {
            ippiSAD8x8_8u32s_C1R(pYc, mStepLuma, mcPred, 16, &sE[0], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+8, mStepLuma, mcPred+8, 16, &sE[1], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, &sE[2], IPPVC_MC_APX_FF);
            ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, &sE[3], IPPVC_MC_APX_FF);
            off23 = 2*64;
            s = 32;
        }
        off[0] = 0; off[1] = 8; off[2] = off23; off[3] = off23 + 8;
        for (b = 0; b < 4; b ++) {
            if (sE[b] < lim) {
                nzCount[b] = 0;
            } else {
                ippiDCT8x8Fwd_16s_C1R(coeff+off[b], s, coeffMB+b*64);
                if (trellis || (costRD != NULL))
                    ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
                ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
                if (trellis && (nzCount[b] != 0))
                    nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
                if (costRD != NULL) {
                    if (nzCount[b] != 0) {
                        costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
                        ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
                    }
                    int ssd = 0;
                    Ipp64s n;
                    ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
                    ssd = ssd + (int)n;
                    //for (int i = 0; i < 64; i ++)
                    //    ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
                    costInter += RD_MUL * ssd / (quant * quant);
                }
            }
        }
    } else {
        *dct_type = 0;
        ippiSubSAD8x8_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeffMB+0*64, 16, &sE[0]);
        ippiSubSAD8x8_8u16s_C1R(pYc+8, mStepLuma, mcPred+8, 16, coeffMB+1*64, 16, &sE[1]);
        ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, coeffMB+2*64, 16, &sE[2]);
        ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, coeffMB+3*64, 16, &sE[3]);
        for (b = 0; b < 4; b ++) {
            if (sE[b] < lim) {
                nzCount[b] = 0;
            } else {
                ippiDCT8x8Fwd_16s_C1I(coeffMB+b*64);
                if (trellis || (costRD != NULL))
                    ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
                ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
                if (trellis && (nzCount[b] != 0))
                    nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
                if (costRD != NULL) {
                    if (nzCount[b] != 0) {
                        costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
                        ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
                    }
                    int ssd = 0;
                    Ipp64s n;
                    ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
                    ssd = ssd + (int)n;
                    //for (int i = 0; i < 64; i ++)
                    //    ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
                    costInter += RD_MUL * ssd / (quant * quant);
                }
            }
        }
    }
    ippiSubSAD8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16, &sE[4]);
    ippiSubSAD8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16, &sE[5]);
    for (b = 4; b < 6; b ++) {
        if (sE[b] < lim) {
            nzCount[b] = 0;
        } else {
            ippiDCT8x8Fwd_16s_C1I(coeffMB+b*64);
            if (trellis || (costRD != NULL))
                ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
            ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
            if (trellis && (nzCount[b] != 0))
                nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
            if (costRD != NULL) {
                if (nzCount[b] != 0) {
                    costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
                    ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
                }
                int ssd = 0;
                Ipp64s n;
                ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
                ssd = ssd + (int)n;
                //for (int i = 0; i < 64; i ++)
                //    ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
                costInter += RD_MUL * ssd / (quant * quant);
            }
        }
    }
#if 0
    __ALIGN16(Ipp16s, coeff, 64*4);
    Ipp32s   pattern, sU, sV, sL0, sL1, sL2, sL3, lim, off23, s, dctt = 0;

    lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
    ippiSub16x16_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeff, 32);
    if (VOL.interlaced) {
        // boundary MBs should be frame-DCT coded
        if (row != 0 && col != 0 && row != (mNumMacroBlockPerCol-1) && col != (mNumMacroBlockPerRow-1)) {
            Ipp32s  sfr, sfi;
            ippiFrameFieldSAD16x16_16s32s_C1R(coeff, 32, &sfr, &sfi);
            dctt = (sfi + 999) < sfr;
        }
    }
    if (dctt) {
        ippiSAD8x8_8u32s_C1R(pYc, 2*mStepLuma, mcPred, 32, &sL0, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+8, 2*mStepLuma, mcPred+8, 32, &sL1, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+mStepLuma, 2*mStepLuma, mcPred+16, 32, &sL2, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+mStepLuma+8, 2*mStepLuma, mcPred+24, 32, &sL3, IPPVC_MC_APX_FF);
        off23 = 16;
        s = 64;
    } else {
        ippiSAD8x8_8u32s_C1R(pYc, mStepLuma, mcPred, 16, &sL0, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+8, mStepLuma, mcPred+8, 16, &sL1, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, &sL2, IPPVC_MC_APX_FF);
        ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, &sL3, IPPVC_MC_APX_FF);
        off23 = 2*64;
        s = 32;
    }
    *dct_type = dctt;
    if (sL0 < lim) {
        nzCount[0] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1R(coeff, s, coeffMB+0*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+0*64, mQuantInterSpec, quant, &nzCount[0]);
    }
    if (sL1 < lim) {
        nzCount[1] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1R(coeff+8, s, coeffMB+1*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+1*64, mQuantInterSpec, quant, &nzCount[1]);
    }
    if (sL2 < lim) {
        nzCount[2] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1R(coeff+off23, s, coeffMB+2*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+2*64, mQuantInterSpec, quant, &nzCount[2]);
    }
    if (sL3 < lim) {
        nzCount[3] = 0;
    } else {
        ippiDCT8x8Fwd_16s_C1R(coeff+off23+8, s, coeffMB+3*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+3*64, mQuantInterSpec, quant, &nzCount[3]);
    }
    ippiSAD8x8_8u32s_C1R(pUc, mStepChroma, mcPred+64*4, 8, &sU, IPPVC_MC_APX_FF);
    ippiSAD8x8_8u32s_C1R(pVc, mStepChroma, mcPred+64*5, 8, &sV, IPPVC_MC_APX_FF);
    if (sU < lim) {
        nzCount[4] = 0;
    } else {
        ippiSub8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16);
        ippiDCT8x8Fwd_16s_C1I(coeffMB+4*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+4*64, mQuantInterSpec, quant, &nzCount[4]);
    }
    if (sV < lim) {
        nzCount[5] = 0;
    } else {
        ippiSub8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16);
        ippiDCT8x8Fwd_16s_C1I(coeffMB+5*64);
        ippiQuantInter_MPEG4_16s_C1I(coeffMB+5*64, mQuantInterSpec, quant, &nzCount[5]);
    }
/*
    lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
    if (!dct_type) {
        ippiSubSAD8x8_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeffMB+0*64, 16, &sL0);
        ippiSubSAD8x8_8u16s_C1R(pYc+8, mStepLuma, mcPred+8, 16, coeffMB+1*64, 16, &sL1);
        ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, coeffMB+2*64, 16, &sL2);
        ippiSubSAD8x8_

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -