📄 mp4_enc_vop.cpp
字号:
} else {
ippiDCT8x8Fwd_16s_C1I(coeffMB+3*64);
ippiQuantInter_H263_16s_C1I(coeffMB+3*64, quant, &nzCount[3], 0);
}
}
ippiSubSAD8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16, &sU);
ippiSubSAD8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16, &sV);
if (sU < lim) {
nzCount[4] = 0;
coeffMB[4*64] = 0;
} else {
ippiDCT8x8Fwd_16s_C1I(coeffMB+4*64);
ippiQuantInter_H263_16s_C1I(coeffMB+4*64, quant, &nzCount[4], 0);
}
if (sV < lim) {
nzCount[5] = 0;
coeffMB[5*64] = 0;
} else {
ippiDCT8x8Fwd_16s_C1I(coeffMB+5*64);
ippiQuantInter_H263_16s_C1I(coeffMB+5*64, quant, &nzCount[5], 0);
}
mp4_SetPatternInter(pattern, nzCount);
return pattern;
}
// Transform and quantize one intra macroblock using the MPEG-4 intra quantizer,
// then run intra DC/AC prediction on the quantized coefficients.
//
//   pY, pU, pV        source pixels of the macroblock (luma uses mStepLuma,
//                     chroma uses mStepChroma as the row step)
//   coeffMB           out: 6 blocks of 64 coefficients (4 luma, then U, V)
//   nzCount           out: per-block counts written by the quantizer and,
//                     when AC prediction is kept, refreshed by mp4_NonZeroCount
//   quant             quantizer value; also stored into every MBcurr->block[]
//   row, col          macroblock position, used for the interlaced boundary test
//   dct_type          out: 1 when the field-ordered luma layout was used, else 0
//   use_intra_dc_vlc  forwarded to mp4_SetPatternIntra and the bit-count helper
//   MBcurr            macroblock record updated here and by PredictIntraDCAC
//   predDir           per-block direction/scan values, filled by
//                     PredictIntraDCAC and reset to IPPVC_SCAN_ZIGZAG when AC
//                     prediction is dropped
//   startRow          subtracted from row before calling PredictIntraDCAC
//   ac_pred           out: final AC prediction flag
//   pat               out: pattern computed before prediction (dc-vlc arg = 1)
//   costRD            optional; in: cost the intra mode has to beat,
//                     out: intra cost accumulated here (the accumulation stops
//                     early once it reaches the incoming cost)
//
// Returns the pattern computed after prediction with use_intra_dc_vlc.
//
// NOTE: when costRD is not NULL, every block visited by the cost loop is
// dequantized in place, so coeffMB no longer holds quantized levels for those
// blocks on return.
int ippVideoEncoderMPEG4::TransMacroBlockIntra_MPEG4(Ipp8u *pY, Ipp8u *pU, Ipp8u *pV, Ipp16s *coeffMB, Ipp32s *nzCount, int quant, int row, int col, int *dct_type, int use_intra_dc_vlc, mp4_MacroBlock *MBcurr, int *predDir, int startRow, int *ac_pred, int *pat, int *costRD)
{
    int codedPattern, prePredPattern;
    int useAcPred, predSum0, predSum1;
    int fieldDct = 0;
    __ALIGN16(Ipp16s, coeffFDCT, 64*6);

    // Field DCT is only considered for interlaced content, and never for
    // macroblocks on the picture boundary (those stay frame-DCT coded).
    if (VOL.interlaced &&
        row != 0 && col != 0 &&
        row != (mNumMacroBlockPerCol-1) && col != (mNumMacroBlockPerRow-1)) {
        Ipp32s sadFrame, sadField;
        ippiFrameFieldSAD16x16_8u32s_C1R(pY, mStepLuma, &sadFrame, &sadField);
        fieldDct = (sadField + 999) < sadFrame;
    }

    // Frame layout: blocks 2/3 start 8 rows down, rows are consecutive.
    // Field layout: blocks 2/3 start 1 row down, every second row is taken.
    const int lumaStep = fieldDct ? mStepLuma * 2 : mStepLuma;
    const int lowerOff = fieldDct ? mStepLuma : mStepLuma * 8;
    *dct_type = fieldDct;

    // Forward DCT: four luma blocks followed by U and V.
    Ipp8u *const lumaSrc[4] = { pY, pY + 8, pY + lowerOff, pY + lowerOff + 8 };
    for (int blk = 0; blk < 4; blk++) {
        ippiDCT8x8Fwd_8u16s_C1R(lumaSrc[blk], lumaStep, coeffMB + blk*64);
    }
    ippiDCT8x8Fwd_8u16s_C1R(pU, mStepChroma, coeffMB + 4*64);
    ippiDCT8x8Fwd_8u16s_C1R(pV, mStepChroma, coeffMB + 5*64);

    // Keep the unquantized coefficients as the distortion reference for the
    // RD mode decision below.
    if (costRD != NULL) {
        ippsCopy_16s(coeffMB, coeffFDCT, 64 * 6);
    }

    // Quantize all six blocks in place.
    for (int blk = 0; blk < 6; blk++) {
        ippiQuantIntra_MPEG4_16s_C1I(coeffMB + blk*64, mQuantIntraSpec, quant, &nzCount[blk], (blk < 4) ? IPPVC_BLOCK_LUMA : IPPVC_BLOCK_CHROMA);
    }

    // AC prediction is attempted only when the pre-prediction pattern is non-zero.
    mp4_SetPatternIntra(prePredPattern, nzCount, coeffMB, 1);
    useAcPred = (prePredPattern != 0);

    for (int blk = 0; blk < 6; blk++) {
        MBcurr->block[blk].validPredIntra = 1;
        MBcurr->block[blk].quant = (Ipp8u)quant;
    }

    PredictIntraDCAC(MBcurr, coeffMB, quant, predDir, useAcPred, &predSum0, &predSum1, nzCount, row - startRow);

    if (useAcPred) {
        if (predSum0 > predSum1) {
            // AC prediction is kept: it may have changed the coefficients, so
            // the non-zero counts are recomputed (correcting them during the
            // prediction itself would be the cheaper alternative).
            mp4_NonZeroCount(coeffMB, nzCount);
        } else {
            // AC prediction is dropped: restore the AC coefficients and fall
            // back to zigzag scan for every block.
            useAcPred = 0;
            mp4_RestoreIntraAC(MBcurr, coeffMB, predDir);
            for (int blk = 0; blk < 6; blk++) {
                predDir[blk] = IPPVC_SCAN_ZIGZAG;
            }
        }
    }

    mp4_SetPatternIntra(codedPattern, nzCount, coeffMB, use_intra_dc_vlc);
    *ac_pred = useAcPred;
    *pat = prePredPattern;

    if (costRD == NULL) {
        return codedPattern;
    }

    // RD cost of the intra mode: bit estimate plus squared reconstruction
    // error in the transform domain, accumulated block by block.
    const int costToBeat = *costRD;
    int intraCost = 0;
    for (int blk = 0; blk < 6; blk++) {
        Ipp16s *blkCoeff = coeffMB + blk*64;

        const Ipp8u *scan;
        if (VOP.alternate_vertical_scan_flag) {
            scan = mp4_AltVertScan;
        } else if (predDir[blk] == IPPVC_SCAN_HORIZONTAL) {
            scan = mp4_HorScan;
        } else if (predDir[blk] == IPPVC_SCAN_VERTICAL) {
            scan = mp4_AltVertScan;
        } else {
            scan = mp4_ZigZagScan;
        }

        intraCost += RD_MUL * mp4_CalcBitsCoeffsIntra(blkCoeff, nzCount[blk], VOL.reversible_vlc, use_intra_dc_vlc, scan, blk);

        // Put the DC value stored in the macroblock record back before
        // dequantizing the block in place.
        blkCoeff[0] = MBcurr->block[blk].dct_dcq;
        ippiQuantInvIntra_MPEG4_16s_C1I(blkCoeff, 63, mQuantInvIntraSpec, quant, (blk < 4) ? IPPVC_BLOCK_LUMA : IPPVC_BLOCK_CHROMA);

        // Sum of squared differences between the dequantized block and the
        // saved forward-DCT output.
        Ipp64s sqrDiff;
        ippsNormDiff_L2Sqr_16s64s_Sfs(blkCoeff, coeffFDCT + blk*64, 64, &sqrDiff, 0);
        const int ssd = (int)sqrDiff;
        intraCost += RD_MUL * ssd / (quant * quant);

        // Stop as soon as intra can no longer beat the incoming cost.
        if (intraCost >= costToBeat) {
            break;
        }
    }
    *costRD = intraCost;
    return codedPattern;
}
int ippVideoEncoderMPEG4::TransMacroBlockInter_MPEG4(Ipp8u *pYc, Ipp8u *pUc, Ipp8u *pVc, Ipp16s *coeffMB, Ipp32s *nzCount, int quant, Ipp8u *mcPred, int row, int col, int *dct_type, int trellis, int *costRD)
{
int pattern, sE[6], lim, b, costInter;
Ipp8u *qmat = VOL.quant_type ? VOL.nonintra_quant_mat : NULL;
const Ipp8u *scan = VOP.alternate_vertical_scan_flag ? mp4_AltVertScan : mp4_ZigZagScan;
__ALIGN16(Ipp16s, coeffFDCT, 64);
costInter = 0;
lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
if (VOL.interlaced) {
__ALIGN16(Ipp16s, coeff, 64*4);
int off23, s, dctt = 0, off[4];
ippiSub16x16_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeff, 32);
// boundary MBs should be frame-DCT coded
if (row != 0 && col != 0 && row != (mNumMacroBlockPerCol-1) && col != (mNumMacroBlockPerRow-1)) {
Ipp32s sfr, sfi;
ippiFrameFieldSAD16x16_16s32s_C1R(coeff, 32, &sfr, &sfi);
dctt = (sfi + 999) < sfr;
}
*dct_type = dctt;
if (dctt) {
ippiSAD8x8_8u32s_C1R(pYc, 2*mStepLuma, mcPred, 32, &sE[0], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8, 2*mStepLuma, mcPred+8, 32, &sE[1], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+mStepLuma, 2*mStepLuma, mcPred+16, 32, &sE[2], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+mStepLuma+8, 2*mStepLuma, mcPred+24, 32, &sE[3], IPPVC_MC_APX_FF);
off23 = 16;
s = 64;
} else {
ippiSAD8x8_8u32s_C1R(pYc, mStepLuma, mcPred, 16, &sE[0], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8, mStepLuma, mcPred+8, 16, &sE[1], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, &sE[2], IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, &sE[3], IPPVC_MC_APX_FF);
off23 = 2*64;
s = 32;
}
off[0] = 0; off[1] = 8; off[2] = off23; off[3] = off23 + 8;
for (b = 0; b < 4; b ++) {
if (sE[b] < lim) {
nzCount[b] = 0;
} else {
ippiDCT8x8Fwd_16s_C1R(coeff+off[b], s, coeffMB+b*64);
if (trellis || (costRD != NULL))
ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
if (trellis && (nzCount[b] != 0))
nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
if (costRD != NULL) {
if (nzCount[b] != 0) {
costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
}
int ssd = 0;
Ipp64s n;
ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
ssd = ssd + (int)n;
//for (int i = 0; i < 64; i ++)
// ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
costInter += RD_MUL * ssd / (quant * quant);
}
}
}
} else {
*dct_type = 0;
ippiSubSAD8x8_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeffMB+0*64, 16, &sE[0]);
ippiSubSAD8x8_8u16s_C1R(pYc+8, mStepLuma, mcPred+8, 16, coeffMB+1*64, 16, &sE[1]);
ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, coeffMB+2*64, 16, &sE[2]);
ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, coeffMB+3*64, 16, &sE[3]);
for (b = 0; b < 4; b ++) {
if (sE[b] < lim) {
nzCount[b] = 0;
} else {
ippiDCT8x8Fwd_16s_C1I(coeffMB+b*64);
if (trellis || (costRD != NULL))
ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
if (trellis && (nzCount[b] != 0))
nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
if (costRD != NULL) {
if (nzCount[b] != 0) {
costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
}
int ssd = 0;
Ipp64s n;
ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
ssd = ssd + (int)n;
//for (int i = 0; i < 64; i ++)
// ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
costInter += RD_MUL * ssd / (quant * quant);
}
}
}
}
ippiSubSAD8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16, &sE[4]);
ippiSubSAD8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16, &sE[5]);
for (b = 4; b < 6; b ++) {
if (sE[b] < lim) {
nzCount[b] = 0;
} else {
ippiDCT8x8Fwd_16s_C1I(coeffMB+b*64);
if (trellis || (costRD != NULL))
ippsCopy_16s(coeffMB+b*64, coeffFDCT, 64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+b*64, mQuantInterSpec, quant, &nzCount[b]);
if (trellis && (nzCount[b] != 0))
nzCount[b] = mp4_TrellisQuant(coeffFDCT, coeffMB+b*64, quant, qmat, scan, nzCount[b]);
if (costRD != NULL) {
if (nzCount[b] != 0) {
costInter += RD_MUL * mp4_CalcBitsCoeffsInter(coeffMB+b*64, nzCount[b], VOL.reversible_vlc, scan);
ippiQuantInvInter_MPEG4_16s_C1I(coeffMB+b*64, 63, mQuantInvInterSpec, quant);
}
int ssd = 0;
Ipp64s n;
ippsNormDiff_L2Sqr_16s64s_Sfs(coeffMB+b*64, coeffFDCT, 64, &n, 0);
ssd = ssd + (int)n;
//for (int i = 0; i < 64; i ++)
// ssd += (coeffMB[b*64+i] - coeffFDCT[i]) * (coeffMB[b*64+i] - coeffFDCT[i]);
costInter += RD_MUL * ssd / (quant * quant);
}
}
}
#if 0
__ALIGN16(Ipp16s, coeff, 64*4);
Ipp32s pattern, sU, sV, sL0, sL1, sL2, sL3, lim, off23, s, dctt = 0;
lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
ippiSub16x16_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeff, 32);
if (VOL.interlaced) {
// boundary MBs should be frame-DCT coded
if (row != 0 && col != 0 && row != (mNumMacroBlockPerCol-1) && col != (mNumMacroBlockPerRow-1)) {
Ipp32s sfr, sfi;
ippiFrameFieldSAD16x16_16s32s_C1R(coeff, 32, &sfr, &sfi);
dctt = (sfi + 999) < sfr;
}
}
if (dctt) {
ippiSAD8x8_8u32s_C1R(pYc, 2*mStepLuma, mcPred, 32, &sL0, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8, 2*mStepLuma, mcPred+8, 32, &sL1, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+mStepLuma, 2*mStepLuma, mcPred+16, 32, &sL2, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+mStepLuma+8, 2*mStepLuma, mcPred+24, 32, &sL3, IPPVC_MC_APX_FF);
off23 = 16;
s = 64;
} else {
ippiSAD8x8_8u32s_C1R(pYc, mStepLuma, mcPred, 16, &sL0, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8, mStepLuma, mcPred+8, 16, &sL1, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, &sL2, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma+8, mStepLuma, mcPred+136, 16, &sL3, IPPVC_MC_APX_FF);
off23 = 2*64;
s = 32;
}
*dct_type = dctt;
if (sL0 < lim) {
nzCount[0] = 0;
} else {
ippiDCT8x8Fwd_16s_C1R(coeff, s, coeffMB+0*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+0*64, mQuantInterSpec, quant, &nzCount[0]);
}
if (sL1 < lim) {
nzCount[1] = 0;
} else {
ippiDCT8x8Fwd_16s_C1R(coeff+8, s, coeffMB+1*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+1*64, mQuantInterSpec, quant, &nzCount[1]);
}
if (sL2 < lim) {
nzCount[2] = 0;
} else {
ippiDCT8x8Fwd_16s_C1R(coeff+off23, s, coeffMB+2*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+2*64, mQuantInterSpec, quant, &nzCount[2]);
}
if (sL3 < lim) {
nzCount[3] = 0;
} else {
ippiDCT8x8Fwd_16s_C1R(coeff+off23+8, s, coeffMB+3*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+3*64, mQuantInterSpec, quant, &nzCount[3]);
}
ippiSAD8x8_8u32s_C1R(pUc, mStepChroma, mcPred+64*4, 8, &sU, IPPVC_MC_APX_FF);
ippiSAD8x8_8u32s_C1R(pVc, mStepChroma, mcPred+64*5, 8, &sV, IPPVC_MC_APX_FF);
if (sU < lim) {
nzCount[4] = 0;
} else {
ippiSub8x8_8u16s_C1R(pUc, mStepChroma, mcPred+64*4, 8, coeffMB+4*64, 16);
ippiDCT8x8Fwd_16s_C1I(coeffMB+4*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+4*64, mQuantInterSpec, quant, &nzCount[4]);
}
if (sV < lim) {
nzCount[5] = 0;
} else {
ippiSub8x8_8u16s_C1R(pVc, mStepChroma, mcPred+64*5, 8, coeffMB+5*64, 16);
ippiDCT8x8Fwd_16s_C1I(coeffMB+5*64);
ippiQuantInter_MPEG4_16s_C1I(coeffMB+5*64, mQuantInterSpec, quant, &nzCount[5]);
}
/*
lim = (VOL.quant_type == 0) ? quant * 16 : quant * 12;
if (!dct_type) {
ippiSubSAD8x8_8u16s_C1R(pYc, mStepLuma, mcPred, 16, coeffMB+0*64, 16, &sL0);
ippiSubSAD8x8_8u16s_C1R(pYc+8, mStepLuma, mcPred+8, 16, coeffMB+1*64, 16, &sL1);
ippiSubSAD8x8_8u16s_C1R(pYc+8*mStepLuma, mStepLuma, mcPred+128, 16, coeffMB+2*64, 16, &sL2);
ippiSubSAD8x8_
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -