📄 h263_enc_frame.cpp
字号:
if (cSAD < bSAD) { i = 1; j = -1; bSAD = cSAD;#ifdef ME_USE_THRESHOLD if (bSAD <= thrSAD_8x8) goto L_01;#endif } } if (yPos < yB) { ippiSAD8x8_8u32s_C1R(pCur, step, pRef+yPos*step+xPos, step, &cSAD, IPPVC_MC_APX_FH); if (cSAD < bSAD) { i = 1; j = 0; bSAD = cSAD;#ifdef ME_USE_THRESHOLD if (bSAD <= thrSAD_8x8) goto L_01;#endif } } if (xPos < xR && yPos < yB) { ippiSAD8x8_8u32s_C1R(pCur, step, pRef+yPos*step+xPos, step, &cSAD, IPPVC_MC_APX_HH); if (cSAD < bSAD) { i = 1; j = 1; bSAD = cSAD; } }#ifdef ME_USE_THRESHOLDL_01:#endif mv->dx = (Ipp16s)((xPos << 1) + j); mv->dy = (Ipp16s)((yPos << 1) + i); *bestSAD = bSAD;}static void h263_ME_HalfPel_SAD_Fast_8x8(Ipp8u *pCur, Ipp8u *pRef, int step, int xL, int xR, int yT, int yB, int xPos, int yPos, int *bSad, IppMotionVector *mv, int *sad, int xP, int yP){ int i, j; Ipp32s m0, m1, m2, m3, m4; m0 = *bSad; j = xPos; i = yPos - 1; if (j >= xL && j <= xR && i >= yT && i <= yB) { if (j >= xP - rangeME_8x8 && j <= xP + rangeME_8x8 && i >= yP - rangeME_8x8 && i <= yP + rangeME_8x8) m1 = sad[(i - yP + rangeME_8x8) * (rangeME_8x8*2+1) + j - xP + rangeME_8x8]; else ippiSAD8x8_8u32s_C1R(pCur, step, pRef+i*step+j, step, &m1, IPPVC_MC_APX_FF); } else m1 = 1000000; j = xPos; i = yPos + 1; if (j >= xL && j <= xR && i >= yT && i <= yB) { if (j >= xP - rangeME_8x8 && j <= xP + rangeME_8x8 && i >= yP - rangeME_8x8 && i <= yP + rangeME_8x8) m2 = sad[(i - yP + rangeME_8x8) * (rangeME_8x8*2+1) + j - xP + rangeME_8x8]; else ippiSAD8x8_8u32s_C1R(pCur, step, pRef+i*step+j, step, &m2, IPPVC_MC_APX_FF); } else m2 = 1000000; j = xPos - 1; i = yPos; if (j >= xL && j <= xR && i >= yT && i <= yB) { if (j >= xP - rangeME_8x8 && j <= xP + rangeME_8x8 && i >= yP - rangeME_8x8 && i <= yP + rangeME_8x8) m3 = sad[(i - yP + rangeME_8x8) * (rangeME_8x8*2+1) + j - xP + rangeME_8x8]; else ippiSAD8x8_8u32s_C1R(pCur, step, pRef+i*step+j, step, &m3, IPPVC_MC_APX_FF); } else m3 = 1000000; j = xPos + 1; i = yPos; if (j >= xL && j <= xR && i >= yT && i <= yB) { if (j >= xP - rangeME_8x8 && j <= xP + rangeME_8x8 && i >= yP - rangeME_8x8 && i <= yP + rangeME_8x8) m4 = sad[(i - yP + rangeME_8x8) * (rangeME_8x8*2+1) + j - xP + rangeME_8x8]; else ippiSAD8x8_8u32s_C1R(pCur, step, pRef+i*step+j, step, &m4, IPPVC_MC_APX_FF); } else m4 = 1000000; if (2*(m3-m0) < (m4-m0)) { j = -1; } else if ((m3-m0) > 2*(m4-m0)) { j = +1; } else j = 0; if (2*(m1-m0) < (m2-m0)) { i = -1; } else if ((m1-m0) > 2*(m2-m0)) { i = +1; } else i = 0; mv->dx = (Ipp16s)((xPos << 1) + j); mv->dy = (Ipp16s)((yPos << 1) + i); if (j != 0 || i != 0) { ippiSAD8x8_8u32s_C1R(pCur, step, pRef+h263_MC_offs(mv, step), step, bSad, h263_MC_type(mv)); if (*bSad > m0) { // false choosing mv->dx = (Ipp16s)(xPos << 1); mv->dy = (Ipp16s)(yPos << 1); *bSad = m0; } }}#define FIND_MV_LIMITS_UMV1(mvPred, x, y) \{ \ if (mvPred.dx <= -32) { \ xL = -IPP_MIN(x * 16 + 16, 31); \ xR = 0; \ } else if (mvPred.dx <= 32) { \ xL = -IPP_MIN(x * 16 + 16, (32 - mvPred.dx) >> 1); \ xR = IPP_MIN((mNumMacroBlockPerRow - x - 1) * 16 + 15, (mvPred.dx + 31) >> 1); \ } else { \ xL = 0; \ xR = IPP_MIN((mNumMacroBlockPerRow - x - 1) * 16 + 15, 31); \ } \ if (mvPred.dy <= -32) { \ yT = -IPP_MIN(y * 16 + 16, 31); \ yB = 0; \ } else if (mvPred.dy <= 32) { \ yT = -IPP_MIN(y * 16 + 16, (32 - mvPred.dy) >> 1); \ yB = IPP_MIN((mNumMacroBlockPerCol - y - 1) * 16 + 15, (mvPred.dy + 31) >> 1); \ } else { \ yT = 0; \ yB = IPP_MIN((mNumMacroBlockPerCol - y - 1) * 16 + 15, 31); \ } \}void ippVideoEncoderH263::ME_SAD_8x8(Ipp8u *pCur, Ipp8u *pRef, int step, int xL0, int xR0, int yT0, int yB0, int *bestSAD, IppMotionVector *mv, IppMotionVector *mv4, int x, int y, IppMotionVector *mvNeighb){ Ipp32s xPos[4], yPos[4], bSad[4], sad[4][(rangeME_8x8*2+1)*(rangeME_8x8*2+1)], pOff[4], xP[4], yP[4]; IppMotionVector mvPred; int i, j, k, n, iPos, jPos; int xL, xR, yT, yB;#ifdef ME_USE_THRESHOLD Ipp32s sadThresh[4] = {0, 0, 0, 0};#endif xP[0] = h263_Div2(mv->dx); yP[0] = h263_Div2(mv->dy); xL = xL0; xR = xR0; yT = yT0; yB = yB0; bSad[0] = bSad[1] = bSad[2] = bSad[3] = MAX_SAD; pOff[0] = 0; pOff[1] = 8; pOff[2] = step * 8; pOff[3] = step * 8 + 8; if (mMEfastHP) memset(sad, -1, 4*(rangeME_8x8*2+1)*(rangeME_8x8*2+1)*sizeof(Ipp32s)); for (n = 0; n < 4; n ++) { k = 0; if (n) { if (n == 1) { mvPred.dx = h263_Median(xPos[0] << 1, mvNeighb[1].dx, mvNeighb[2].dx); mvPred.dy = h263_Median(yPos[0] << 1, mvNeighb[1].dy, mvNeighb[2].dy); } else if (n == 2) { mvPred.dx = h263_Median(xPos[0] << 1, xPos[1] << 1, mvNeighb[3].dx); mvPred.dy = h263_Median(yPos[0] << 1, yPos[1] << 1, mvNeighb[3].dy); } else { mvPred.dx = h263_Median(xPos[0] << 1, xPos[1] << 1, xPos[2] << 1); mvPred.dy = h263_Median(yPos[0] << 1, yPos[1] << 1, yPos[2] << 1); } xP[n] = h263_Div2(mvPred.dx); yP[n] = h263_Div2(mvPred.dy); if (VOP.UMV == 1) FIND_MV_LIMITS_UMV1(mvPred, x, y); } for (iPos = -rangeME_8x8; iPos <= rangeME_8x8; iPos ++) { i = yP[n] + iPos; if (i >= yT && i <= yB) { for (jPos = -rangeME_8x8; jPos <= rangeME_8x8; jPos ++) { j = xP[n] + jPos; if (j >= xL && j <= xR) { ippiSAD8x8_8u32s_C1R(pCur+pOff[n], step, pRef+pOff[n]+i*step+j, step, &sad[n][k], IPPVC_MC_APX_FF); if (bSad[n] > sad[n][k]) { bSad[n] = sad[n][k]; xPos[n] = j; yPos[n] = i;#ifdef ME_USE_THRESHOLD if (sad[n][k] <= mMEthrSAD8x8) { sadThresh[n] = 1; goto L_1; }#endif } } k ++; } } else k += (rangeME_8x8*2+1); }#ifdef ME_USE_THRESHOLDL_1:;#endif } if (mMEaccuracy == 1) { for (n = 0; n < 4; n ++) { mv4[n].dx = (Ipp16s)(xPos[n] << 1); mv4[n].dy = (Ipp16s)(yPos[n] << 1); } } else { xL = xL0; xR = xR0; yT = yT0; yB = yB0; if (mMEalgorithm == 1 && mMEfastHP) { for (n = 0; n < 4; n ++) { if (VOP.UMV == 1 && n) { if (n == 1) { mvPred.dx = h263_Median(mv4[0].dx, mvNeighb[1].dx, mvNeighb[2].dx); mvPred.dy = h263_Median(mv4[0].dy, mvNeighb[1].dy, mvNeighb[2].dy); } else if (n == 2) { mvPred.dx = h263_Median(mv4[0].dx, mv4[1].dx, mvNeighb[3].dx); mvPred.dy = h263_Median(mv4[0].dy, mv4[1].dy, mvNeighb[3].dy); } else { mvPred.dx = h263_Median(mv4[0].dx, mv4[1].dx, mv4[2].dx); mvPred.dy = h263_Median(mv4[0].dy, mv4[1].dy, mv4[2].dy); } FIND_MV_LIMITS_UMV1(mvPred, x, y); } h263_ME_HalfPel_SAD_Fast_8x8(pCur+pOff[n], pRef+pOff[n], step, xL, xR, yT, yB, xPos[n], yPos[n], &bSad[n], &mv4[n], sad[n], xP[n], yP[n]); } } else for (n = 0; n < 4; n ++) { if (VOP.UMV == 1 && n) { if (n == 1) { mvPred.dx = h263_Median(mv4[0].dx, mvNeighb[1].dx, mvNeighb[2].dx); mvPred.dy = h263_Median(mv4[0].dy, mvNeighb[1].dy, mvNeighb[2].dy); } else if (n == 2) { mvPred.dx = h263_Median(mv4[0].dx, mv4[1].dx, mvNeighb[3].dx); mvPred.dy = h263_Median(mv4[0].dy, mv4[1].dy, mvNeighb[3].dy); } else { mvPred.dx = h263_Median(mv4[0].dx, mv4[1].dx, mv4[2].dx); mvPred.dy = h263_Median(mv4[0].dy, mv4[1].dy, mv4[2].dy); } FIND_MV_LIMITS_UMV1(mvPred, x, y); }#ifdef ME_USE_THRESHOLD if (!sadThresh[n]) h263_ME_HalfPel_SAD_8x8(pCur+pOff[n], pRef+pOff[n], step, xL, xR, yT, yB, xPos[n], yPos[n], &bSad[n], &mv4[n], mMEthrSAD8x8); else { mv4[n].dx = (Ipp16s)(xPos[n] << 1); mv4[n].dy = (Ipp16s)(yPos[n] << 1); }#else h263_ME_HalfPel_SAD_8x8(pCur+pOff[n], pRef+pOff[n], step, xL, xR, yT, yB, xPos[n], yPos[n], &bSad[n], &mv4[n], mMEthrSAD8x8);#endif } } *bestSAD = bSad[0] + bSad[1] + bSad[2] + bSad[3];}#ifdef ME_USE_THRESHOLD#define SAD_ZERO mMEthrSAD16x16#else#define SAD_ZERO 0#endifvoid ippVideoEncoderH263::EncodePVOP(){ __ALIGN16(Ipp16s, coeffMB, 64*10); __ALIGN16(Ipp8u, mcPred, 64*6); Ipp8u *pYc, *pUc, *pVc, *pYf, *pUf, *pVf, *pF[6]; int i, j, quant, pattern, xL, xR, yT, yB; Ipp32s nzCount[6], dev, bestSAD, sadU, sadV, sadL0, sadL1, sadL2, sadL3, ncThrL, ncThrC, sadFavorInter; Ipp32s bestSAD16x16, bestSAD8x8; h263_MacroBlock *MBcurr = MBinfo; int scan; int maxMVx, maxMVy; IppMotionVector mvLuma4[4], mvChroma, mvPred[4]; int frGOB = 0; int rt = VOP.vop_rounding_type; IppiRect limitRectL, limitRectC; IppMotionVector mvLuma; mME4mv = VOP.advPred | VOP.deblockFilt;#ifdef ME_USE_THRESHOLD // setup thresholds for ME mMEthrSAD16x16 = VOP.vop_quant >= 6 ? 256 : (4 << VOP.vop_quant); mMEthrSAD8x8 = mMEthrSAD16x16 >> 2; mMEthrSAD16x8 = mMEthrSAD16x16 >> 1;#endif if (1) {// encode short_video_header P-VOP int nmb, nmbf; limitRectL.x = -16; limitRectL.y = -16; limitRectL.width = mNumMacroBlockPerRow * 16 + 32; limitRectL.height = mNumMacroBlockPerCol * 16 + 32; limitRectC.x = -8; limitRectC.y = -8; limitRectC.width = mNumMacroBlockPerRow * 8 + 16; limitRectC.height = mNumMacroBlockPerCol * 8 + 16; if (VOP.UMV == 2) { if (VOP.vop_width <= 352) maxMVx = 16; else if (VOP.vop_width <= 704) maxMVx = 32; else if (VOP.vop_width <= 1408) maxMVx = 64; else maxMVx = 128; if (VOP.vop_height <= 288) maxMVy = 16; else if (VOP.vop_width <= 576) maxMVy = 32; else maxMVy = 64; } quant = VOP.vop_quant; MBcurr = MBinfo; for (i = 0; i < mNumMacroBlockPerCol; i ++) { pYc = mCurrPtrY + i * 16 * mStepLuma; pYf = mForwPtrY + i * 16 * mStepLuma; pUc = mCurrPtrU + i * 8 * mStepChroma; pUf = mForwPtrU + i * 8 * mStepChroma; pVc = mCurrPtrV + i * 8 * mStepChroma; pVf = mForwPtrV + i * 8 * mStepChroma; if (!VOP.UMV) { yT = (i | VOP.advPred) ? -16 : 0; yB = ((mNumMacroBlockPerCol - i - 1) | VOP.advPred) ? 15 : 0; } else if (VOP.UMV == 2) { yT = -IPP_MIN(i * 16 + 15, maxMVy); yB = IPP_MIN((mNumMacroBlockPerCol - i - 1)*16 + 15, maxMVy - 1); } else if (VOP.UMV == 3) { yT = -(i * 16 + 15); yB = (mNumMacroBlockPerCol - i - 1)*16 + 15; } for (j = 0; j < mNumMacroBlockPerRow; j ++) { // calc SAD for Y, U,V blocks at (0,0) ippiSAD8x8_8u32s_C1R(pYc, mStepLuma, pYf, mStepLuma, &sadL0, IPPVC_MC_APX_FF); ippiSAD8x8_8u32s_C1R(pYc+8, mStepLuma, pYf+8, mStepLuma, &sadL1, IPPVC_MC_APX_FF); ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma, mStepLuma, pYf+8*mStepLuma, mStepLuma, &sadL2, IPPVC_MC_APX_FF); ippiSAD8x8_8u32s_C1R(pYc+8*mStepLuma+8, mStepLuma, pYf+8*mStepLuma+8, mStepLuma, &sadL3, IPPVC_MC_APX_FF); ippiSAD8x8_8u32s_C1R(pUc, mStepChroma, pUf, mStepChroma, &sadU, IPPVC_MC_APX_FF); ippiSAD8x8_8u32s_C1R(pVc, mStepChroma, pVf, mStepChroma, &sadV, IPPVC_MC_APX_FF); // not_coded decision ncThrL = quant * SAD_NOTCODED_THR_LUMA; ncThrC = quant * SAD_NOTCODED_THR_CHROMA; if ((sadL0 < ncThrL) && (sadL1 < ncThrL) && (sadL2 < ncThrL) && (sadL3 < ncThrL) && (sadU < ncThrC) && (sadV < ncThrC)) { MBcurr->type = IPPVC_MBTYPE_INTER; // ??? IPPVC_MB_STUFFING MBcurr->not_coded = 1; MBcurr->mv[0].dx = MBcurr->mv[0].dy = 0; if (mME4mv) MBcurr->mv[1].dx = MBcurr->mv[1].dy = MBcurr->mv[2].dx = MBcurr->mv[2].dy = MBcurr->mv[3].dx = MBcurr->mv[3].dy = 0; } else { MBcurr->not_coded = 0; // SAD at (0,0) bestSAD = sadL0 + sadL1 + sadL2 + sadL3; PredictMV(MBcurr, frGOB, i, j, mME4mv, &mvLuma); if (!VOP.UMV) { xL = (j | VOP.advPred) ? -16 : 0; xR = ((mNumMacroBlockPerRow - j - 1) | VOP.advPred) ? 15 : 0; } else if (VOP.UMV == 1) { // ??? To pad with 32 for UMV w/out PLUSPTYPE: can save some bits on MV coding at borders when predictors are large ??? if (mvLuma.dx <= -32) { xL = -IPP_MIN(j * 16 + 16, 31); xR = 0; } else if (mvLuma.dx <= 32) { xL = -IPP_MIN(j * 16 + 16, (32 - mvLuma.dx) >> 1); xR = IPP_MIN((mNumMacroBlockPerRow - j - 1) * 16 + 15, (mvLuma.dx + 31) >> 1); } else { xL = 0; xR = IPP_MIN((mNumMacroBlockPerRow - j - 1) * 16 + 15, 31); } if (mvLuma.dy <= -32) { yT = -IPP_MIN(i * 16 + 16, 31); yB = 0; } else if (mvLuma.dy <= 32) { yT = -IPP_MIN(i * 16 + 16, (32 - mvLuma.dy) >> 1); yB = IPP_MIN((mNumMacroBlockPerCol - i - 1) * 16 + 15, (mvLuma.dy + 31) >> 1); } else { yT = 0; yB = IPP_MIN((mNumMacroBlockPerCol - i - 1) * 16 + 15, 31); } } else if (VOP.UMV == 2) { xL = -IPP_MIN(j * 16 + 15, maxMVx); xR = IPP_MIN((mNumMacroBlockPerRow - j - 1)*16 + 15, maxMVx - 1); } else { // unlimited MV, UUI == 01 xL = -(j * 16 + 15); xR = (mNumMacroBlockPerRow - j - 1)*16 + 15; } mvPred[0] = mvLuma; ME_SAD_16x16(pYc, pYf, mStepLuma, xL, xR, yT, y
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -