📄 h263_enc_frame.cpp
字号:
}
static void h263e_ME_FullSearch_SAD_16x16(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s *xPos, Ipp32s *yPos, Ipp32s *bestSAD, Ipp32s thrSAD_16x16)
{
Ipp32s cSAD, bSAD, i, j, jPos, iPos;
jPos = *xPos;
iPos = *yPos;
bSAD = *bestSAD;
#ifdef ME_FULLSEARCH_RECT
pRef += yT * step;
for (i = yT; i <= yB; i ++) {
for (j = xL; j <= xR; j ++) {
ippiSAD16x16_8u32s(pCur, step, pRef+j, step, &cSAD, IPPVC_MC_APX_FF);
if (cSAD < bSAD) {
bSAD = cSAD;
iPos = i;
jPos = j;
#ifdef ME_USE_THRESHOLD
if (cSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
pRef += step;
}
#else // involute search
Ipp32s nLoop, cLoop, lPos;
i = iPos;
j = jPos;
pRef += iPos * step + jPos;
nLoop = IPP_MAX(IPP_MAX(jPos - xL, xR - jPos), IPP_MAX(iPos - yT, yB - iPos));
for (cLoop = 1; cLoop <= nLoop; cLoop ++) {
i --;
j --;
pRef -= step + 1;
for (lPos = 0; lPos < (cLoop << 3); lPos ++) {
if (j >= xL && j <= xR && i >= yT && i <= yB) {
ippiSAD16x16_8u32s(pCur, step, pRef, step, &cSAD, IPPVC_MC_APX_FF);
if (cSAD < bSAD) {
bSAD = cSAD;
iPos = i;
jPos = j;
#ifdef ME_USE_THRESHOLD
if (cSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (lPos < (cLoop * 2)) {
j ++; pRef ++;
} else if (lPos < (cLoop * 4)) {
i ++; pRef += step;
} else if (lPos < (cLoop * 6)) {
j --; pRef --;
} else {
i --; pRef -= step;
}
}
}
#endif
#ifdef ME_USE_THRESHOLD
L_01:
#endif
*bestSAD = bSAD;
*xPos = jPos;
*yPos = iPos;
}
static const Ipp32s bdJ[9] = {0, -2, -1, 0, 1, 2, 1, 0, -1},
bdI[9] = {0, 0, -1, -2, -1, 0, 1, 2, 1},
sdJ[5] = {0, -1, 0, 1, 0},
sdI[5] = {0, 0, -1, 0, 1};
static void h263e_ME_DiamondSearch_SAD_16x16(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s *xPos, Ipp32s *yPos, Ipp32s *bestSAD,
Ipp32s *sadBuf, Ipp32s thrSAD_16x16)
{
Ipp32s cSAD, bSAD, i, j, k, l, iPos, jPos, swWidth, swHeight;
jPos = *xPos;
iPos = *yPos;
bSAD = *bestSAD;
swWidth = (xR - xL + 1);
swHeight = (yB - yT + 1);
ippsSet_8u((Ipp8u)-1, (Ipp8u*)sadBuf, swWidth * swHeight * 4);
sadBuf[(iPos-yT)*swWidth+jPos-xL] = bSAD;
for (;;) {
// find SAD at big diamond
l = 0;
for (k = 1; k < 9; k ++) {
j = jPos + bdJ[k];
i = iPos + bdI[k];
if (j >= xL && j <= xR && i >= yT && i <= yB) {
if (sadBuf[(i-yT)*swWidth+j-xL] == -1) {
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &cSAD, IPPVC_MC_APX_FF);
#ifdef ME_USE_THRESHOLD
if (cSAD <= thrSAD_16x16) {
*xPos = j;
*yPos = i;
*bestSAD = cSAD;
return;
}
#endif
sadBuf[(i-yT)*swWidth+j-xL] = cSAD;
if (cSAD < bSAD) {
l = k;
bSAD = cSAD;
}
}
}
}
if (l == 0) {
// find SAD at small diamond
for (k = 1; k <= 4; k ++) {
j = jPos + sdJ[k];
i = iPos + sdI[k];
if (j >= xL && j <= xR && i >= yT && i <= yB) {
if (sadBuf[(i-yT)*swWidth+j-xL] == -1) {
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &cSAD, IPPVC_MC_APX_FF);
#ifdef ME_USE_THRESHOLD
if (cSAD <= thrSAD_16x16) {
*xPos = j;
*yPos = i;
*bestSAD = cSAD;
return;
}
#endif
sadBuf[(i-yT)*swWidth+j-xL]= cSAD;
if (cSAD < bSAD) {
l = k;
bSAD = cSAD;
}
}
}
}
*xPos = jPos + sdJ[l];
*yPos = iPos + sdI[l];
*bestSAD = bSAD;
return;
}
iPos += bdI[l];
jPos += bdJ[l];
}
}
static void h263e_ME_HalfPel_SAD_16x16(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s xPos, Ipp32s yPos, Ipp32s *bestSAD,
IppMotionVector *mv, Ipp32s thrSAD_16x16)
{
Ipp32s cSAD, bSAD, i, j;
i = 0; j = 0; bSAD = *bestSAD;
if (xPos > xL && yPos > yT) {
ippiSAD16x16_8u32s(pCur, step, pRef+(yPos-1)*step+xPos-1, step, &cSAD, IPPVC_MC_APX_HH);
if (cSAD < bSAD) {
i = -1; j = -1; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (yPos > yT) {
ippiSAD16x16_8u32s(pCur, step, pRef+(yPos-1)*step+xPos, step, &cSAD, IPPVC_MC_APX_FH);
if (cSAD < bSAD) {
i = -1; j = 0; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (xPos < xR && yPos > yT) {
ippiSAD16x16_8u32s(pCur, step, pRef+(yPos-1)*step+xPos, step, &cSAD, IPPVC_MC_APX_HH);
if (cSAD < bSAD) {
i = -1; j = 1; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (xPos > xL) {
ippiSAD16x16_8u32s(pCur, step, pRef+yPos*step+xPos-1, step, &cSAD, IPPVC_MC_APX_HF);
if (cSAD < bSAD) {
i = 0; j = -1; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (xPos < xR) {
ippiSAD16x16_8u32s(pCur, step, pRef+yPos*step+xPos, step, &cSAD, IPPVC_MC_APX_HF);
if (cSAD < bSAD) {
i = 0; j = 1; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (xPos > xL && yPos < yB) {
ippiSAD16x16_8u32s(pCur, step, pRef+yPos*step+xPos-1, step, &cSAD, IPPVC_MC_APX_HH);
if (cSAD < bSAD) {
i = 1; j = -1; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (yPos < yB) {
ippiSAD16x16_8u32s(pCur, step, pRef+yPos*step+xPos, step, &cSAD, IPPVC_MC_APX_FH);
if (cSAD < bSAD) {
i = 1; j = 0; bSAD = cSAD;
#ifdef ME_USE_THRESHOLD
if (bSAD <= thrSAD_16x16) goto L_01;
#endif
}
}
if (xPos < xR && yPos < yB) {
ippiSAD16x16_8u32s(pCur, step, pRef+yPos*step+xPos, step, &cSAD, IPPVC_MC_APX_HH);
if (cSAD < bSAD) {
i = 1; j = 1; bSAD = cSAD;
}
}
#ifdef ME_USE_THRESHOLD
L_01:
#endif
mv->dx = (Ipp16s)((xPos << 1) + j);
mv->dy = (Ipp16s)((yPos << 1) + i);
*bestSAD = bSAD;
}
static void h263e_ME_HalfPel_SAD_Fast_16x16(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s xPos, Ipp32s yPos, Ipp32s *bestSAD,
IppMotionVector *mv, Ipp32s *sadBuff)
{
Ipp32s i, j, swWidth, swHeight;
Ipp32s m0, m1, m2, m3, m4;
m0 = *bestSAD;
swWidth = (xR - xL + 1);
swHeight = (yB - yT + 1);
j = xPos;
i = yPos - 1;
if (j >= xL && j <= xR && i >= yT && i <= yB) {
m1 = sadBuff[(i-yT)*swWidth+j-xL];
if (m1 == -1)
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &m1, IPPVC_MC_APX_FF);
} else
m1 = 1000000;
j = xPos;
i = yPos + 1;
if (j >= xL && j <= xR && i >= yT && i <= yB) {
m2 = sadBuff[(i-yT)*swWidth+j-xL];
if (m2 == -1)
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &m2, IPPVC_MC_APX_FF);
} else
m2 = 1000000;
j = xPos - 1;
i = yPos;
if (j >= xL && j <= xR && i >= yT && i <= yB) {
m3 = sadBuff[(i-yT)*swWidth+j-xL];
if (m3 == -1)
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &m3, IPPVC_MC_APX_FF);
} else
m3 = 1000000;
j = xPos + 1;
i = yPos;
if (j >= xL && j <= xR && i >= yT && i <= yB) {
m4 = sadBuff[(i-yT)*swWidth+j-xL];
if (m4 == -1)
ippiSAD16x16_8u32s(pCur, step, pRef+i*step+j, step, &m4, IPPVC_MC_APX_FF);
} else
m4 = 1000000;
if (2*(m3-m0) < (m4-m0)) {
j = -1;
} else if ((m3-m0) > 2*(m4-m0)) {
j = +1;
} else
j = 0;
if (2*(m1-m0) < (m2-m0)) {
i = -1;
} else if ((m1-m0) > 2*(m2-m0)) {
i = +1;
} else
i = 0;
mv->dx = (Ipp16s)((xPos << 1) + j);
mv->dy = (Ipp16s)((yPos << 1) + i);
if (j != 0 || i != 0) {
ippiSAD16x16_8u32s(pCur, step, pRef+h263e_MC_offs(mv, step), step, bestSAD, h263e_MC_type(mv));
if (*bestSAD > m0) {
// false choosing
mv->dx = (Ipp16s)(xPos << 1);
mv->dy = (Ipp16s)(yPos << 1);
*bestSAD = m0;
}
}
}
void ippVideoEncoderH263::ME_SAD_16x16(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s *bestSAD, IppMotionVector *mv)
{
Ipp32s xPos, yPos, sadL;
// mv - Predicted MV
// bestSAD is the SAD at (0,0)
#ifdef ME_USE_THRESHOLD
if (*bestSAD <= mMEthrSAD16x16) {
mv->dx = mv->dy = 0;
return;
}
#endif
xPos = h263e_Div2(mv->dx);
yPos = h263e_Div2(mv->dy);
// check init mv is in rect
if (xPos < xL || xPos > xR || yPos < yT || yPos > yB) {
// reset to (0,0)
xPos = yPos = 0;
} else {
// calc SAD at Predict (intpel acc)
if (xPos != 0 || yPos != 0) {
ippiSAD16x16_8u32s(pCur, mStepLuma, pRef+yPos*mStepLuma+xPos, mStepLuma, &sadL, IPPVC_MC_APX_FF);
if (sadL - SAD_FAVOR_PRED < *bestSAD) {
// choose Predict as origin for ME
*bestSAD = sadL;
} else {
// choose (0,0) as origin for ME
xPos = yPos = 0;
}
}
}
xL = IPP_MAX(xL, xPos - mPPicsearchHor);
xR = IPP_MIN(xR, xPos + mPPicsearchHor);
yT = IPP_MAX(yT, yPos - mPPicsearchVer);
yB = IPP_MIN(yB, yPos + mPPicsearchVer);
*bestSAD -= SAD_FAVOR_ZERO;
sadL = *bestSAD;
#ifdef ME_USE_THRESHOLD
if (*bestSAD > mMEthrSAD16x16) {
#endif
if (mMEalgorithm == 0)
h263e_ME_FullSearch_SAD_16x16(pCur, pRef, step, xL, xR, yT, yB, &xPos, &yPos, bestSAD, mMEthrSAD16x16);
else
h263e_ME_DiamondSearch_SAD_16x16(pCur, pRef, step, xL, xR, yT, yB, &xPos, &yPos, bestSAD, mMEfastSAD, mMEthrSAD16x16);
#ifdef ME_USE_THRESHOLD
}
if (mMEaccuracy == 1 || *bestSAD <= mMEthrSAD16x16) {
mv->dx = (Ipp16s)(xPos << 1);
mv->dy = (Ipp16s)(yPos << 1);
if (mMEaccuracy > 2) {
// quarter pel
mv->dx = (Ipp16s)(mv->dx << 1);
mv->dy = (Ipp16s)(mv->dy << 1);
}
#else
if (mMEaccuracy == 1) {
mv->dx = (Ipp16s)(xPos << 1);
mv->dy = (Ipp16s)(yPos << 1);
#endif
} else {
if (mMEalgorithm == 1 && mMEfastHP)
h263e_ME_HalfPel_SAD_Fast_16x16(pCur, pRef, step, xL, xR, yT, yB, xPos, yPos, bestSAD, mv, mMEfastSAD);
else
h263e_ME_HalfPel_SAD_16x16(pCur, pRef, step, xL, xR, yT, yB, xPos, yPos, bestSAD, mv, mMEthrSAD16x16);
}
if (*bestSAD == sadL)
*bestSAD += SAD_FAVOR_ZERO;
}
static void h263e_ME_HalfPel_SAD_8x8(Ipp8u *pCur, Ipp8u *pRef, Ipp32s step, Ipp32s xL, Ipp32s xR,
Ipp32s yT, Ipp32s yB, Ipp32s xPos, Ipp32s yPos, Ipp32s *bestSAD,
IppMotionVector *mv, Ipp32s thrSAD_8x8)
{
Ipp32s cSAD, bSAD, i, j;
i = 0; j = 0; bSAD = *bestSAD;
if (xPos > xL && yPos > yT) {
ippiSAD8x8_8u32s_C1R(pCur, step, pRef+(yPos-1)*step+xPos-1, step, &cSAD, IPPVC_MC_APX_HH);
if (cSAD < bSAD) {
i = -1; j = -1; bSAD = cSAD;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -