📄 umc_h264_me_new.cpp
字号:
sad = SATD8x16(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_8x8:
sad = SATD8x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_8x4:
sad = SATD8x4(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_4x8:
sad = SATD4x8(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_4x4:
sad = SATD4x4(pCur, pitchPixelsCur*sizeof(PixType), pRef, pitchPixelsRef*sizeof(PixType));
break;
case BS_4x2:
{
Ipp32s d0 = pCur[0] - pRef[0];
Ipp32s d1 = pCur[1] - pRef[1];
Ipp32s d2 = pCur[pitchPixelsCur+0] - pRef[pitchPixelsRef+0];
Ipp32s d3 = pCur[pitchPixelsCur+1] - pRef[pitchPixelsRef+1];
Ipp32s a0 = d0 + d2;
Ipp32s a1 = d1 + d3;
Ipp32s a2 = d0 - d2;
Ipp32s a3 = d1 - d3;
sad = ABS(a0 + a1) + ABS(a0 - a1) + ABS(a2 + a3) + ABS(a2 - a3);
d0 = pCur[2] - pRef[2];
d1 = pCur[3] - pRef[3];
d2 = pCur[pitchPixelsCur+2] - pRef[pitchPixelsRef+2];
d3 = pCur[pitchPixelsCur+3] - pRef[pitchPixelsRef+3];
a0 = d0 + d2;
a1 = d1 + d3;
a2 = d0 - d2;
a3 = d1 - d3;
sad += ABS(a0 + a1) + ABS(a0 - a1) + ABS(a2 + a3) + ABS(a2 - a3);
break;
}
case BS_2x4:
{
Ipp32s d0 = pCur[0] - pRef[0];
Ipp32s d1 = pCur[1] - pRef[1];
Ipp32s d2 = pCur[pitchPixelsCur+0] - pRef[pitchPixelsRef+0];
Ipp32s d3 = pCur[pitchPixelsCur+1] - pRef[pitchPixelsRef+1];
Ipp32s a0 = d0 + d2;
Ipp32s a1 = d1 + d3;
Ipp32s a2 = d0 - d2;
Ipp32s a3 = d1 - d3;
sad = ABS(a0 + a1) + ABS(a0 - a1) + ABS(a2 + a3) + ABS(a2 - a3);
pCur += pitchPixelsCur * 2;
pRef += pitchPixelsRef * 2;
d0 = pCur[0] - pRef[0];
d1 = pCur[1] - pRef[1];
d2 = pCur[pitchPixelsCur+0] - pRef[pitchPixelsRef+0];
d3 = pCur[pitchPixelsCur+1] - pRef[pitchPixelsRef+1];
a0 = d0 + d2;
a1 = d1 + d3;
a2 = d0 - d2;
a3 = d1 - d3;
sad += ABS(a0 + a1) + ABS(a0 - a1) + ABS(a2 + a3) + ABS(a2 - a3);
break;
}
case BS_2x2:
{
Ipp32s d0 = pCur[0] - pRef[0];
Ipp32s d1 = pCur[1] - pRef[1];
Ipp32s d2 = pCur[pitchPixelsCur+0] - pRef[pitchPixelsRef+0];
Ipp32s d3 = pCur[pitchPixelsCur+1] - pRef[pitchPixelsRef+1];
Ipp32s a0 = d0 + d2;
Ipp32s a1 = d1 + d3;
Ipp32s a2 = d0 - d2;
Ipp32s a3 = d1 - d3;
sad = ABS(a0 + a1) + ABS(a0 - a1) + ABS(a2 + a3) + ABS(a2 - a3);
break;
}
default:
sad = 0;
break;
}
return sad;
}
// TODO: needs to be modified for NEW_INTERPOLATE.
template <class PixType> void ME_IntPel(ME_Inf<PixType> *meInfo)
{
Ipp32s cSAD, bSAD, xL, xR, yT, yB, rX, rY, bMVX, bMVY, blockSize, i;
PixType *pCur = meInfo->pCur;
PixType *pRef = meInfo->pRef;
Ipp32s pitchPixels = meInfo->pitchPixels;
H264MotionVector predictedMV = meInfo->predictedMV;
Ipp16s* pRDQM = meInfo->pRDQM;
blockSize = meInfo->block.width + (meInfo->block.height >> 2);
bSAD = MAX_SAD;
bMVX = bMVY = 0;
for (i = 0; i < meInfo->candNum; i ++) {
if (meInfo->candMV[i].mvx >= meInfo->xMin && meInfo->candMV[i].mvx <= meInfo->xMax && meInfo->candMV[i].mvy >= meInfo->yMin && meInfo->candMV[i].mvy <= meInfo->yMax) {
cSAD = MVConstraint((meInfo->candMV[i].mvx << SUB_PEL_SHIFT) - predictedMV.mvx, (meInfo->candMV[i].mvy << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD <= bSAD) {
cSAD += SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, meInfo->candMV[i].mvx, meInfo->candMV[i].mvy), pitchPixels, blockSize);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVX = meInfo->candMV[i].mvx;
bMVY = meInfo->candMV[i].mvy;
if (bSAD <= meInfo->threshold)
goto end;
}
}
}
}
xL = MAX(meInfo->xMin, bMVX - meInfo->rX);
xR = MIN(meInfo->xMax, bMVX + meInfo->rX);
yT = MAX(meInfo->yMin, bMVY - meInfo->rY);
yB = MIN(meInfo->yMax, bMVY + meInfo->rY);
rX = meInfo->rX;
rY = meInfo->rY;
switch (meInfo->searchAlgo & 15) {
case MV_SEARCH_TYPE_UMH:
{
static const Ipp32s ConvergenceThreshold = 1000;
static const Ipp32s CrossSearchThreshold1 = 800;
static const Ipp32s CrossSearchThreshold2 = 1000;
Ipp32s block_id = abs((meInfo->block.width + meInfo->block.height)/8 - 4);
if (meInfo->block.width + meInfo->block.height == 8) block_id = 4;
Ipp32s j, i,xPos, yPos, y, x, n, k, l, X, Y;
if (bSAD < (ConvergenceThreshold >> block_id))
goto end;
Ipp32s Used_pos = 0;
Y = yB - yT + 1;
X = xR - xL + 1;
Ipp8s * Used = new Ipp8s[Y * X];
memset(Used,0,Y*X);
if (((block_id == 0) && (bSAD < CrossSearchThreshold1)) || (bSAD < (CrossSearchThreshold2 >> block_id)))
goto end;
Ipp32s sr = MAX(rX,rY);
xPos = bMVX;
yPos = bMVY;
i = yPos;
for (j = xPos - rX; j < xPos + rX; j += 2)
{
if (j >= xL && j <= xR && i >= yT && i <= yB) {
Used_pos = X*(i - yT) + j - xL;
Used[Used_pos] = 1;
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, i), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVX = j;
if (bSAD <= meInfo->threshold)
{
goto end;
}
}
}
}
// xPos = bMVX;
j = xPos;
for (i = yPos - rY/2; i < yPos + rY/2; i += 2)
{
if (j >= xL && j <= xR && i >= yT && i <= yB) {
Used_pos = X*(i - yT) + j - xL;
Used[Used_pos] = 1;
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, i), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVY = i;
if (bSAD <= meInfo->threshold)
{
goto end;
}
}
}
}
xPos = bMVX;
yPos = bMVY;
for (y = yPos - 2; y <= yPos + 2; y ++) {
for (x = xPos - 2; x <= xPos + 2; x ++) {
if (x >= xL && x <= xR && y >= yT && y <= yB)
{
Used_pos = X*(y - yT) + x - xL;
if(!Used[Used_pos])
{
Used[Used_pos] = 1;
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, x, y), pitchPixels, blockSize);
cSAD += MVConstraint((x << SUB_PEL_SHIFT) - predictedMV.mvx, (y << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVX = x;
bMVY = y;
if (bSAD <= meInfo->threshold)
{
goto end;
}
}
}
}
}
}
static const Ipp32s bdJ[17] = {0, 0, 2, 4, 4, 4, 4, 4, 2, 0, -2, -4, -4, -4, -4, -4, -2};
static const Ipp32s bdI[17] = {0, -4, -3, -2, -1, 0, 1, 2, 3, 4, 3, 2, 1, 0, -1, -2, -3};
xPos = bMVX;
yPos = bMVY;
for (n = 1; n < sr/4+1; n++)
{
for (k = 0; k < 17; k++)
{
j = xPos + bdJ[k]*n;
i = yPos + bdI[k]*n;
if (j >= xL && j <= xR && i >= yT && i <= yB)
{
Used_pos = X*(i - yT) + j - xL;
if(!Used[Used_pos])
{
Used[Used_pos] = 1;
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, i), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVX = j;
bMVY = i;
if (bSAD <= meInfo->threshold)
{
goto end;
}
}
}
}
}
}
if (bSAD < (ConvergenceThreshold >> block_id))
goto end;
static const Ipp32s bdJL[7] = {0, -1, 1, 2, 1, -1, -2};
static const Ipp32s bdIL[7] = {0, -2, -2, 0, 2, 2, 0};
static const Ipp32s bdJS[5] = {0, 0, 1, 0, -1};
static const Ipp32s bdIS[5] = {0, -1, 0, 1, 0};
for (;;)
{
l = 0;
for (k = 0; k < 7; k++)
{
j = xPos + bdJL[k];
i = yPos + bdIL[k];
if (j >= xL && j <= xR && i >= yT && i <= yB)
{
Used_pos = X*(i - yT) + j - xL;
if(!Used[Used_pos])
{
Used[Used_pos] = 1;
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, i), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
l = k;
bSAD = cSAD;
bMVX = j;
bMVY = i;
if (bSAD <= meInfo->threshold)
{
goto end;
}
}
}
}
}
if (l == 0) break;
else
{
xPos += bdJL[l];
yPos += bdIL[l];
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -