📄 umc_h264_me_new.cpp
字号:
bSAD = cSAD;
bMVX = jL;
if (bSAD <= meInfo->threshold)
goto end;
}
}
if (jR <= xR) {
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, jR, i), pitchPixels, blockSize);
cSAD += MVConstraint((jR << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVX = jR;
if (bSAD <= meInfo->threshold)
goto end;
}
}
if (bMVX == xPos)
{
r /= 2;
if (r == 0) break;
}
}
j = bMVX;
r = rY;
for (;;)
{
yPos = bMVY;
iT = yPos - r;
iB = yPos + r;
if (iT >= yT) {
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, iT), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (iT << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVY = iT;
if (bSAD <= meInfo->threshold)
goto end;
}
}
if (iB <= yB) {
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, iB), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (iB << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
bSAD = cSAD;
bMVY = iB;
if (bSAD <= meInfo->threshold)
goto end;
}
}
if (bMVY == yPos)
{
r /= 2;
if (r == 0) break;
}
}
}
break;
default:
{
Ipp32s i, j, k, l, m, n, xPos, yPos;
//static const Ipp32s bdJ[5] = {0, -1, 0, 1, 0}, bdI[5] = {0, 0, -1, 0, 1};
//static const Ipp32s bdN[5] = {4, 3, 3, 3, 3}, bdA[5][4] = {{1, 2, 3, 4}, {1, 2, 4, 0}, {1, 2, 3, 0}, {2, 3, 4, 0}, {3, 4, 1, 0}};
static const Ipp32s bdJ[9] = {0, -1, 0, 1, 1, 1, 0, -1, -1}, bdI[9] = {0, -1, -1, -1, 0, 1, 1, 1, 0};
static const Ipp32s bdN[9] = {8, 5, 3, 5, 3, 5, 3, 5, 3}, bdA[9][8] = {{1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 7, 8, 0, 0, 0}, {1, 2, 3, 0, 0, 0, 0, 0}, {1, 2, 3, 4, 5, 0, 0, 0}, {3, 4, 5, 0, 0, 0, 0, 0}, {3, 4, 5, 6, 7, 0, 0, 0}, {5, 6, 7, 0, 0, 0, 0, 0}, {5, 6, 7, 8, 1, 0, 0, 0}, {7, 8, 1, 0, 0, 0, 0, 0}};
xPos = bMVX;
yPos = bMVY;
l = 0;
for (;;) {
n = l;
l = 0;
for (m = 0; m < bdN[n]; m ++) {
k = bdA[n][m];
j = xPos + bdJ[k];
i = yPos + bdI[k];
if (j >= xL && j <= xR && i >= yT && i <= yB) {
cSAD = SAD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, j, i), pitchPixels, blockSize);
cSAD += MVConstraint((j << SUB_PEL_SHIFT) - predictedMV.mvx, (i << SUB_PEL_SHIFT) - predictedMV.mvy, pRDQM);
if (cSAD < bSAD) {
l = k;
bSAD = cSAD;
bMVX = j;
bMVY = i;
if (bSAD <= meInfo->threshold)
goto end;
}
}
}
if (l == 0)
break;
xPos += bdJ[l];
yPos += bdI[l];
}
}
break;
}
end:
meInfo->bestMV.mvx = (Ipp16s)bMVX << SUB_PEL_SHIFT;
meInfo->bestMV.mvy = (Ipp16s)bMVY << SUB_PEL_SHIFT;
// change SAD to SATD
if (!(meInfo->flags & ANALYSE_SAD)) {
bSAD = SATD(pCur, pitchPixels, MVADJUST(pRef, pitchPixels, bMVX, bMVY), pitchPixels, blockSize) +
MVConstraint(meInfo->bestMV.mvx - predictedMV.mvx, meInfo->bestMV.mvy - predictedMV.mvy, pRDQM);
}
if (meInfo->flags & ANALYSE_ME_CHROMA) {
__ALIGN16 PixType interpBuff[256];
IppiSize chroma_size;
PixType *pCurU = meInfo->pCurU;
PixType *pCurV = meInfo->pCurV;
PixType *pRefU = meInfo->pRefU;
PixType *pRefV = meInfo->pRefV;
Ipp32s chroma_mvy_offset = meInfo->chroma_mvy_offset;
if (meInfo->chroma_format_idc == 1) { //420
chroma_size.width = meInfo->block.width>>1;
chroma_size.height = meInfo->block.height>>1;
} else if (meInfo->chroma_format_idc == 2) { //422
chroma_size.width = meInfo->block.width>>1;
chroma_size.height = meInfo->block.height;
}
Ipp32s chroma_block_size = chroma_size.width + (chroma_size.height >> 2);
Ipp32s iXType, iYType;
H264MotionVector chroma_vec;
chroma_vec.mvx = meInfo->bestMV.mvx; chroma_vec.mvy = meInfo->bestMV.mvy + chroma_mvy_offset;
Ipp32s offset = SubpelChromaMVAdjust(&chroma_vec, pitchPixels, iXType, iYType, meInfo->chroma_format_idc);
ippiInterpolateChroma_H264(pRefU+offset, pitchPixels*sizeof(PixType), interpBuff, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
ippiInterpolateChroma_H264(pRefV+offset, pitchPixels*sizeof(PixType), interpBuff+8, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
if (meInfo->flags & ANALYSE_SAD){
bSAD += SAD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
bSAD += SAD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
} else {
bSAD += SATD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
bSAD += SATD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
}
}
meInfo->bestSAD = bSAD;
}
// Sub-pel refinement of the motion vector held in meInfo->bestMV.
//
// Reads:  pCur/pRef (luma), pCurU/pCurV/pRefU/pRefV and chroma_mvy_offset
//         (only when ANALYSE_ME_CHROMA is set), pitchPixels, predictedMV,
//         pRDQM, searchAlgo, flags, block, chroma_format_idc, bit depths,
//         and the starting point bestMV / bestSAD.
// Writes: meInfo->bestMV and meInfo->bestSAD with the best candidate found.
//
// Candidates are restricted to a +/-3 window (in motion-vector units) around
// the incoming bestMV. For every candidate the reference block is
// interpolated (integer part = mv >> SUB_PEL_SHIFT, fractional phase = mv & 3),
// compared with the current block by SAD or SATD (selected by ANALYSE_SAD),
// and the motion-vector cost from MVConstraint() is added. When
// ANALYSE_ME_CHROMA is set, the chroma cost is added as well, but only for
// candidates whose luma cost already beats the current best.
//
// Convention used throughout: j is the horizontal (x) component and i is the
// vertical (y) component of the candidate vector.
//
// NOTE(review): chroma_size is only filled in for chroma_format_idc 1 (4:2:0)
// and 2 (4:2:2); with ANALYSE_ME_CHROMA set and any other value it is used
// uninitialized. Presumably callers never do that - confirm.
template <class PixType> void ME_SubPel(ME_Inf<PixType> *meInfo)
{
    //if (!(meInfo->flags & ANALYSE_ME_SUBPEL))
    //    return;

    // Scratch prediction buffer addressed with a row stride of 16 samples.
    // Luma is interpolated at its start; chroma reuses it afterwards with
    // U at column 0 and V at column 8, which is safe because the luma cost
    // has already been computed by then.
    __ALIGN16 PixType interpBuff[256];
    PixType *pCur = meInfo->pCur;
    PixType *pRef = meInfo->pRef;
    PixType *pCurU;
    PixType *pCurV;
    PixType *pRefU;
    PixType *pRefV;
    Ipp32s pitchPixels = meInfo->pitchPixels;
    H264MotionVector predictedMV = meInfo->predictedMV;
    Ipp16s* pRDQM = meInfo->pRDQM;
    // The sub-pel algorithm selector lives above the low five bits of searchAlgo.
    Ipp32s subPelAlgo = meInfo->searchAlgo >> 5;
    Ipp32s bSAD = meInfo->bestSAD;
    // Block-size code handed to SAD()/SATD(); encoding is defined by those helpers.
    Ipp32s blockSize = meInfo->block.width + (meInfo->block.height >> 2);
    Ipp32s bMVX = meInfo->bestMV.mvx;
    Ipp32s bMVY = meInfo->bestMV.mvy;
    Ipp32s cSAD;
    IppiSize chroma_size;
    Ipp32s chroma_block_size, chroma_mvy_offset;

    if (meInfo->flags & ANALYSE_ME_CHROMA) {
        pCurU = meInfo->pCurU;
        pCurV = meInfo->pCurV;
        pRefU = meInfo->pRefU;
        pRefV = meInfo->pRefV;
        chroma_mvy_offset = meInfo->chroma_mvy_offset;
        if (meInfo->chroma_format_idc == 1) { //420
            chroma_size.width = meInfo->block.width>>1;
            chroma_size.height = meInfo->block.height>>1;
        } else if (meInfo->chroma_format_idc == 2) { //422
            chroma_size.width = meInfo->block.width>>1;
            chroma_size.height = meInfo->block.height;
        }
        chroma_block_size = chroma_size.width + (chroma_size.height >> 2);
    }

    if (subPelAlgo == MV_SEARCH_TYPE_FULL) {
        // Full search: evaluate every position of the 7x7 window around the start.
        Ipp32s i, j, xC, yC;
        xC = bMVX;
        yC = bMVY;
        for (i = yC - 3; i <= yC + 3; i ++) {
            for (j = xC - 3; j <= xC + 3; j ++) {
                ippiInterpolateLuma_H264(MVADJUST(pRef, pitchPixels, j >> SUB_PEL_SHIFT, i >> SUB_PEL_SHIFT), pitchPixels*sizeof(PixType), interpBuff, 16*sizeof(PixType), j & 3, i & 3, meInfo->block, meInfo->bit_depth_luma);
                if (meInfo->flags & ANALYSE_SAD)
                    cSAD = SAD(pCur, pitchPixels, interpBuff, 16, blockSize);
                else
                    cSAD = SATD(pCur, pitchPixels, interpBuff, 16, blockSize);
                cSAD += MVConstraint(j - predictedMV.mvx, i - predictedMV.mvy, pRDQM);
                // Chroma is only worth computing if luma alone already beats the best.
                if ((meInfo->flags & ANALYSE_ME_CHROMA) && (cSAD < bSAD)) {
                    Ipp32s iXType, iYType;
                    H264MotionVector chroma_vec;
                    // j is the horizontal component, i the vertical one; the
                    // vertical chroma offset therefore belongs on i.
                    chroma_vec.mvx = j; chroma_vec.mvy = i + chroma_mvy_offset;
                    Ipp32s offset = SubpelChromaMVAdjust(&chroma_vec, pitchPixels, iXType, iYType, meInfo->chroma_format_idc);
                    ippiInterpolateChroma_H264(pRefU+offset, pitchPixels*sizeof(PixType), interpBuff, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
                    ippiInterpolateChroma_H264(pRefV+offset, pitchPixels*sizeof(PixType), interpBuff+8, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
                    if (meInfo->flags & ANALYSE_SAD){
                        cSAD += SAD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
                        cSAD += SAD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
                    }else{
                        cSAD += SATD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
                        cSAD += SATD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
                    }
                }
                if (cSAD < bSAD) {
                    bSAD = cSAD;
                    bMVX = j;
                    bMVY = i;
                }
            }
        }
    } else {
        // Table-driven descent: repeatedly step to the best of the neighbouring
        // positions until no neighbour improves the cost.
        Ipp32s i, j, k, l, m, n, xPos, yPos, xL, xR, yT, yB;
        // Alternative 4-neighbour tables, kept for reference:
        // static const Ipp32s bdJ[5] = {0, -1, 0, 1, 0}, bdI[5] = {0, 0, -1, 0, 1};
        // static const Ipp32s bdN[5] = {4, 3, 3, 3, 3}, bdA[5][4] = {{1, 2, 3, 4}, {1, 2, 4, 0}, {1, 2, 3, 0}, {2, 3, 4, 0}, {3, 4, 1, 0}};
        // bdJ/bdI: x/y offset of direction k (k = 0 is "no move", 1..8 are the
        // eight neighbours).
        static const Ipp32s bdJ[9] = {0, -1, 0, 1, 1, 1, 0, -1, -1}, bdI[9] = {0, -1, -1, -1, 0, 1, 1, 1, 0};
        // bdN[n]/bdA[n]: how many and which directions to test after the last
        // step was taken in direction n. Row 0 tests all eight; the other rows
        // appear to list only the neighbours not already evaluated around the
        // previous centre.
        static const Ipp32s bdN[9] = {8, 5, 3, 5, 3, 5, 3, 5, 3}, bdA[9][8] = {{1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 7, 8, 0, 0, 0}, {1, 2, 3, 0, 0, 0, 0, 0}, {1, 2, 3, 4, 5, 0, 0, 0}, {3, 4, 5, 0, 0, 0, 0, 0}, {3, 4, 5, 6, 7, 0, 0, 0}, {5, 6, 7, 0, 0, 0, 0, 0}, {5, 6, 7, 8, 1, 0, 0, 0}, {7, 8, 1, 0, 0, 0, 0, 0}};
        xPos = bMVX;
        yPos = bMVY;
        // Search window is fixed relative to the starting vector.
        xL = xPos - 3;
        xR = xPos + 3;
        yT = yPos - 3;
        yB = yPos + 3;
        // int r = 2;
        l = 0;
        for (;;) {
            n = l;  // direction of the previous step selects the candidate list
            l = 0;  // direction of the best improvement found in this round
            for (m = 0; m < bdN[n]; m ++) {
                k = bdA[n][m];
                j = xPos + bdJ[k];
                i = yPos + bdI[k];
                if (j >= xL && j <= xR && i >= yT && i <= yB) {
                    ippiInterpolateLuma_H264(MVADJUST(pRef, pitchPixels, j >> SUB_PEL_SHIFT, i >> SUB_PEL_SHIFT), pitchPixels*sizeof(PixType), interpBuff, 16*sizeof(PixType), j & 3, i & 3, meInfo->block, meInfo->bit_depth_luma);
                    if (meInfo->flags & ANALYSE_SAD)
                        cSAD = SAD(pCur, pitchPixels, interpBuff, 16, blockSize);
                    else
                        cSAD = SATD(pCur, pitchPixels, interpBuff, 16, blockSize);
                    cSAD += MVConstraint(j - predictedMV.mvx, i - predictedMV.mvy, pRDQM);
                    // Chroma is only worth computing if luma alone already beats the best.
                    if ((meInfo->flags & ANALYSE_ME_CHROMA) && (cSAD < bSAD)) {
                        Ipp32s iXType, iYType;
                        H264MotionVector chroma_vec;
                        chroma_vec.mvx = j; chroma_vec.mvy = i + chroma_mvy_offset;
                        Ipp32s offset = SubpelChromaMVAdjust(&chroma_vec, pitchPixels, iXType, iYType, meInfo->chroma_format_idc);
                        ippiInterpolateChroma_H264(pRefU+offset, pitchPixels*sizeof(PixType), interpBuff, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
                        ippiInterpolateChroma_H264(pRefV+offset, pitchPixels*sizeof(PixType), interpBuff+8, 16*sizeof(PixType), iXType, iYType, chroma_size, meInfo->bit_depth_chroma);
                        if (meInfo->flags & ANALYSE_SAD){
                            cSAD += SAD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
                            cSAD += SAD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
                        }else{
                            cSAD += SATD(pCurU, pitchPixels, interpBuff, 16, chroma_block_size);
                            cSAD += SATD(pCurV, pitchPixels, interpBuff+8, 16, chroma_block_size);
                        }
                    }
                    if (cSAD < bSAD) {
                        l = k;
                        bSAD = cSAD;
                    }
                }
            }
            // No neighbour improved on the current centre: converged.
            if (l == 0)
                break;
            xPos += bdJ[l];
            yPos += bdI[l];
            // if (l != 0) {
            //     xPos += bdJ[l] * r;
            //     yPos += bdI[l] * r;
            // }
            // if (r != 1)
            //     r = 1;
            // else if (l == 0)
            //     break;
        }
        bMVX = xPos;
        bMVY = yPos;
    }

    meInfo->bestMV.mvx = (Ipp16s)bMVX;
    meInfo->bestMV.mvy = (Ipp16s)bMVY;
    meInfo->bestSAD = bSAD;
}
template <class PixType, class CoeffsType> void H264CoreEncoder<PixType, CoeffsType>::ME_CandList16x16(
H264EncoderThreadPrivateSlice<PixType, CoeffsType> *curr_slice, Ipp32s list_id, ME_Inf<PixType> *meInfo)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -