// mp4_enc_gme.cpp
if( z < 0 )
{
w[k] = -z;
if( vT )
{
for( j = 0; j < n; j++ )
vT[j + k * ldvT] = -vT[j + k * ldvT];
}
}
} /* end of diagonalization loop */
/* sort singular values in descending order and permute the corresponding singular vectors */
for( i = 0; i < n; i++ )
{
k = i;
for( j = i + 1; j < n; j++ )
if( w[k] < w[j] )
k = j;
if( k != i )
{
Ipp64f t;
t = w[i]; w[i] = w[k]; w[k] = t;
if( vT )
for( j = 0; j < n; j++ )
{
t = vT[j + ldvT*i];
vT[j + ldvT*i] = vT[j + ldvT*k];
vT[j + ldvT*k] = t;
}
if( uT )
for( j = 0; j < m; j++ )
{
t = uT[j + lduT*i];
uT[j + lduT*i] = uT[j + lduT*k];
uT[j + lduT*k] = t;
}
}
}
}
/* Solves a*x = b in the least-squares sense, where a has already been decomposed
   with SVD (a = u * diag(w) * vT). a is m x n (m >= n), b is m x nb; the result
   is stored in x (n x nb). If b is NULL, the pseudo-inverse of a is computed
   instead. The buffer must hold at least nb elements; for nb = 1 it is not used. */
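/* Put differently, each pass of the loop below adds one rank-1 term of the
   truncated pseudo-inverse solution
       x += v_i * (1/w_i) * (u_i . b),
   where u_i and v_i are the i-th rows of uT and vT (the i-th left and right
   singular vectors of a). Terms whose singular value w_i lies at or below the
   threshold are dropped, which keeps the solve stable for (nearly) singular
   systems. */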
static void
ownSVBkSb_64f(Ipp32s m, Ipp32s n, const Ipp64f* w,
const Ipp64f* uT, Ipp32s lduT,
const Ipp64f* vT, Ipp32s ldvT,
const Ipp64f* b, Ipp32s ldb, Ipp32s nb,
Ipp64f* x, Ipp32s ldx, Ipp64f* buffer)
{
Ipp64f threshold = 0;
Ipp32s i, j;
if( !b )
nb = m;
for( i = 0; i < n; i++ )
memset( x + i*ldx, 0, nb*sizeof(x[0]));
for( i = 0; i < n; i++ )
threshold += w[i];
threshold *= 2*DBL_EPSILON;
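/* singular values at or below 2*DBL_EPSILON*sum(w) are treated as zero;
   their terms are simply skipped, since x has already been zeroed above */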
/* vT * inv(w) * uT * b */
for( i = 0; i < n; i++, uT += lduT, vT += ldvT )
{
Ipp64f wi = w[i];
if( wi > threshold )
{
wi = 1./wi;
if( nb == 1 )
{
Ipp64f s = 0;
if( b )
{
if( ldb == 1 )
{
for( j = 0; j <= m - 4; j += 4 )
s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
for( ; j < m; j++ )
s += uT[j]*b[j];
}
else
{
for( j = 0; j < m; j++ )
s += uT[j]*b[j*ldb];
}
}
else
s = uT[0];
s *= wi;
if( ldx == 1 )
{
for( j = 0; j <= n - 4; j += 4 )
{
Ipp64f t0 = x[j] + s*vT[j];
Ipp64f t1 = x[j+1] + s*vT[j+1];
x[j] = t0;
x[j+1] = t1;
t0 = x[j+2] + s*vT[j+2];
t1 = x[j+3] + s*vT[j+3];
x[j+2] = t0;
x[j+3] = t1;
}
for( ; j < n; j++ )
x[j] += s*vT[j];
}
else
{
for( j = 0; j < n; j++ )
x[j*ldx] += s*vT[j];
}
}
else
{
if( b )
{
memset( buffer, 0, nb*sizeof(buffer[0]));
ownMatrAXPY_64f( m, nb, b, ldb, uT, buffer, 0 );
for( j = 0; j < nb; j++ )
buffer[j] *= wi;
}
else
{
for( j = 0; j < nb; j++ )
buffer[j] = uT[j]*wi;
}
ownMatrAXPY_64f( n, nb, buffer, 0, vT, x, ldx );
}
}
}
}
/* Given a list of corresponding points a[i] <-> b[i], estimate the 2x3
   transformation M (applied to a in homogeneous form [x y 1]) that minimizes
   the error sum(||M*a - b||^2). */
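/* Both branches below build the normal equations sa*m = sb of this least-squares
   problem and solve them with the SVD-based routines above, which act as a
   pseudo-inverse and so remain stable for degenerate point configurations. */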
static void
ownGetRTMatrix(const IppiPoint_32f* a, const IppiPoint_32f* b,
Ipp32s count, Ipp64f* M, Ipp32s nwp)
{
if (nwp == 3) {
/* Full affine transformation, M = [a b | c]
[d e | f] */
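/* The 6 unknowns decouple into two 3x3 systems that share the matrix
       [ sum(x*x)  sum(x*y)  sum(x) ]
       [ sum(x*y)  sum(y*y)  sum(y) ]
       [ sum(x)    sum(y)    N      ]
   (with (x,y) = a[i]); hence only sa[0..2], sa[6..8] and sa[12..14] are
   accumulated in the loop and then copied into the second diagonal block. */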
Ipp64f sa[36], sb[6], w[6], u[36], v[36], buffer[24];
Ipp32s i;
memset( sa, 0, sizeof(sa) );
memset( sb, 0, sizeof(sb) );
for( i = 0; i < count; i++ )
{
sa[0] += a[i].x*a[i].x;
sa[1] += a[i].y*a[i].x;
sa[2] += a[i].x;
sa[6] += a[i].x*a[i].y;
sa[7] += a[i].y*a[i].y;
sa[8] += a[i].y;
sa[12] += a[i].x;
sa[13] += a[i].y;
sa[14] += 1;
sb[0] += a[i].x*b[i].x;
sb[1] += a[i].y*b[i].x;
sb[2] += b[i].x;
sb[3] += a[i].x*b[i].y;
sb[4] += a[i].y*b[i].y;
sb[5] += b[i].y;
}
sa[21] = sa[0];
sa[22] = sa[1];
sa[23] = sa[2];
sa[27] = sa[6];
sa[28] = sa[7];
sa[29] = sa[8];
sa[33] = sa[12];
sa[34] = sa[13];
sa[35] = sa[14];
ownSVD_64f( sa, 6, 6, 6, w, u, 6, 6, v, 6, buffer );
ownSVBkSb_64f( 6, 6, w, u, 6, v, 6, sb, 1, 1, M, 1, 0 );
} else {
/* Rotation + zoom + shift, so M = [a -b | c]
[b a | d] */
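/* Only 4 unknowns m = (a, b, c, d) are estimated here; with (x,y) = a[i] and
   (bx,by) = b[i], the normal equations are
       [ sum(x*x+y*y)   0             sum(x)   sum(y) ] [a]   [ sum(x*bx + y*by) ]
       [ 0              sum(x*x+y*y) -sum(y)   sum(x) ] [b] = [ sum(x*by - y*bx) ]
       [ sum(x)        -sum(y)        N        0      ] [c]   [ sum(bx)          ]
       [ sum(y)         sum(x)        0        N      ] [d]   [ sum(by)          ]
   which is exactly what the loop below accumulates into sa (row-major 4x4) and sb. */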
Ipp64f sa[16], sb[4], w[4], u[16], v[16], m[4];
Ipp64f buffer[16];
Ipp32s i;
memset( sa, 0, sizeof(sa) );
memset( sb, 0, sizeof(sb) );
for( i = 0; i < count; i++ )
{
sa[0] += a[i].x*a[i].x + a[i].y*a[i].y;
sa[1] += 0;
sa[2] += a[i].x;
sa[3] += a[i].y;
sa[4] += 0;
sa[5] += a[i].x*a[i].x + a[i].y*a[i].y;
sa[6] += -a[i].y;
sa[7] += a[i].x;
sa[8] += a[i].x;
sa[9] += -a[i].y;
sa[10] += 1;
sa[11] += 0;
sa[12] += a[i].y;
sa[13] += a[i].x;
sa[14] += 0;
sa[15] += 1;
sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
sb[2] += b[i].x;
sb[3] += b[i].y;
}
ownSVD_64f( sa, 4, 4, 4, w, u, 4, 4, v, 4, buffer );
ownSVBkSb_64f( 4, 4, w, u, 4, v, 4, sb, 1, 1, m, 1, 0 );
M[0] = M[4] = m[0];
M[1] = -m[1];
M[3] = m[1];
M[2] = m[2];
M[5] = m[3];
}
}
/* Given a list of corresponding points, estimate the optimal rigid transformation.
   A RANSAC algorithm is used that consists of 2 steps:
   1) Within a loop,
      1.1) choose from the original set a random subset of point correspondences
           that is sufficient to calculate the rigid transformation
           (for a rigid 2D transformation, 3 point correspondences are enough);
      1.2) calculate the transformation matrix M using ownGetRTMatrix;
      1.3) for each point check the error ||M*a - b|| and count the inliers/outliers.
   2) Choose the transformation that gives the smallest number of outliers (the best case)
      and calculate the final transformation matrix using ownGetRTMatrix from the
      "best case" inliers.
*/
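/* Note on the mask argument: it must provide room for 2*count entries, since the
   best consensus mask is kept in its second half (best_mask = mask + count below).
   A minimal calling sketch, using hypothetical names ptsA/ptsB/numPts/msk:

       Ipp64f Mest[6];
       Ipp8s  msk[2 * 64];                      // enough for numPts <= 64
       Ipp32s inliers = ownGetRTMatrix_RANSAC(ptsA, ptsB, numPts, Mest,
                                               100, 20.0, 3, msk);
       // the caller decides how many inliers are enough; FindTransformGMC below
       // accepts the estimate only when inliers*5 exceeds the number of tracked points
*/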
static Ipp32s
ownGetRTMatrix_RANSAC(IppiPoint_32f* A, IppiPoint_32f* B,
Ipp32s count, Ipp64f* M, Ipp32s iterations,
Ipp64f min_triangle_side, Ipp32s nwp, Ipp8s* mask)
{
#define RANSAC_SIZE0 3
#define SUBSET_SELECTION_MAX_ATTEMPTS 100
Ipp32s i, j, k, k1;
Ipp64u rng = (Ipp64u)-1;
Ipp8s* best_mask = mask + count;
Ipp32s max_inlier_count = 0;
// 1. find the consensus
for( k = 0; k < iterations; k++ )
{
Ipp32s idx[RANSAC_SIZE0];
IppiPoint_32f a[RANSAC_SIZE0];
IppiPoint_32f b[RANSAC_SIZE0];
Ipp32s inlier_count = 0;
// choose 3 random, sufficiently separated and non-collinear point correspondences from A & B
for( i = 0; i < RANSAC_SIZE0; i++ )
{
for( k1 = 0; k1 < SUBSET_SELECTION_MAX_ATTEMPTS; k1++ )
{
idx[i] = OWN_RNG_NEXT(rng) % count;
for( j = 0; j < i; j++ )
{
if( idx[j] == idx[i] )
break;
// check that the points are not too close to each other
if( fabs(A[idx[i]].x - A[idx[j]].x) +
fabs(A[idx[i]].y - A[idx[j]].y) < min_triangle_side )
break;
if( fabs(B[idx[i]].x - B[idx[j]].x) +
fabs(B[idx[i]].y - B[idx[j]].y) < min_triangle_side )
break;
}
if( j < i )
continue;
if( i+1 == RANSAC_SIZE0 )
{
// additional check that neither point triple is (nearly) collinear
a[0] = A[idx[0]];
a[1] = A[idx[1]];
a[2] = A[idx[2]];
b[0] = B[idx[0]];
b[1] = B[idx[1]];
b[2] = B[idx[2]];
if( fabs((a[1].x - a[0].x)*(a[2].y - a[0].y) - (a[1].y - a[0].y)*(a[2].x - a[0].x)) < 1 ||
fabs((b[1].x - b[0].x)*(b[2].y - b[0].y) - (b[1].y - b[0].y)*(b[2].x - b[0].x)) < 1 )
continue;
}
break;
}
if( k1 >= SUBSET_SELECTION_MAX_ATTEMPTS )
break;
}
if( i < RANSAC_SIZE0 )
continue;
// estimate the transformation using 3 points
ownGetRTMatrix( a, b, RANSAC_SIZE0, M, nwp );
for( i = 0; i < count; i++ )
{
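// a correspondence counts as an inlier when its L1 reprojection error
// |dx| + |dy| is below 8 (pixel units for the points used here)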
mask[i] = fabs( M[0]*A[i].x + M[1]*A[i].y + M[2] - B[i].x ) +
fabs( M[3]*A[i].x + M[4]*A[i].y + M[5] - B[i].y ) < 8;
inlier_count += mask[i];
}
if( inlier_count >= max_inlier_count )
{
memcpy( best_mask, mask, count );
max_inlier_count = inlier_count;
if( max_inlier_count == count )
break;
}
}
if( max_inlier_count )
{
// 2. repack the points, leave only the good ones
if( max_inlier_count < count )
for( i = 0, j = 0; i < count; i++ )
{
if( best_mask[i] )
{
A[j] = A[i];
B[j] = B[i];
j++;
}
}
// 3. estimate the final transformation
ownGetRTMatrix( A, B, max_inlier_count, M, nwp );
}
return max_inlier_count;
}
void ippVideoEncoderMPEG4::PyramidCalc(const Ipp8u* img, IppiPyramid* pyramid)
{
pyramid->pImage[0] = (Ipp8u*)img;
pyramid->pStep[0] = mStepLuma;
for (Ipp32s i = 0; i < pyramid->level; i ++)
ippiPyramidLayerDown_8u_C1R((const Ipp8u*)pyramid->pImage[i], pyramid->pStep[i], pyramid->pRoi[i],
(Ipp8u*)pyramid->pImage[i+1], pyramid->pStep[i+1], pyramid->pRoi[i+1],
(IppiPyramidDownState_8u_C1R*)(pyramid->pState));
}
#endif // USE_CV_GME
bool ippVideoEncoderMPEG4::FindTransformGMC()
{
// with one warping point only a global translation is estimated
if (VOL.no_of_sprite_warping_points == 1) {
Ipp32s i, j;
Ipp32s *shX, *shY;
Ipp32s mbW, mbH, mbStep, xIndx, yIndx;
mp4_Data_ME meData;
meData.method = mMEmethod;
meData.flags = mMEflags & (~ME_QP) & (~ME_USE_MVWEIGHT) & (~ME_CHROMA);
meData.quant = 0;
meData.fcode = 1;
meData.rt = 0;
#ifdef USE_ME_SADBUFF
meData.meBuff = mMEfastSAD;
#endif
meData.thrDiff = meData.thrDiff16x16 = 256;
meData.sadFunc = ippiSAD16x16_8u32s;
meData.stepL = mStepLuma;
meData.numPred = 1;
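// sample the frame on a coarse grid: one 16x16 block from every 4x4 group of
// macroblocks, starting one macroblock in from the border, and run a small
// motion search on each sampled block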
mbStep = 4;
mbW = ((mNumMacroBlockPerRow - 3) / mbStep) + 1;
mbH = ((mNumMacroBlockPerCol - 3) / mbStep) + 1;
shX = ippsMalloc_32s(mbW * mbH);
shY = ippsMalloc_32s(mbW * mbH);
yIndx = 1;
for (i = 0; i < mbH; i ++) {
meData.yT = -IPP_MIN(yIndx * 16 + 16, mPVOPsearchVer);
meData.yB = IPP_MIN((mNumMacroBlockPerCol - yIndx) * 16, mPVOPsearchVer);
xIndx = 1;
meData.yMB = i;
for (j = 0; j < mbW; j ++) {
meData.pYc = mFrameC->pY + yIndx * 16 * mStepLuma + xIndx * 16;
meData.pYr = mFrameF->pY + yIndx * 16 * mStepLuma + xIndx * 16;
meData.xMB = j;
// SAD at (0,0)
ippiSAD16x16_8u32s(meData.pYc, meData.stepL, meData.pYr, meData.stepL, &meData.bestDiff, IPPVC_MC_APX_FF);
if (meData.bestDiff > meData.thrDiff16x16) {
meData.xL = -IPP_MIN(xIndx * 16 + 16, mPVOPsearchHor);
meData.xR = IPP_MIN((mNumMacroBlockPerRow - xIndx) * 16, mPVOPsearchHor);
meData.mvPred[0].dx = (Ipp16s)VOP.warping_mv_code_du[0];
meData.mvPred[0].dy = (Ipp16s)VOP.warping_mv_code_dv[0];
mp4_ME_SAD(&meData);
shX[i*mbW+j] = meData.xPos;
shY[i*mbW+j] = meData.yPos;
} else {
shX[i*mbW+j] = 0;
shY[i*mbW+j] = 0;
}
xIndx += mbStep;
}
yIndx += mbStep;
}
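// take the median of the per-block shifts as a robust estimate of the global
// translation for warping point 0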
ippsSortAscend_32s_I(shX, mbW * mbH);
ippsSortAscend_32s_I(shY, mbW * mbH);
VOP.warping_mv_code_du[0] = shX[mbW * mbH >> 1];
VOP.warping_mv_code_dv[0] = shY[mbW * mbH >> 1];
ippsFree(shX);
ippsFree(shY);
}
#ifdef USE_CV_GME
else if (VOL.no_of_sprite_warping_points > 1) {
Ipp32s i0, j0, i1, j1, i2, j2, i, j, k;
Ipp64f affmat[6];
IppStatus sts;
Ipp32f min_triangle_size;
PyramidCalc(mFrameC->pY, mPyrC);
PyramidCalc(mFrameF->pY, mPyrR);
// make a uniform grid of tracked points
for (i = 0, k = 0; i < mOptFlowNumPointY; i++)
for (j = 0; j < mOptFlowNumPointX; j ++, k ++) {
mOptFlowPtC[k].x = mOptFlowPtR[k].x = (j + 0.5f) * mPyrC->pRoi[0].width / mOptFlowNumPointX;
mOptFlowPtC[k].y = mOptFlowPtR[k].y = (i + 0.5f) * mPyrC->pRoi[0].height / mOptFlowNumPointY;
}
// track the points
sts = ippiOpticalFlowPyrLK_8u_C1R(mPyrC, mPyrR, mOptFlowPtC, mOptFlowPtR, mOptFlowPtSts,
mOptFlowPatchDiff, mOptFlowNumPoint, mOptFlowWinSize,
mPyrC->level, 20, 0.1f, mOptFlowState);
if (sts == ippStsNoErr) {
// keep only the successfully tracked points, packing them to the front of the arrays
for (i = j = 0; i < mOptFlowNumPoint; i++) {
if (mOptFlowPtSts[i] == 0) {
mOptFlowPtC[j] = mOptFlowPtC[i];
mOptFlowPtR[j] = mOptFlowPtR[i];
j++;
}
}
if (j == 0) {
sts = ippStsSingularity;
} else {
// apply the RANSAC algorithm to find the rigid transformation
min_triangle_size = 20.f;
min_triangle_size = IPP_MIN(min_triangle_size, mPyrC->pRoi[0].height * 0.1f);
min_triangle_size = IPP_MIN(min_triangle_size, mPyrC->pRoi[0].width * 0.1f);
i = ownGetRTMatrix_RANSAC(mOptFlowPtC, mOptFlowPtR, j, affmat, 100, min_triangle_size,
VOL.no_of_sprite_warping_points, mOptFlowMask);
// accept the result only if at least one fifth of all tracked points are inliers;
// otherwise report ippStsSingularity, meaning that no good transformation was found
sts = i < 0 ? (IppStatus)i : i * 5 > mOptFlowNumPoint ? ippStsOk : ippStsSingularity;
}
}
if (sts != ippStsNoErr) {
VOP.warping_mv_code_du[0] = VOP.warping_mv_code_du[1] = VOP.warping_mv_code_du[2] = 0;
VOP.warping_mv_code_dv[0] = VOP.warping_mv_code_dv[1] = VOP.warping_mv_code_dv[2] = 0;
return false;
}
i0 = (Ipp32s)((affmat[0] * 0 + affmat[1] * 0 + affmat[2]) * 2.0);
j0 = (Ipp32s)((affmat[3] * 0 + affmat[4] * 0 + affmat[5]) * 2.0);
i1 = (Ipp32s)((affmat[0] * mSourceWidth + affmat[1] * 0 + affmat[2]) * 2.0);
j1 = (Ipp32s)((affmat[3] * mSourceWidth + affmat[4] * 0 + affmat[5]) * 2.0);
i2 = (Ipp32s)((affmat[0] * 0 + affmat[1] * mSourceHeight + affmat[2]) * 2.0);
j2 = (Ipp32s)((affmat[3] * 0 + affmat[4] * mSourceHeight + affmat[5]) * 2.0);
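// i0..j2 are the warped positions of the reference corners (0,0), (mSourceWidth,0)
// and (0,mSourceHeight) in half-pel units (hence the *2.0); below they are turned
// into MPEG-4's differentially coded warping-point motion vectors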
VOP.warping_mv_code_du[0] = i0;
VOP.warping_mv_code_dv[0] = j0;
VOP.warping_mv_code_du[1] = i1 - mSourceWidth * 2 - i0;
VOP.warping_mv_code_dv[1] = j1 - j0;
VOP.warping_mv_code_du[2] = i2 - i0;
VOP.warping_mv_code_dv[2] = j2 - mSourceHeight * 2 - j0;
}
#endif
return true;
}
} // namespace MPEG4_ENC
#endif //defined (UMC_ENABLE_MPEG4_VIDEO_ENCODER)