// mp4_enc_gme.cpp
if( z < 0 )
{
w[k] = -z;
if( vT )
{
for( j = 0; j < n; j++ )
vT[j + k * ldvT] = -vT[j + k * ldvT];
}
}
} /* end of diagonalization loop */
/* sort singular values in descending order and permute the corresponding singular vectors */
for( i = 0; i < n; i++ )
{
k = i;
for( j = i + 1; j < n; j++ )
if( w[k] < w[j] )
k = j;
if( k != i )
{
Ipp64f t;
t = w[i]; w[i] = w[k]; w[k] = t;
if( vT )
for( j = 0; j < n; j++ )
{
t = vT[j + ldvT*i];
vT[j + ldvT*i] = vT[j + ldvT*k];
vT[j + ldvT*k] = t;
}
if( uT )
for( j = 0; j < m; j++ )
{
t = uT[j + lduT*i];
uT[j + lduT*i] = uT[j + lduT*k];
uT[j + lduT*k] = t;
}
}
}
}
/* Solves a*x = b in the least-squares sense, where a has already been decomposed
   with SVD (a = u * diag(w) * vT). a is m x n (m >= n), b is m x nb; the result
   is stored in x (n x nb). If b is NULL, the pseudo-inverse of a is computed
   instead. The buffer must hold at least nb elements; for nb = 1 it is not used. */
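/* Put differently, each pass of the loop below adds one rank-1 term of the
   truncated pseudo-inverse solution
       x += v_i * (1/w_i) * (u_i . b),
   where u_i and v_i are the i-th rows of uT and vT (the i-th left and right
   singular vectors of a). Terms whose singular value w_i lies at or below the
   threshold are dropped, which keeps the solve stable for (nearly) singular
   systems. */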
static void
ownSVBkSb_64f(Ipp32s m, Ipp32s n, const Ipp64f* w,
const Ipp64f* uT, Ipp32s lduT,
const Ipp64f* vT, Ipp32s ldvT,
const Ipp64f* b, Ipp32s ldb, Ipp32s nb,
Ipp64f* x, Ipp32s ldx, Ipp64f* buffer)
{
Ipp64f threshold = 0;
Ipp32s i, j;
if( !b )
nb = m;
for( i = 0; i < n; i++ )
memset( x + i*ldx, 0, nb*sizeof(x[0]));
for( i = 0; i < n; i++ )
threshold += w[i];
threshold *= 2*DBL_EPSILON;
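/* singular values at or below 2*DBL_EPSILON*sum(w) are treated as zero;
   their terms are simply skipped, since x has already been zeroed above */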
/* vT * inv(w) * uT * b */
for( i = 0; i < n; i++, uT += lduT, vT += ldvT )
{
Ipp64f wi = w[i];
if( wi > threshold )
{
wi = 1./wi;
if( nb == 1 )
{
Ipp64f s = 0;
if( b )
{
if( ldb == 1 )
{
for( j = 0; j <= m - 4; j += 4 )
s += uT[j]*b[j] + uT[j+1]*b[j+1] + uT[j+2]*b[j+2] + uT[j+3]*b[j+3];
for( ; j < m; j++ )
s += uT[j]*b[j];
}
else
{
for( j = 0; j < m; j++ )
s += uT[j]*b[j*ldb];
}
}
else
s = uT[0];
s *= wi;
if( ldx == 1 )
{
for( j = 0; j <= n - 4; j += 4 )
{
Ipp64f t0 = x[j] + s*vT[j];
Ipp64f t1 = x[j+1] + s*vT[j+1];
x[j] = t0;
x[j+1] = t1;
t0 = x[j+2] + s*vT[j+2];
t1 = x[j+3] + s*vT[j+3];
x[j+2] = t0;
x[j+3] = t1;
}
for( ; j < n; j++ )
x[j] += s*vT[j];
}
else
{
for( j = 0; j < n; j++ )
x[j*ldx] += s*vT[j];
}
}
else
{
if( b )
{
memset( buffer, 0, nb*sizeof(buffer[0]));
ownMatrAXPY_64f( m, nb, b, ldb, uT, buffer, 0 );
for( j = 0; j < nb; j++ )
buffer[j] *= wi;
}
else
{
for( j = 0; j < nb; j++ )
buffer[j] = uT[j]*wi;
}
ownMatrAXPY_64f( n, nb, buffer, 0, vT, x, ldx );
}
}
}
}
/* Given a list of corresponding points a[i] <-> b[i], estimate the 2x3
   transformation M (applied to a in homogeneous form [x y 1]) that minimizes
   the error sum(||M*a - b||^2). */
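/* Both branches below build the normal equations sa*m = sb of this least-squares
   problem and solve them with the SVD-based routines above, which act as a
   pseudo-inverse and so remain stable for degenerate point configurations. */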
static void
ownGetRTMatrix(const IppiPoint_32f* a, const IppiPoint_32f* b,
Ipp32s count, Ipp64f* M, Ipp32s nwp)
{
if (nwp == 3) {
/* Full affine transformation, M = [a b | c]
[d e | f] */
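/* The 6 unknowns decouple into two 3x3 systems that share the matrix
       [ sum(x*x)  sum(x*y)  sum(x) ]
       [ sum(x*y)  sum(y*y)  sum(y) ]
       [ sum(x)    sum(y)    N      ]
   (with (x,y) = a[i]); hence only sa[0..2], sa[6..8] and sa[12..14] are
   accumulated in the loop and then copied into the second diagonal block. */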
Ipp64f sa[36], sb[6], w[6], u[36], v[36], buffer[24];
Ipp32s i;
memset( sa, 0, sizeof(sa) );
memset( sb, 0, sizeof(sb) );
for( i = 0; i < count; i++ )
{
sa[0] += a[i].x*a[i].x;
sa[1] += a[i].y*a[i].x;
sa[2] += a[i].x;
sa[6] += a[i].x*a[i].y;
sa[7] += a[i].y*a[i].y;
sa[8] += a[i].y;
sa[12] += a[i].x;
sa[13] += a[i].y;
sa[14] += 1;
sb[0] += a[i].x*b[i].x;
sb[1] += a[i].y*b[i].x;
sb[2] += b[i].x;
sb[3] += a[i].x*b[i].y;
sb[4] += a[i].y*b[i].y;
sb[5] += b[i].y;
}
sa[21] = sa[0];
sa[22] = sa[1];
sa[23] = sa[2];
sa[27] = sa[6];
sa[28] = sa[7];
sa[29] = sa[8];
sa[33] = sa[12];
sa[34] = sa[13];
sa[35] = sa[14];
ownSVD_64f( sa, 6, 6, 6, w, u, 6, 6, v, 6, buffer );
ownSVBkSb_64f( 6, 6, w, u, 6, v, 6, sb, 1, 1, M, 1, 0 );
} else {
/* Rotation + zoom + shift, so M = [a -b | c]
[b a | d] */
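/* Only 4 unknowns m = (a, b, c, d) are estimated here; with (x,y) = a[i] and
   (bx,by) = b[i], the normal equations are
       [ sum(x*x+y*y)   0             sum(x)   sum(y) ] [a]   [ sum(x*bx + y*by) ]
       [ 0              sum(x*x+y*y) -sum(y)   sum(x) ] [b] = [ sum(x*by - y*bx) ]
       [ sum(x)        -sum(y)        N        0      ] [c]   [ sum(bx)          ]
       [ sum(y)         sum(x)        0        N      ] [d]   [ sum(by)          ]
   which is exactly what the loop below accumulates into sa (row-major 4x4) and sb. */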
Ipp64f sa[16], sb[4], w[4], u[16], v[16], m[4];
Ipp64f buffer[16];
Ipp32s i;
memset( sa, 0, sizeof(sa) );
memset( sb, 0, sizeof(sb) );
for( i = 0; i < count; i++ )
{
sa[0] += a[i].x*a[i].x + a[i].y*a[i].y;
sa[1] += 0;
sa[2] += a[i].x;
sa[3] += a[i].y;
sa[4] += 0;
sa[5] += a[i].x*a[i].x + a[i].y*a[i].y;
sa[6] += -a[i].y;
sa[7] += a[i].x;
sa[8] += a[i].x;
sa[9] += -a[i].y;
sa[10] += 1;
sa[11] += 0;
sa[12] += a[i].y;
sa[13] += a[i].x;
sa[14] += 0;
sa[15] += 1;
sb[0] += a[i].x*b[i].x + a[i].y*b[i].y;
sb[1] += a[i].x*b[i].y - a[i].y*b[i].x;
sb[2] += b[i].x;
sb[3] += b[i].y;
}
ownSVD_64f( sa, 4, 4, 4, w, u, 4, 4, v, 4, buffer );
ownSVBkSb_64f( 4, 4, w, u, 4, v, 4, sb, 1, 1, m, 1, 0 );
M[0] = M[4] = m[0];
M[1] = -m[1];
M[3] = m[1];
M[2] = m[2];
M[5] = m[3];
}
}
/* Given a list of corresponding points, estimate the optimal rigid transformation.
   A RANSAC algorithm is used that consists of 2 steps:
   1) Within a loop,
      1.1) choose from the original set a random subset of point correspondences
           that is sufficient to calculate the rigid transformation
           (for a rigid 2D transformation, 3 point correspondences are enough);
      1.2) calculate the transformation matrix M using ownGetRTMatrix;
      1.3) for each point check the error ||M*a - b|| and count the inliers/outliers.
   2) Choose the transformation that gives the smallest number of outliers (the best case)
      and calculate the final transformation matrix using ownGetRTMatrix from the
      "best case" inliers.
*/
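/* Note on the mask argument: it must provide room for 2*count entries, since the
   best consensus mask is kept in its second half (best_mask = mask + count below).
   A minimal calling sketch, using hypothetical names ptsA/ptsB/numPts/msk:

       Ipp64f Mest[6];
       Ipp8s  msk[2 * 64];                      // enough for numPts <= 64
       Ipp32s inliers = ownGetRTMatrix_RANSAC(ptsA, ptsB, numPts, Mest,
                                               100, 20.0, 3, msk);
       // the caller decides how many inliers are enough; FindTransformGMC below
       // accepts the estimate only when inliers*5 exceeds the number of tracked points
*/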
static Ipp32s
ownGetRTMatrix_RANSAC(IppiPoint_32f* A, IppiPoint_32f* B,
Ipp32s count, Ipp64f* M, Ipp32s iterations,
Ipp64f min_triangle_side, Ipp32s nwp, Ipp8s* mask)
{
#define RANSAC_SIZE0 3
#define SUBSET_SELECTION_MAX_ATTEMPTS 100
Ipp32s i, j, k, k1;
Ipp64u rng = (Ipp64u)-1;
Ipp8s* best_mask = mask + count;
Ipp32s max_inlier_count = 0;
// 1. find the consensus
for( k = 0; k < iterations; k++ )
{
Ipp32s idx[RANSAC_SIZE0];
IppiPoint_32f a[RANSAC_SIZE0];
IppiPoint_32f b[RANSAC_SIZE0];
Ipp32s inlier_count = 0;
// choose 3 random, sufficiently separated and non-collinear point correspondences from A & B
for( i = 0; i < RANSAC_SIZE0; i++ )
{
for( k1 = 0; k1 < SUBSET_SELECTION_MAX_ATTEMPTS; k1++ )
{
idx[i] = OWN_RNG_NEXT(rng) % count;
for( j = 0; j < i; j++ )
{
if( idx[j] == idx[i] )
break;
// check that the points are not too close to each other
if( fabs(A[idx[i]].x - A[idx[j]].x) +
fabs(A[idx[i]].y - A[idx[j]].y) < min_triangle_side )
break;
if( fabs(B[idx[i]].x - B[idx[j]].x) +
fabs(B[idx[i]].y - B[idx[j]].y) < min_triangle_side )
break;
}
if( j < i )
continue;
if( i+1 == RANSAC_SIZE0 )
{
// additional check that neither point triple is (nearly) collinear
a[0] = A[idx[0]];
a[1] = A[idx[1]];
a[2] = A[idx[2]];
b[0] = B[idx[0]];
b[1] = B[idx[1]];
b[2] = B[idx[2]];
if( fabs((a[1].x - a[0].x)*(a[2].y - a[0].y) - (a[1].y - a[0].y)*(a[2].x - a[0].x)) < 1 ||
fabs((b[1].x - b[0].x)*(b[2].y - b[0].y) - (b[1].y - b[0].y)*(b[2].x - b[0].x)) < 1 )
continue;
}
break;
}
if( k1 >= SUBSET_SELECTION_MAX_ATTEMPTS )
break;
}
if( i < RANSAC_SIZE0 )
continue;
// estimate the transformation using 3 points
ownGetRTMatrix( a, b, RANSAC_SIZE0, M, nwp );
for( i = 0; i < count; i++ )
{
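// a correspondence counts as an inlier when its L1 reprojection error
// |dx| + |dy| is below 8 (pixel units for the points used here)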
mask[i] = fabs( M[0]*A[i].x + M[1]*A[i].y + M[2] - B[i].x ) +
fabs( M[3]*A[i].x + M[4]*A[i].y + M[5] - B[i].y ) < 8;
inlier_count += mask[i];
}
if( inlier_count >= max_inlier_count )
{
memcpy( best_mask, mask, count );
max_inlier_count = inlier_count;
if( max_inlier_count == count )
break;
}
}
if( max_inlier_count )
{
// 2. repack the points, leave only the good ones
if( max_inlier_count < count )
for( i = 0, j = 0; i < count; i++ )
{
if( best_mask[i] )
{
A[j] = A[i];
B[j] = B[i];
j++;
}
}
// 3. estimate the final transformation
ownGetRTMatrix( A, B, max_inlier_count, M, nwp );
}
return max_inlier_count;
}
void ippVideoEncoderMPEG4::PyramidCalc(const Ipp8u* img, IppiPyramid* pyramid)
{
pyramid->pImage[0] = (Ipp8u*)img;
pyramid->pStep[0] = mStepLuma;
for (Ipp32s i = 0; i < pyramid->level; i ++)
ippiPyramidLayerDown_8u_C1R((const Ipp8u*)pyramid->pImage[i], pyramid->pStep[i], pyramid->pRoi[i],
(Ipp8u*)pyramid->pImage[i+1], pyramid->pStep[i+1], pyramid->pRoi[i+1],
(IppiPyramidDownState_8u_C1R*)(pyramid->pState));
}
#endif // USE_CV_GME
bool ippVideoEncoderMPEG4::FindTransformGMC()
{
// with one warping point only a global translation is estimated
if (VOL.no_of_sprite_warping_points == 1) {
Ipp32s i, j;
Ipp32s *shX, *shY;
Ipp32s mbW, mbH, mbStep, xIndx, yIndx;
mp4_Data_ME meData;
meData.method = mMEmethod;
meData.flags = mMEflags & (~ME_QP) & (~ME_USE_MVWEIGHT) & (~ME_CHROMA);
meData.quant = 0;
meData.fcode = 1;
meData.rt = 0;
#ifdef USE_ME_SADBUFF
meData.meBuff = mMEfastSAD;
#endif
meData.thrDiff = meData.thrDiff16x16 = 256;
meData.sadFunc = ippiSAD16x16_8u32s;
meData.stepL = mStepLuma;
meData.numPred = 1;
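// sample the frame on a coarse grid: one 16x16 block from every 4x4 group of
// macroblocks, starting one macroblock in from the border, and run a small
// motion search on each sampled block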
mbStep = 4;
mbW = ((mNumMacroBlockPerRow - 3) / mbStep) + 1;
mbH = ((mNumMacroBlockPerCol - 3) / mbStep) + 1;
shX = ippsMalloc_32s(mbW * mbH);
shY = ippsMalloc_32s(mbW * mbH);
yIndx = 1;
for (i = 0; i < mbH; i ++) {
meData.yT = -IPP_MIN(yIndx * 16 + 16, mPVOPsearchVer);
meData.yB = IPP_MIN((mNumMacroBlockPerCol - yIndx) * 16, mPVOPsearchVer);
xIndx = 1;
meData.yMB = i;
for (j = 0; j < mbW; j ++) {
meData.pYc = mFrameC->pY + yIndx * 16 * mStepLuma + xIndx * 16;
meData.pYr = mFrameF->pY + yIndx * 16 * mStepLuma + xIndx * 16;
meData.xMB = j;
// SAD at (0,0)
ippiSAD16x16_8u32s(meData.pYc, meData.stepL, meData.pYr, meData.stepL, &meData.bestDiff, IPPVC_MC_APX_FF);
if (meData.bestDiff > meData.thrDiff16x16) {
meData.xL = -IPP_MIN(xIndx * 16 + 16, mPVOPsearchHor);
meData.xR = IPP_MIN((mNumMacroBlockPerRow - xIndx) * 16, mPVOPsearchHor);
meData.mvPred[0].dx = (Ipp16s)VOP.warping_mv_code_du[0];
meData.mvPred[0].dy = (Ipp16s)VOP.warping_mv_code_dv[0];
mp4_ME_SAD(&meData);
shX[i*mbW+j] = meData.xPos;
shY[i*mbW+j] = meData.yPos;
} else {
shX[i*mbW+j] = 0;
shY[i*mbW+j] = 0;
}
xIndx += mbStep;
}
yIndx += mbStep;
}
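// take the median of the per-block shifts as a robust estimate of the global
// translation for warping point 0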
ippsSortAscend_32s_I(shX, mbW * mbH);
ippsSortAscend_32s_I(shY, mbW * mbH);
VOP.warping_mv_code_du[0] = shX[mbW * mbH >> 1];
VOP.warping_mv_code_dv[0] = shY[mbW * mbH >> 1];
ippsFree(shX);
ippsFree(shY);
}
#ifdef USE_CV_GME
else if (VOL.no_of_sprite_warping_points > 1) {
Ipp32s i0, j0, i1, j1, i2, j2, i, j, k;
Ipp64f affmat[6];
IppStatus sts;
Ipp32f min_triangle_size;
PyramidCalc(mFrameC->pY, mPyrC);
PyramidCalc(mFrameF->pY, mPyrR);
// make a uniform grid of tracked points
for (i = 0, k = 0; i < mOptFlowNumPointY; i++)
for (j = 0; j < mOptFlowNumPointX; j ++, k ++) {
mOptFlowPtC[k].x = mOptFlowPtR[k].x = (j + 0.5f) * mPyrC->pRoi[0].width / mOptFlowNumPointX;
mOptFlowPtC[k].y = mOptFlowPtR[k].y = (i + 0.5f) * mPyrC->pRoi[0].height / mOptFlowNumPointY;
}
// track the points
sts = ippiOpticalFlowPyrLK_8u_C1R(mPyrC, mPyrR, mOptFlowPtC, mOptFlowPtR, mOptFlowPtSts,
mOptFlowPatchDiff, mOptFlowNumPoint, mOptFlowWinSize,
mPyrC->level, 20, 0.1f, mOptFlowState);
if (sts == ippStsNoErr) {
// keep only the successfully tracked points, packing them to the front of the arrays
for (i = j = 0; i < mOptFlowNumPoint; i++) {
if (mOptFlowPtSts[i] == 0) {
mOptFlowPtC[j] = mOptFlowPtC[i];
mOptFlowPtR[j] = mOptFlowPtR[i];
j++;
}
}
if (j == 0) {
sts = ippStsSingularity;
} else {
// apply the RANSAC algorithm to find the rigid transformation
min_triangle_size = 20.f;
min_triangle_size = IPP_MIN(min_triangle_size, mPyrC->pRoi[0].height * 0.1f);
min_triangle_size = IPP_MIN(min_triangle_size, mPyrC->pRoi[0].width * 0.1f);
i = ownGetRTMatrix_RANSAC(mOptFlowPtC, mOptFlowPtR, j, affmat, 100, min_triangle_size,
VOL.no_of_sprite_warping_points, mOptFlowMask);
// accept the result only if at least one fifth of all tracked points are inliers;
// otherwise report ippStsSingularity, meaning that no good transformation was found
sts = i < 0 ? (IppStatus)i : i * 5 > mOptFlowNumPoint ? ippStsOk : ippStsSingularity;
}
}
if (sts != ippStsNoErr) {
VOP.warping_mv_code_du[0] = VOP.warping_mv_code_du[1] = VOP.warping_mv_code_du[2] = 0;
VOP.warping_mv_code_dv[0] = VOP.warping_mv_code_dv[1] = VOP.warping_mv_code_dv[2] = 0;
return false;
}
i0 = (Ipp32s)((affmat[0] * 0 + affmat[1] * 0 + affmat[2]) * 2.0);
j0 = (Ipp32s)((affmat[3] * 0 + affmat[4] * 0 + affmat[5]) * 2.0);
i1 = (Ipp32s)((affmat[0] * mSourceWidth + affmat[1] * 0 + affmat[2]) * 2.0);
j1 = (Ipp32s)((affmat[3] * mSourceWidth + affmat[4] * 0 + affmat[5]) * 2.0);
i2 = (Ipp32s)((affmat[0] * 0 + affmat[1] * mSourceHeight + affmat[2]) * 2.0);
j2 = (Ipp32s)((affmat[3] * 0 + affmat[4] * mSourceHeight + affmat[5]) * 2.0);
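// i0..j2 are the warped positions of the reference corners (0,0), (mSourceWidth,0)
// and (0,mSourceHeight) in half-pel units (hence the *2.0); below they are turned
// into MPEG-4's differentially coded warping-point motion vectors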
VOP.warping_mv_code_du[0] = i0;
VOP.warping_mv_code_dv[0] = j0;
VOP.warping_mv_code_du[1] = i1 - mSourceWidth * 2 - i0;
VOP.warping_mv_code_dv[1] = j1 - j0;
VOP.warping_mv_code_du[2] = i2 - i0;
VOP.warping_mv_code_dv[2] = j2 - mSourceHeight * 2 - j0;
}
#endif
return true;
}
} // namespace MPEG4_ENC
#endif //defined (UMC_ENABLE_MPEG4_VIDEO_ENCODER)