// mp4_enc_gme.cpp
/* ///////////////////////////////////////////////////////////////////////
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright (c) 2003-2007 Intel Corporation. All Rights Reserved.
//
// Description: GME
//
*/
#include "umc_defs.h"
#if defined (UMC_ENABLE_MPEG4_VIDEO_ENCODER)
#include <math.h>
#include "mp4_enc.hpp"
#ifdef USE_CV_GME
#include <float.h>
#endif
namespace MPEG4_ENC
{
#ifdef USE_CV_GME
/* Multiply-with-carry pseudo-random step: updates the 64-bit state `rng`
   in place (new state = low32(rng) * 1554115554 + high32(rng)) and
   evaluates to the low 32 bits of the new state. */
#define OWN_RNG_NEXT(rng) \
((rng)=(Ipp64u)(Ipp32u)(rng)*1554115554 + ((rng)>>32), (Ipp32u)(rng))
/////////////////////////////////////////////////////////////////////////////////////////
/* */
/* Singular Value Decomposition and Back Substitution */
/* */
/////////////////////////////////////////////////////////////////////////////////////////
/* Applies a Givens plane rotation with cosine c and sine s to two n-element
   vectors x and y, in place:
     x[i] <-  c*x[i] + s*y[i]
     y[i] <- -s*x[i] + c*y[i]
   Used below to rotate rows of uT/vT during SVD diagonalization. */
#define ownGivens_64f( n, x, y, c, s ) \
{ \
Ipp32s _i; \
Ipp64f* _x = (x); \
Ipp64f* _y = (y); \
\
for( _i = 0; _i < n; _i++ ) \
{ \
Ipp64f t0 = _x[_i]; \
Ipp64f t1 = _y[_i]; \
_x[_i] = t0*c + t1*s; \
_y[_i] = -t0*s + t1*c; \
} \
}
/* y[0:m,0:n] += diag(a[0:m]) * x[0:m,0:n]
   For each row i, adds a[i] * x[i][0:n] into y[i][0:n].
   dx and dy are the row strides of x and y, in elements.
   Each element update is independent, so collapsing the original 4x unroll
   into a plain loop yields bit-identical floating-point results. */
static void
ownMatrAXPY_64f(Ipp32s m, Ipp32s n, const Ipp64f* x, Ipp32s dx,
                const Ipp64f* a, Ipp64f* y, Ipp32s dy)
{
    Ipp32s row;

    for( row = 0; row < m; row++, x += dx, y += dy )
    {
        const Ipp64f coeff = a[row];
        Ipp32s col;

        for( col = 0; col < n; col++ )
            y[col] += coeff * x[col];
    }
}
/* Householder reflection helper used by ownSVD_64f, for both the bidiagonal
   reduction and the U/V accumulation. For each row i = 1..m-1 of y (row
   stride l, so row i starts at y + i*l):
     s        = h * dot( x[0:n], y_i[0:n] )
     y_i[-1]  = s * x[-1]      (element just before the row start; used for
                                U & V reconstruction)
     y_i[0:n] += s * x[0:n]
   i.e.  y[1:m,-1]  = h*y[1:m,0:n]*x[0:1,0:n]'*x[-1]
         y[1:m,0:n] += h*y[1:m,0:n]*x[0:1,0:n]'*x[0:1,0:n]
   Row 0 of y is intentionally skipped (i starts at 1).
   NOTE: the dot product is accumulated in unrolled groups of four; the
   grouping affects floating-point rounding, so it must not be reordered. */
static void
ownMatrAXPY3_64f( Ipp32s m, Ipp32s n, const Ipp64f* x, Ipp32s l, Ipp64f* y, Ipp64f h )
{
Ipp32s i, j;
for( i = 1; i < m; i++ )
{
Ipp64f s = 0;
/* advance to the next row BEFORE processing, so row 0 is never touched */
y += l;
/* s = dot(x, current row), unrolled by 4 */
for( j = 0; j <= n - 4; j += 4 )
s += x[j]*y[j] + x[j+1]*y[j+1] + x[j+2]*y[j+2] + x[j+3]*y[j+3];
for( ; j < n; j++ ) s += x[j]*y[j];
s *= h;
/* store the reconstruction element in the column before the row start */
y[-1] = s*x[-1];
/* current row += s * x, unrolled by 4 */
for( j = 0; j <= n - 4; j += 4 )
{
Ipp64f t0 = y[j] + s*x[j];
Ipp64f t1 = y[j+1] + s*x[j+1];
y[j] = t0;
y[j+1] = t1;
t0 = y[j+2] + s*x[j+2];
t1 = y[j+3] + s*x[j+3];
y[j+2] = t0;
y[j+3] = t1;
}
for( ; j < n; j++ ) y[j] += s*x[j];
}
}
/* Numerically robust sqrt(a*a + b*b): divides by the larger magnitude so the
   squared ratio stays in [0,1], avoiding overflow/underflow of the naive
   intermediate squares. Returns 0 when both inputs are 0. */
static Ipp64f
ownPythag( Ipp64f a, Ipp64f b )
{
    Ipp64f p = fabs( a );
    Ipp64f q = fabs( b );

    if( p > q )
    {
        Ipp64f r = q / p;
        return p * sqrt( 1. + r * r );
    }
    if( q != 0 )
    {
        Ipp64f r = p / q;
        return q * sqrt( 1. + r * r );
    }
    /* p <= q == 0, hence both magnitudes are zero */
    return p;
}
/* Upper bound on QR iterations per singular value in the diagonalization loop. */
#define MAX_ITERS 30
/* decomposes a (m x n, m >=n): a = (uT)'*w*vT,
where uT and vT are orthogonal, uT is nu x m (nu=m or nu=n), is w is diagonal.
Only the diagonal of w is stored. uT and vT are optional,
lda, lduT and ldvT are strides (steps) of a, uT and vT, respectively, in
_elements_, not in bytes. the buffer must be at least n + 2*m + 1 elements large. */
static void
ownSVD_64f(Ipp64f* a, Ipp32s lda, Ipp32s m, Ipp32s n,
Ipp64f* w,
Ipp64f* uT, Ipp32s lduT, Ipp32s nu,
Ipp64f* vT, Ipp32s ldvT,
Ipp64f* buffer)
{
Ipp64f* e;
Ipp64f* temp;
Ipp64f *w1, *e1;
Ipp64f *hv;
Ipp64f ku0 = 0, kv0 = 0;
Ipp64f anorm = 0;
Ipp64f *a1, *u0 = uT, *v0 = vT;
Ipp64f scale, h;
Ipp32s i, j, k, l;
Ipp32s m1, n1;
Ipp32s nv = n;
Ipp32s iters = 0;
Ipp64f* hv0;
e = buffer;
w1 = w;
e1 = e + 1;
temp = e + n;
hv0 = temp + m + 1;
memset( w, 0, n * sizeof( w[0] ));
memset( e, 0, n * sizeof( e[0] ));
m1 = m;
n1 = n;
/* transform a to bi-diagonal form */
for( ;; )
{
Ipp32s update_u;
Ipp32s update_v;
if( m1 == 0 )
break;
scale = h = 0;
update_u = uT && m1 > m - nu;
hv = update_u ? uT : hv0;
for( j = 0, a1 = a; j < m1; j++, a1 += lda )
{
Ipp64f t = a1[0];
scale += fabs( hv[j] = t );
}
if( scale != 0 )
{
Ipp64f f = 1./scale, g, s = 0;
for( j = 0; j < m1; j++ )
{
Ipp64f t = (hv[j] *= f);
s += t * t;
}
g = sqrt( s );
f = hv[0];
if( f >= 0 )
g = -g;
hv[0] = f - g;
h = 1. / (f * g - s);
memset( temp, 0, n1 * sizeof( temp[0] ));
/* calc temp[0:n-i] = a[i:m,i:n]'*hv[0:m-i] */
ownMatrAXPY_64f( m1, n1 - 1, a + 1, lda, hv, temp + 1, 0 );
for( k = 1; k < n1; k++ ) temp[k] *= h;
/* modify a: a[i:m,i:n] = a[i:m,i:n] + hv[0:m-i]*temp[0:n-i]' */
ownMatrAXPY_64f( m1, n1 - 1, temp + 1, 0, hv, a + 1, lda );
*w1 = g*scale;
}
w1++;
/* store -2/(hv'*hv) */
if( update_u )
{
if( m1 == m )
ku0 = h;
else
hv[-1] = h;
}
a++;
n1--;
if( vT )
vT += ldvT + 1;
if( n1 == 0 )
break;
scale = h = 0;
update_v = vT && n1 > n - nv;
hv = update_v ? vT : hv0;
for( j = 0; j < n1; j++ )
{
Ipp64f t = a[j];
scale += fabs( hv[j] = t );
}
if( scale != 0 )
{
Ipp64f f = 1./scale, g, s = 0;
for( j = 0; j < n1; j++ )
{
Ipp64f t = (hv[j] *= f);
s += t * t;
}
g = sqrt( s );
f = hv[0];
if( f >= 0 )
g = -g;
hv[0] = f - g;
h = 1. / (f * g - s);
hv[-1] = 0.;
/* update a[i:m:i+1:n] = a[i:m,i+1:n] + (a[i:m,i+1:n]*hv[0:m-i])*... */
ownMatrAXPY3_64f( m1, n1, hv, lda, a, h );
*e1 = g*scale;
}
e1++;
/* store -2/(hv'*hv) */
if( update_v )
{
if( n1 == n )
kv0 = h;
else
hv[-1] = h;
}
a += lda;
m1--;
if( uT )
uT += lduT + 1;
}
m1 -= m1 != 0;
n1 -= n1 != 0;
/* accumulate left transformations */
if( uT )
{
m1 = m - m1;
uT = u0 + m1 * lduT;
for( i = m1; i < nu; i++, uT += lduT )
{
memset( uT + m1, 0, (m - m1) * sizeof( uT[0] ));
uT[i] = 1.;
}
for( i = m1 - 1; i >= 0; i-- )
{
Ipp64f s;
Ipp32s lh = nu - i;
l = m - i;
hv = u0 + (lduT + 1) * i;
h = i == 0 ? ku0 : hv[-1];
// VM_ASSERT( h <= 0 );
if( h != 0 )
{
uT = hv;
ownMatrAXPY3_64f( lh, l-1, hv+1, lduT, uT+1, h );
s = hv[0] * h;
for( k = 0; k < l; k++ ) hv[k] *= s;
hv[0] += 1;
}
else
{
for( j = 1; j < l; j++ )
hv[j] = 0;
for( j = 1; j < lh; j++ )
hv[j * lduT] = 0;
hv[0] = 1;
}
}
uT = u0;
}
/* accumulate right transformations */
if( vT )
{
n1 = n - n1;
vT = v0 + n1 * ldvT;
for( i = n1; i < nv; i++, vT += ldvT )
{
memset( vT + n1, 0, (n - n1) * sizeof( vT[0] ));
vT[i] = 1.;
}
for( i = n1 - 1; i >= 0; i-- )
{
Ipp64f s;
Ipp32s lh = nv - i;
l = n - i;
hv = v0 + (ldvT + 1) * i;
h = i == 0 ? kv0 : hv[-1];
// VM_ASSERT( h <= 0 );
if( h != 0 )
{
vT = hv;
ownMatrAXPY3_64f( lh, l-1, hv+1, ldvT, vT+1, h );
s = hv[0] * h;
for( k = 0; k < l; k++ ) hv[k] *= s;
hv[0] += 1;
}
else
{
for( j = 1; j < l; j++ )
hv[j] = 0;
for( j = 1; j < lh; j++ )
hv[j * ldvT] = 0;
hv[0] = 1;
}
}
vT = v0;
}
for( i = 0; i < n; i++ )
{
Ipp64f tnorm = fabs( w[i] );
tnorm += fabs( e[i] );
if( anorm < tnorm )
anorm = tnorm;
}
anorm *= DBL_EPSILON;
/* diagonalization of the bidiagonal form */
for( k = n - 1; k >= 0; k-- )
{
Ipp64f z = 0;
iters = 0;
for( ;; ) /* do iterations */
{
Ipp64f c, s, f, g, x, y;
Ipp32s flag = 0;
/* test for splitting */
for( l = k; l >= 0; l-- )
{
if( fabs(e[l]) <= anorm )
{
flag = 1;
break;
}
// VM_ASSERT( l > 0 );
if( fabs(w[l - 1]) <= anorm )
break;
}
if( !flag )
{
c = 0;
s = 1;
for( i = l; i <= k; i++ )
{
f = s * e[i];
e[i] *= c;
if( anorm + fabs( f ) == anorm )
break;
g = w[i];
h = ownPythag( f, g );
w[i] = h;
c = g / h;
s = -f / h;
if( uT )
ownGivens_64f( m, uT + lduT * (l - 1), uT + lduT * i, c, s );
}
}
z = w[k];
if( l == k || iters++ == MAX_ITERS )
break;
/* shift from bottom 2x2 minor */
x = w[l];
y = w[k - 1];
g = e[k - 1];
h = e[k];
f = 0.5 * (((g + z) / h) * ((g - z) / y) + y / h - h / y);
g = ownPythag( f, 1 );
if( f < 0 )
g = -g;
f = x - (z / x) * z + (h / x) * (y / (f + g) - h);
/* next QR transformation */
c = s = 1;
for( i = l + 1; i <= k; i++ )
{
g = e[i];
y = w[i];
h = s * g;
g *= c;
z = ownPythag( f, h );
e[i - 1] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = -x * s + g * c;
h = y * s;
y *= c;
if( vT )
ownGivens_64f( n, vT + ldvT * (i - 1), vT + ldvT * i, c, s );
z = ownPythag( f, h );
w[i - 1] = z;
/* rotation can be arbitrary if z == 0 */
if( z != 0 )
{
c = f / z;
s = h / z;
}
f = c * g + s * y;
x = -s * g + c * y;
if( uT )
ownGivens_64f( m, uT + lduT * (i - 1), uT + lduT * i, c, s );
}
e[l] = 0;
e[k] = f;
w[k] = x;
} /* end of iteration loop */
if( iters > MAX_ITERS )
break;
/* NOTE(review): the source capture is truncated at this point — the remainder
   of ownSVD_64f (and any subsequent GME code in this file) is missing from
   this chunk. Web-viewer UI residue (keyboard-shortcut help text) removed. */