⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 checkasm.c.svn-base

📁 此段代码是h.264在linux下的编码源程序,在linux下编译可以得到可执行程序
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include "common/common.h"#include "common/cpu.h"#ifdef HAVE_MMXEXT#include "common/i386/pixel.h"#include "common/i386/dct.h"#include "common/i386/mc.h"#endif#ifdef ARCH_PPC#include "common/ppc/pixel.h"#include "common/ppc/mc.h"#endif/* buf1, buf2: initialised to random data and shouldn't write into them */uint8_t * buf1, * buf2;/* buf3, buf4: used to store output */uint8_t * buf3, * buf4;/* buf5: temp */uint8_t * buf5;#define report( name ) { \    if( used_asm ) \        fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \    if( !ok ) ret = -1; \}static int check_pixel( int cpu_ref, int cpu_new ){    x264_pixel_function_t pixel_c;    x264_pixel_function_t pixel_ref;    x264_pixel_function_t pixel_asm;    x264_predict_t predict_16x16[4+3];    x264_predict_t predict_8x8c[4+3];    x264_predict_t predict_4x4[9+3];    x264_predict8x8_t predict_8x8[9+3];    DECLARE_ALIGNED( uint8_t, edge[33], 8 );    int ret = 0, ok, used_asm;    int i, j;    x264_pixel_init( 0, &pixel_c );    x264_pixel_init( cpu_ref, &pixel_ref );    x264_pixel_init( cpu_new, &pixel_asm );    x264_predict_16x16_init( 0, predict_16x16 );    x264_predict_8x8c_init( 0, predict_8x8c );    x264_predict_8x8_init( 0, predict_8x8 );    x264_predict_4x4_init( 0, predict_4x4 );    x264_predict_8x8_filter( buf2+40, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );#define TEST_PIXEL( name ) \    for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \    { \        int res_c, res_asm; \        if( pixel_asm.name[i] != pixel_ref.name[i] ) \        { \            used_asm = 1; \            res_c   = pixel_c.name[i]( buf1, 32, buf2, 16 ); \            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 16 ); \            if( res_c != res_asm ) \            { \                ok = 0; \                fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \            } \        } \    } \    report( "pixel " #name " :" );    TEST_PIXEL( sad );    TEST_PIXEL( ssd );    TEST_PIXEL( satd );    TEST_PIXEL( sa8d );#define TEST_PIXEL_X( N ) \    for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \    { \        int res_c[4]={0}, res_asm[4]={0}; \        if( pixel_asm.sad_x##N[i] && pixel_asm.sad_x##N[i] != pixel_ref.sad_x##N[i] ) \        { \            used_asm = 1; \            res_c[0] = pixel_c.sad[i]( buf1, 16, buf2, 32 ); \            res_c[1] = pixel_c.sad[i]( buf1, 16, buf2+30, 32 ); \            res_c[2] = pixel_c.sad[i]( buf1, 16, buf2+1, 32 ); \            if(N==4) \            { \                res_c[3] = pixel_c.sad[i]( buf1, 16, buf2+99, 32 ); \                pixel_asm.sad_x4[i]( buf1, buf2, buf2+30, buf2+1, buf2+99, 32, res_asm ); \            } \            else \                pixel_asm.sad_x3[i]( buf1, buf2, buf2+30, buf2+1, 32, res_asm ); \            if( memcmp(res_c, res_asm, sizeof(res_c)) ) \            { \                ok = 0; \                fprintf( stderr, "sad_x"#N"[%d]: %d,%d,%d,%d != %d,%d,%d,%d [FAILED]\n", \                         i, res_c[0], res_c[1], res_c[2], res_c[3], \                         res_asm[0], res_asm[1], res_asm[2], res_asm[3] ); \            } \        } \    } \    report( "pixel sad_x"#N" :" );    TEST_PIXEL_X(3);    TEST_PIXEL_X(4);#define TEST_INTRA_SATD( name, pred, satd, i8x8, ... ) \    if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \    { \        int res_c[3], res_asm[3]; \        used_asm = 1; \        memcpy( buf3, buf2, 1024 ); \        for( i=0; i<3; i++ ) \        { \            pred[i]( buf3+40, ##__VA_ARGS__ ); \            res_c[i] = pixel_c.satd( buf1+40, 16, buf3+40, 32 ); \        } \        pixel_asm.name( buf1+40, i8x8 ? edge : buf3+40, res_asm ); \        if( memcmp(res_c, res_asm, sizeof(res_c)) ) \        { \            ok = 0; \            fprintf( stderr, #name": %d,%d,%d != %d,%d,%d [FAILED]\n", \                     res_c[0], res_c[1], res_c[2], \                     res_asm[0], res_asm[1], res_asm[2] ); \        } \    }    ok = 1; used_asm = 0;    TEST_INTRA_SATD( intra_satd_x3_16x16, predict_16x16, satd[PIXEL_16x16], 0 );    TEST_INTRA_SATD( intra_satd_x3_8x8c, predict_8x8c, satd[PIXEL_8x8], 0 );    TEST_INTRA_SATD( intra_satd_x3_4x4, predict_4x4, satd[PIXEL_4x4], 0 );    TEST_INTRA_SATD( intra_sa8d_x3_8x8, predict_8x8, sa8d[PIXEL_8x8], 1, edge );    report( "intra satd_x3 :" );    if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||        pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )    {        float res_c, res_a;        ok = 1;        x264_cpu_restore( cpu_new );        res_c = x264_pixel_ssim_wxh( &pixel_c,   buf1+2, 32, buf2+2, 32, 32, 28 );        res_a = x264_pixel_ssim_wxh( &pixel_asm, buf1+2, 32, buf2+2, 32, 32, 28 );        if( fabs(res_c - res_a) > 1e-8 )        {            ok = 0;            fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );        }        report( "ssim :" );    }    ok = 1; used_asm = 0;    for( i=0; i<4; i++ )        if( pixel_asm.ads[i] != pixel_ref.ads[i] )        {            uint16_t res_a[32], res_c[32];            uint16_t sums[72];            int dc[4];            for( j=0; j<72; j++ )                sums[j] = rand() & 0x3fff;            for( j=0; j<4; j++ )                dc[j] = rand() & 0x3fff;            used_asm = 1;            pixel_c.ads[i]( dc, sums, 32, res_c, 32 );            pixel_asm.ads[i]( dc, sums, 32, res_a, 32 );            if( memcmp(res_a, res_c, sizeof(res_c)) )                ok = 0;        }    report( "esa ads:" );    return ret;}static int check_dct( int cpu_ref, int cpu_new ){    x264_dct_function_t dct_c;    x264_dct_function_t dct_ref;    x264_dct_function_t dct_asm;    int ret = 0, ok, used_asm;    int16_t dct1[16][4][4] __attribute__((aligned(16)));    int16_t dct2[16][4][4] __attribute__((aligned(16)));    x264_dct_init( 0, &dct_c );    x264_dct_init( cpu_ref, &dct_ref);    x264_dct_init( cpu_new, &dct_asm );#define TEST_DCT( name, t1, t2, size ) \    if( dct_asm.name != dct_ref.name ) \    { \        used_asm = 1; \        dct_c.name( t1, buf1, buf2 ); \        dct_asm.name( t2, buf1, buf2 ); \        if( memcmp( t1, t2, size ) ) \        { \            ok = 0; \            fprintf( stderr, #name " [FAILED]\n" ); \        } \    }    ok = 1; used_asm = 0;    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );    TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );    TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );    report( "sub_dct4 :" );    ok = 1; used_asm = 0;    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );    report( "sub_dct8 :" );#undef TEST_DCT    /* copy coefs because idct8 modifies them in place */    memcpy( buf5, dct1, 512 );#define TEST_IDCT( name ) \    if( dct_asm.name != dct_ref.name ) \    { \        used_asm = 1; \        memcpy( buf3, buf1, 32*32 ); \        memcpy( buf4, buf1, 32*32 ); \        memcpy( dct1, buf5, 512 ); \        memcpy( dct2, buf5, 512 ); \        dct_c.name( buf3, (void*)dct1 ); \        dct_asm.name( buf4, (void*)dct2 ); \        if( memcmp( buf3, buf4, 32*32 ) ) \        { \            ok = 0; \            fprintf( stderr, #name " [FAILED]\n" ); \        } \    }    ok = 1; used_asm = 0;    TEST_IDCT( add4x4_idct );    TEST_IDCT( add8x8_idct );    TEST_IDCT( add16x16_idct );    report( "add_idct4 :" );    ok = 1; used_asm = 0;    TEST_IDCT( add8x8_idct8 );    TEST_IDCT( add16x16_idct8 );    report( "add_idct8 :" );#undef TEST_IDCT    ok = 1; used_asm = 0;    if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )    {        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        used_asm = 1;        dct_c.dct4x4dc( dct1 );        dct_asm.dct4x4dc( dct2 );        if( memcmp( dct1, dct2, 32 ) )        {            ok = 0;            fprintf( stderr, " - dct4x4dc :        [FAILED]\n" );        }    }    if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )    {        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        used_asm = 1;        dct_c.idct4x4dc( dct1 );        dct_asm.idct4x4dc( dct2 );        if( memcmp( dct1, dct2, 32 ) )        {            ok = 0;            fprintf( stderr, " - idct4x4dc :        [FAILED]\n" );        }    }    report( "(i)dct4x4dc :" );    ok = 1; used_asm = 0;    if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )    {        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        used_asm = 1;        dct_c.dct2x2dc( dct1 );        dct_asm.dct2x2dc( dct2 );        if( memcmp( dct1, dct2, 4*2 ) )        {            ok = 0;            fprintf( stderr, " - dct2x2dc :        [FAILED]\n" );        }    }    if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )    {        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        used_asm = 1;        dct_c.idct2x2dc( dct1 );        dct_asm.idct2x2dc( dct2 );        if( memcmp( dct1, dct2, 4*2 ) )        {            ok = 0;            fprintf( stderr, " - idct2x2dc :       [FAILED]\n" );        }    }    report( "(i)dct2x2dc :" );    return ret;}static int check_mc( int cpu_ref, int cpu_new ){    x264_mc_functions_t mc_c;    x264_mc_functions_t mc_ref;    x264_mc_functions_t mc_a;    uint8_t *src     = &buf1[2*32+2];    uint8_t *src2[4] = { &buf1[2*32+2],  &buf1[7*32+2],                         &buf1[12*32+2], &buf1[17*32+2] };    uint8_t *dst1    = &buf3[2*32+2];    uint8_t *dst2    = &buf4[2*32+2];    int dx, dy, i, j, w;    int ret = 0, ok, used_asm;    x264_mc_init( 0, &mc_c );    x264_mc_init( cpu_ref, &mc_ref );    x264_mc_init( cpu_new, &mc_a );#define MC_TEST_LUMA( w, h ) \        if( mc_a.mc_luma != mc_ref.mc_luma ) \        { \            used_asm = 1; \            memset(buf3, 0xCD, 1024); \            memset(buf4, 0xCD, 1024); \            mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h );     \            mc_a.mc_luma( src2, 32, dst2, 16, dx, dy, w, h );   \            if( memcmp( buf3, buf4, 1024 ) )               \            { \                fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h );   \                ok = 0; \            } \        }#define MC_TEST_CHROMA( w, h ) \        if( mc_a.mc_chroma != mc_ref.mc_chroma ) \        { \            used_asm = 1; \            memset(buf3, 0xCD, 1024); \            memset(buf4, 0xCD, 1024); \            mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h );     \            mc_a.mc_chroma( src, 32, dst2, 16, dx, dy, w, h );   \            /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\            for( j=0; j<h; j++ ) \                for( i=w; i<4; i++ ) \                    dst2[i+j*16] = dst1[i+j*16]; \            if( memcmp( buf3, buf4, 1024 ) )               \            { \                fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h );   \                ok = 0; \            } \        }    ok = 1; used_asm = 0;    for( dy = 0; dy < 4; dy++ )        for( dx = 0; dx < 4; dx++ )        {            MC_TEST_LUMA( 16, 16 );            MC_TEST_LUMA( 16, 8 );            MC_TEST_LUMA( 8, 16 );            MC_TEST_LUMA( 8, 8 );            MC_TEST_LUMA( 8, 4 );            MC_TEST_LUMA( 4, 8 );            MC_TEST_LUMA( 4, 4 );        }    report( "mc luma :" );    ok = 1; used_asm = 0;    for( dy = -1; dy < 9; dy++ )        for( dx = -1; dx < 9; dx++ )        {            MC_TEST_CHROMA( 8, 8 );            MC_TEST_CHROMA( 8, 4 );            MC_TEST_CHROMA( 4, 8 );            MC_TEST_CHROMA( 4, 4 );            MC_TEST_CHROMA( 4, 2 );            MC_TEST_CHROMA( 2, 4 );            MC_TEST_CHROMA( 2, 2 );        }    report( "mc chroma :" );#undef MC_TEST_LUMA#undef MC_TEST_CHROMA#define MC_TEST_AVG( name, ... ) \    for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \    { \        memcpy( buf3, buf1, 1024 ); \        memcpy( buf4, buf1, 1024 ); \        if( mc_a.name[i] != mc_ref.name[i] ) \        { \            used_asm = 1; \            mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \            mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \            if( memcmp( buf3, buf4, 1024 ) )               \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -