⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 checkasm.c.svn-base

📁 一个快速的H.264解码器
💻 SVN-BASE
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include "common/common.h"#include "common/cpu.h"#ifdef HAVE_MMXEXT#include "common/i386/pixel.h"#include "common/i386/dct.h"#include "common/i386/mc.h"#endif#ifdef ARCH_PPC#include "common/ppc/pixel.h"#include "common/ppc/mc.h"#endif/* buf1, buf2: initialised to random data and shouldn't write into them */uint8_t * buf1, * buf2;/* buf3, buf4: used to store output */uint8_t * buf3, * buf4;/* buf5: temp */uint8_t * buf5;#define report( name ) { \    if( used_asm ) \        fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \    if( !ok ) ret = -1; \}static int check_pixel( int cpu_ref, int cpu_new ){    x264_pixel_function_t pixel_c;    x264_pixel_function_t pixel_ref;    x264_pixel_function_t pixel_asm;    int ret = 0, ok, used_asm;    int i;    x264_pixel_init( 0, &pixel_c );    x264_pixel_init( cpu_ref, &pixel_ref );    x264_pixel_init( cpu_new, &pixel_asm );#define TEST_PIXEL( name ) \    for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \    { \        int res_c, res_asm; \        if( pixel_asm.name[i] != pixel_ref.name[i] ) \        { \            used_asm = 1; \            res_c   = pixel_c.name[i]( buf1, 32, buf2, 24 ); \            res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \            if( res_c != res_asm ) \            { \                ok = 0; \                fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \            } \        } \    } \    report( "pixel " #name " :" );    TEST_PIXEL( sad );    TEST_PIXEL( ssd );    TEST_PIXEL( satd );    return ret;}static int check_dct( int cpu_ref, int cpu_new ){    x264_dct_function_t dct_c;    x264_dct_function_t dct_ref;    x264_dct_function_t dct_asm;    int ret = 0, ok, used_asm;    int16_t dct1[16][4][4] __attribute((aligned(16)));    int16_t dct2[16][4][4] __attribute((aligned(16)));    x264_dct_init( 0, &dct_c );    x264_dct_init( cpu_ref, &dct_ref);    x264_dct_init( cpu_new, &dct_asm );#define TEST_DCT( name, t1, t2, size ) \    if( dct_asm.name != dct_ref.name ) \    { \        used_asm = 1; \        dct_c.name( t1, buf1, 32, buf2, 24 ); \        dct_asm.name( t2, buf1, 32, buf2, 24 ); \        if( memcmp( t1, t2, size ) ) \        { \            ok = 0; \            fprintf( stderr, #name " [FAILED]\n" ); \        } \    }    ok = 1; used_asm = 0;    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );    TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );    TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );    report( "sub_dct4 :" );    ok = 1; used_asm = 0;    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );    report( "sub_dct8 :" );#undef TEST_DCT    /* copy coefs because idct8 modifies them in place */    memcpy( buf5, dct1, 512 );#define TEST_IDCT( name ) \    if( dct_asm.name != dct_ref.name ) \    { \        used_asm = 1; \        memcpy( buf3, buf1, 32*32 ); \        memcpy( buf4, buf1, 32*32 ); \        memcpy( dct1, buf5, 512 ); \        memcpy( dct2, buf5, 512 ); \        dct_c.name( buf3, 32, (void*)dct1 ); \        dct_asm.name( buf4, 32, (void*)dct2 ); \        if( memcmp( buf3, buf4, 32*32 ) ) \        { \            ok = 0; \            fprintf( stderr, #name " [FAILED]\n" ); \        } \    }    ok = 1; used_asm = 0;    TEST_IDCT( add4x4_idct );    TEST_IDCT( add8x8_idct );    TEST_IDCT( add16x16_idct );    report( "add_idct4 :" );    ok = 1; used_asm = 0;    TEST_IDCT( add8x8_idct8 );    TEST_IDCT( add16x16_idct8 );    report( "add_idct8 :" );#undef TEST_IDCT    ok = 1; used_asm = 0;    if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )    {        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        used_asm = 1;        dct_c.dct4x4dc( dct1 );        dct_asm.dct4x4dc( dct2 );        if( memcmp( dct1, dct2, 32 ) )        {            ok = 0;            fprintf( stderr, " - dct4x4dc :        [FAILED]\n" );        }    }    if( dct_asm.dct4x4dc != dct_ref.dct4x4dc )    {        int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}};        used_asm = 1;        dct_c.idct4x4dc( dct1 );        dct_asm.idct4x4dc( dct2 );        if( memcmp( dct1, dct2, 32 ) )        {            ok = 0;            fprintf( stderr, " - idct4x4dc :        [FAILED]\n" );        }    }    report( "(i)dct4x4dc :" );    ok = 1; used_asm = 0;    if( dct_asm.dct2x2dc != dct_ref.dct2x2dc )    {        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        used_asm = 1;        dct_c.dct2x2dc( dct1 );        dct_asm.dct2x2dc( dct2 );        if( memcmp( dct1, dct2, 4*2 ) )        {            ok = 0;            fprintf( stderr, " - dct2x2dc :        [FAILED]\n" );        }    }    if( dct_asm.idct2x2dc != dct_ref.idct2x2dc )    {        int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}};        used_asm = 1;        dct_c.idct2x2dc( dct1 );        dct_asm.idct2x2dc( dct2 );        if( memcmp( dct1, dct2, 4*2 ) )        {            ok = 0;            fprintf( stderr, " - idct2x2dc :       [FAILED]\n" );        }    }    report( "(i)dct2x2dc :" );    return ret;}static int check_mc( int cpu_ref, int cpu_new ){    x264_mc_functions_t mc_c;    x264_mc_functions_t mc_ref;    x264_mc_functions_t mc_a;    uint8_t *src     = &buf1[2*32+2];    uint8_t *src2[4] = { &buf1[2*32+2],  &buf1[7*32+2],                         &buf1[12*32+2], &buf1[17*32+2] };    uint8_t *dst1    = &buf3[2*32+2];    uint8_t *dst2    = &buf4[2*32+2];    int dx, dy, i, w;    int ret = 0, ok, used_asm;    x264_mc_init( 0, &mc_c );    x264_mc_init( cpu_ref, &mc_ref );    x264_mc_init( cpu_new, &mc_a );#define MC_TEST_LUMA( w, h ) \        if( mc_a.mc_luma != mc_ref.mc_luma ) \        { \            used_asm = 1; \            memset(buf3, 0xCD, 1024); \            memset(buf4, 0xCD, 1024); \            mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h );     \            mc_a.mc_luma( src2, 32, dst2, 16, dx, dy, w, h );   \            if( memcmp( buf3, buf4, 1024 ) )               \            { \                fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h );   \                ok = 0; \            } \        }#define MC_TEST_CHROMA( w, h ) \        if( mc_a.mc_chroma != mc_ref.mc_chroma ) \        { \            used_asm = 1; \            memset(buf3, 0xCD, 1024); \            memset(buf4, 0xCD, 1024); \            mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h );     \            mc_a.mc_chroma( src, 32, dst2, 16, dx, dy, w, h );   \            if( memcmp( buf3, buf4, 1024 ) )               \            { \                fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h );   \                ok = 0; \            } \        }    ok = 1; used_asm = 0;    for( dy = 0; dy < 4; dy++ )        for( dx = 0; dx < 4; dx++ )        {            MC_TEST_LUMA( 16, 16 );            MC_TEST_LUMA( 16, 8 );            MC_TEST_LUMA( 8, 16 );            MC_TEST_LUMA( 8, 8 );            MC_TEST_LUMA( 8, 4 );            MC_TEST_LUMA( 4, 8 );            MC_TEST_LUMA( 4, 4 );        }    report( "mc luma :" );    ok = 1; used_asm = 0;    for( dy = 0; dy < 9; dy++ )        for( dx = 0; dx < 9; dx++ )        {            MC_TEST_CHROMA( 8, 8 );            MC_TEST_CHROMA( 8, 4 );            MC_TEST_CHROMA( 4, 8 );            MC_TEST_CHROMA( 4, 4 );            MC_TEST_CHROMA( 4, 2 );            MC_TEST_CHROMA( 2, 4 );            MC_TEST_CHROMA( 2, 2 );        }    report( "mc chroma :" );#undef MC_TEST_LUMA#undef MC_TEST_CHROMA#define MC_TEST_AVG( name, ... ) \    for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \    { \        memcpy( buf3, buf1, 1024 ); \        memcpy( buf4, buf1, 1024 ); \        if( mc_a.name[i] != mc_ref.name[i] ) \        { \            used_asm = 1; \            mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \            mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \            if( memcmp( buf3, buf4, 1024 ) )               \            { \                ok = 0; \                fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \            } \        } \    }    MC_TEST_AVG( avg );    report( "mc avg :" );    for( w = -64; w <= 128 && ok; w++ )        MC_TEST_AVG( avg_weight, w );    report( "mc wpredb :" );    return ret;}static int check_deblock( int cpu_ref, int cpu_new ){    x264_deblock_function_t db_c;    x264_deblock_function_t db_ref;    x264_deblock_function_t db_a;    int ret = 0, ok = 1, used_asm = 0;    int alphas[36], betas[36];    int8_t tcs[36][4];    int a, c, i, j;    x264_deblock_init( 0, &db_c );    x264_deblock_init( cpu_ref, &db_ref );    x264_deblock_init( cpu_new, &db_a );    /* not exactly the real values of a,b,tc but close enough */    a = 255; c = 250;    for( i = 35; i >= 0; i-- )    {        alphas[i] = a;        betas[i] = (i+1)/2;        tcs[i][0] = tcs[i][2] = (c+6)/10;        tcs[i][1] = tcs[i][3] = (c+9)/20;        a = a*9/10;        c = c*9/10;    }#define TEST_DEBLOCK( name, ... ) \    for( i = 0; i < 36; i++ ) \    { \        for( j = 0; j < 1024; j++ ) \            /* two distributions of random to excersize different failure modes */\            buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -