📄 checkasm.c.svn-base
字号:
#include <stdio.h>#include <stdlib.h>#include <string.h>#include "common/common.h"#include "common/cpu.h"#ifdef HAVE_MMXEXT#include "common/i386/pixel.h"#include "common/i386/dct.h"#include "common/i386/mc.h"#endif#ifdef ARCH_PPC#include "common/ppc/pixel.h"#include "common/ppc/mc.h"#endif/* buf1, buf2: initialised to random data and shouldn't write into them */uint8_t * buf1, * buf2;/* buf3, buf4: used to store output */uint8_t * buf3, * buf4;/* buf5: temp */uint8_t * buf5;#define report( name ) { \ if( used_asm ) \ fprintf( stderr, " - %-21s [%s]\n", name, ok ? "OK" : "FAILED" ); \ if( !ok ) ret = -1; \}static int check_pixel( int cpu_ref, int cpu_new ){ x264_pixel_function_t pixel_c; x264_pixel_function_t pixel_ref; x264_pixel_function_t pixel_asm; int ret = 0, ok, used_asm; int i; x264_pixel_init( 0, &pixel_c ); x264_pixel_init( cpu_ref, &pixel_ref ); x264_pixel_init( cpu_new, &pixel_asm );#define TEST_PIXEL( name ) \ for( i = 0, ok = 1, used_asm = 0; i < 7; i++ ) \ { \ int res_c, res_asm; \ if( pixel_asm.name[i] != pixel_ref.name[i] ) \ { \ used_asm = 1; \ res_c = pixel_c.name[i]( buf1, 32, buf2, 24 ); \ res_asm = pixel_asm.name[i]( buf1, 32, buf2, 24 ); \ if( res_c != res_asm ) \ { \ ok = 0; \ fprintf( stderr, #name "[%d]: %d != %d [FAILED]\n", i, res_c, res_asm ); \ } \ } \ } \ report( "pixel " #name " :" ); TEST_PIXEL( sad ); TEST_PIXEL( ssd ); TEST_PIXEL( satd ); return ret;}static int check_dct( int cpu_ref, int cpu_new ){ x264_dct_function_t dct_c; x264_dct_function_t dct_ref; x264_dct_function_t dct_asm; int ret = 0, ok, used_asm; int16_t dct1[16][4][4] __attribute((aligned(16))); int16_t dct2[16][4][4] __attribute((aligned(16))); x264_dct_init( 0, &dct_c ); x264_dct_init( cpu_ref, &dct_ref); x264_dct_init( cpu_new, &dct_asm );#define TEST_DCT( name, t1, t2, size ) \ if( dct_asm.name != dct_ref.name ) \ { \ used_asm = 1; \ dct_c.name( t1, buf1, 32, buf2, 24 ); \ dct_asm.name( t2, buf1, 32, buf2, 24 ); \ if( memcmp( t1, t2, size ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ } \ } ok = 1; used_asm = 0; TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 ); TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 ); TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 ); report( "sub_dct4 :" ); ok = 1; used_asm = 0; TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 ); TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 ); report( "sub_dct8 :" );#undef TEST_DCT /* copy coefs because idct8 modifies them in place */ memcpy( buf5, dct1, 512 );#define TEST_IDCT( name ) \ if( dct_asm.name != dct_ref.name ) \ { \ used_asm = 1; \ memcpy( buf3, buf1, 32*32 ); \ memcpy( buf4, buf1, 32*32 ); \ memcpy( dct1, buf5, 512 ); \ memcpy( dct2, buf5, 512 ); \ dct_c.name( buf3, 32, (void*)dct1 ); \ dct_asm.name( buf4, 32, (void*)dct2 ); \ if( memcmp( buf3, buf4, 32*32 ) ) \ { \ ok = 0; \ fprintf( stderr, #name " [FAILED]\n" ); \ } \ } ok = 1; used_asm = 0; TEST_IDCT( add4x4_idct ); TEST_IDCT( add8x8_idct ); TEST_IDCT( add16x16_idct ); report( "add_idct4 :" ); ok = 1; used_asm = 0; TEST_IDCT( add8x8_idct8 ); TEST_IDCT( add16x16_idct8 ); report( "add_idct8 :" );#undef TEST_IDCT ok = 1; used_asm = 0; if( dct_asm.dct4x4dc != dct_ref.dct4x4dc ) { int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; used_asm = 1; dct_c.dct4x4dc( dct1 ); dct_asm.dct4x4dc( dct2 ); if( memcmp( dct1, dct2, 32 ) ) { ok = 0; fprintf( stderr, " - dct4x4dc : [FAILED]\n" ); } } if( dct_asm.dct4x4dc != dct_ref.dct4x4dc ) { int16_t dct1[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; int16_t dct2[4][4] __attribute((aligned(16))) = { {-12, 42, 23, 67},{2, 90, 89,56}, {67,43,-76,91},{56,-78,-54,1}}; used_asm = 1; dct_c.idct4x4dc( dct1 ); dct_asm.idct4x4dc( dct2 ); if( memcmp( dct1, dct2, 32 ) ) { ok = 0; fprintf( stderr, " - idct4x4dc : [FAILED]\n" ); } } report( "(i)dct4x4dc :" ); ok = 1; used_asm = 0; if( dct_asm.dct2x2dc != dct_ref.dct2x2dc ) { int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; used_asm = 1; dct_c.dct2x2dc( dct1 ); dct_asm.dct2x2dc( dct2 ); if( memcmp( dct1, dct2, 4*2 ) ) { ok = 0; fprintf( stderr, " - dct2x2dc : [FAILED]\n" ); } } if( dct_asm.idct2x2dc != dct_ref.idct2x2dc ) { int16_t dct1[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; int16_t dct2[2][2] __attribute((aligned(16))) = { {-12, 42},{2, 90}}; used_asm = 1; dct_c.idct2x2dc( dct1 ); dct_asm.idct2x2dc( dct2 ); if( memcmp( dct1, dct2, 4*2 ) ) { ok = 0; fprintf( stderr, " - idct2x2dc : [FAILED]\n" ); } } report( "(i)dct2x2dc :" ); return ret;}static int check_mc( int cpu_ref, int cpu_new ){ x264_mc_functions_t mc_c; x264_mc_functions_t mc_ref; x264_mc_functions_t mc_a; uint8_t *src = &buf1[2*32+2]; uint8_t *src2[4] = { &buf1[2*32+2], &buf1[7*32+2], &buf1[12*32+2], &buf1[17*32+2] }; uint8_t *dst1 = &buf3[2*32+2]; uint8_t *dst2 = &buf4[2*32+2]; int dx, dy, i, w; int ret = 0, ok, used_asm; x264_mc_init( 0, &mc_c ); x264_mc_init( cpu_ref, &mc_ref ); x264_mc_init( cpu_new, &mc_a );#define MC_TEST_LUMA( w, h ) \ if( mc_a.mc_luma != mc_ref.mc_luma ) \ { \ used_asm = 1; \ memset(buf3, 0xCD, 1024); \ memset(buf4, 0xCD, 1024); \ mc_c.mc_luma( src2, 32, dst1, 16, dx, dy, w, h ); \ mc_a.mc_luma( src2, 32, dst2, 16, dx, dy, w, h ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ ok = 0; \ } \ }#define MC_TEST_CHROMA( w, h ) \ if( mc_a.mc_chroma != mc_ref.mc_chroma ) \ { \ used_asm = 1; \ memset(buf3, 0xCD, 1024); \ memset(buf4, 0xCD, 1024); \ mc_c.mc_chroma( src, 32, dst1, 16, dx, dy, w, h ); \ mc_a.mc_chroma( src, 32, dst2, 16, dx, dy, w, h ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \ ok = 0; \ } \ } ok = 1; used_asm = 0; for( dy = 0; dy < 4; dy++ ) for( dx = 0; dx < 4; dx++ ) { MC_TEST_LUMA( 16, 16 ); MC_TEST_LUMA( 16, 8 ); MC_TEST_LUMA( 8, 16 ); MC_TEST_LUMA( 8, 8 ); MC_TEST_LUMA( 8, 4 ); MC_TEST_LUMA( 4, 8 ); MC_TEST_LUMA( 4, 4 ); } report( "mc luma :" ); ok = 1; used_asm = 0; for( dy = 0; dy < 9; dy++ ) for( dx = 0; dx < 9; dx++ ) { MC_TEST_CHROMA( 8, 8 ); MC_TEST_CHROMA( 8, 4 ); MC_TEST_CHROMA( 4, 8 ); MC_TEST_CHROMA( 4, 4 ); MC_TEST_CHROMA( 4, 2 ); MC_TEST_CHROMA( 2, 4 ); MC_TEST_CHROMA( 2, 2 ); } report( "mc chroma :" );#undef MC_TEST_LUMA#undef MC_TEST_CHROMA#define MC_TEST_AVG( name, ... ) \ for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \ { \ memcpy( buf3, buf1, 1024 ); \ memcpy( buf4, buf1, 1024 ); \ if( mc_a.name[i] != mc_ref.name[i] ) \ { \ used_asm = 1; \ mc_c.name[i]( buf3, 32, buf2, 24, ##__VA_ARGS__ ); \ mc_a.name[i]( buf4, 32, buf2, 24, ##__VA_ARGS__ ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ ok = 0; \ fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \ } \ } \ } MC_TEST_AVG( avg ); report( "mc avg :" ); for( w = -64; w <= 128 && ok; w++ ) MC_TEST_AVG( avg_weight, w ); report( "mc wpredb :" ); return ret;}static int check_deblock( int cpu_ref, int cpu_new ){ x264_deblock_function_t db_c; x264_deblock_function_t db_ref; x264_deblock_function_t db_a; int ret = 0, ok = 1, used_asm = 0; int alphas[36], betas[36]; int8_t tcs[36][4]; int a, c, i, j; x264_deblock_init( 0, &db_c ); x264_deblock_init( cpu_ref, &db_ref ); x264_deblock_init( cpu_new, &db_a ); /* not exactly the real values of a,b,tc but close enough */ a = 255; c = 250; for( i = 35; i >= 0; i-- ) { alphas[i] = a; betas[i] = (i+1)/2; tcs[i][0] = tcs[i][2] = (c+6)/10; tcs[i][1] = tcs[i][3] = (c+9)/20; a = a*9/10; c = c*9/10; }#define TEST_DEBLOCK( name, ... ) \ for( i = 0; i < 36; i++ ) \ { \ for( j = 0; j < 1024; j++ ) \ /* two distributions of random to excersize different failure modes */\ buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -