📄 checkasm.c
字号:
#define MC_TEST_AVG( name, ... ) \ for( i = 0, ok = 1, used_asm = 0; i < 10; i++ ) \ { \ memcpy( buf3, buf1, 1024 ); \ memcpy( buf4, buf1, 1024 ); \ if( mc_a.name[i] != mc_ref.name[i] ) \ { \ used_asm = 1; \ mc_c.name[i]( buf3, 32, buf2, 16, ##__VA_ARGS__ ); \ mc_a.name[i]( buf4, 32, buf2, 16, ##__VA_ARGS__ ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ ok = 0; \ fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \ } \ } \ } MC_TEST_AVG( avg ); report( "mc avg :" ); for( w = -64; w <= 128 && ok; w++ ) MC_TEST_AVG( avg_weight, w ); report( "mc wpredb :" ); return ret;}static int check_deblock( int cpu_ref, int cpu_new ){ x264_deblock_function_t db_c; x264_deblock_function_t db_ref; x264_deblock_function_t db_a; int ret = 0, ok = 1, used_asm = 0; int alphas[36], betas[36]; int8_t tcs[36][4]; int a, c, i, j; x264_deblock_init( 0, &db_c ); x264_deblock_init( cpu_ref, &db_ref ); x264_deblock_init( cpu_new, &db_a ); /* not exactly the real values of a,b,tc but close enough */ a = 255; c = 250; for( i = 35; i >= 0; i-- ) { alphas[i] = a; betas[i] = (i+1)/2; tcs[i][0] = tcs[i][2] = (c+6)/10; tcs[i][1] = tcs[i][3] = (c+9)/20; a = a*9/10; c = c*9/10; }#define TEST_DEBLOCK( name, ... ) \ for( i = 0; i < 36; i++ ) \ { \ for( j = 0; j < 1024; j++ ) \ /* two distributions of random to excersize different failure modes */\ buf1[j] = rand() & (i&1 ? 0xf : 0xff ); \ memcpy( buf3, buf1, 1024 ); \ memcpy( buf4, buf1, 1024 ); \ if( db_a.name != db_ref.name ) \ { \ used_asm = 1; \ db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \ db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ ok = 0; \ fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \ break; \ } \ } \ } TEST_DEBLOCK( deblock_h_luma, tcs[i] ); TEST_DEBLOCK( deblock_v_luma, tcs[i] ); TEST_DEBLOCK( deblock_h_chroma, tcs[i] ); TEST_DEBLOCK( deblock_v_chroma, tcs[i] ); TEST_DEBLOCK( deblock_h_luma_intra ); TEST_DEBLOCK( deblock_v_luma_intra ); TEST_DEBLOCK( deblock_h_chroma_intra ); TEST_DEBLOCK( deblock_v_chroma_intra ); report( "deblock :" ); return ret;}static int check_quant( int cpu_ref, int cpu_new ){ x264_quant_function_t qf_c; x264_quant_function_t qf_ref; x264_quant_function_t qf_a; int16_t dct1[64] __attribute__((__aligned__(16))); int16_t dct2[64] __attribute__((__aligned__(16))); uint8_t cqm_buf[64] __attribute__((__aligned__(16))); int ret = 0, ok, used_asm; int oks[2] = {1,1}, used_asms[2] = {0,0}; int i, i_cqm, qp; x264_t h_buf; x264_t *h = &h_buf; h->pps = h->pps_array; x264_param_default( &h->param ); h->param.rc.i_qp_min = 26; for( i_cqm = 0; i_cqm < 4; i_cqm++ ) { if( i_cqm == 0 ) for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = x264_cqm_flat16; else if( i_cqm == 1 ) for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = x264_cqm_jvt[i]; else { if( i_cqm == 2 ) for( i = 0; i < 64; i++ ) cqm_buf[i] = 10 + rand() % 246; else for( i = 0; i < 64; i++ ) cqm_buf[i] = 1; for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = cqm_buf; } x264_cqm_init( h ); x264_quant_init( h, 0, &qf_c ); x264_quant_init( h, cpu_ref, &qf_ref ); x264_quant_init( h, cpu_new, &qf_a );#define INIT_QUANT8() \ { \ static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \ int x, y; \ for( y = 0; y < 8; y++ ) \ for( x = 0; x < 8; x++ ) \ { \ unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \ dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \ } \ }#define INIT_QUANT4() \ { \ static const int scale1d[4] = {4,6,4,6}; \ int x, y; \ for( y = 0; y < 4; y++ ) \ for( x = 0; x < 4; x++ ) \ { \ unsigned int scale = 255*scale1d[y]*scale1d[x]; \ dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \ } \ }#define TEST_QUANT_DC( name, cqm ) \ if( qf_a.name != qf_ref.name ) \ { \ used_asms[0] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ for( i = 0; i < 16; i++ ) \ dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \ qf_c.name( (void*)dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \ qf_a.name( (void*)dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \ if( memcmp( dct1, dct2, 16*2 ) ) \ { \ oks[0] = 0; \ fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \ break; \ } \ } \ }#define TEST_QUANT( qname, block, w ) \ if( qf_a.qname != qf_ref.qname ) \ { \ used_asms[0] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT##w() \ qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \ qf_a.qname( (void*)dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \ if( memcmp( dct1, dct2, w*w*2 ) ) \ { \ oks[0] = 0; \ fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \ break; \ } \ } \ } TEST_QUANT( quant_8x8, CQM_8IY, 8 ); TEST_QUANT( quant_8x8, CQM_8PY, 8 ); TEST_QUANT( quant_4x4, CQM_4IY, 4 ); TEST_QUANT( quant_4x4, CQM_4PY, 4 ); TEST_QUANT_DC( quant_4x4_dc, **h->quant4_mf[CQM_4IY] ); TEST_QUANT_DC( quant_2x2_dc, **h->quant4_mf[CQM_4IC] );#define TEST_DEQUANT( qname, dqname, block, w ) \ if( qf_a.dqname != qf_ref.dqname ) \ { \ used_asms[1] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT##w() \ qf_c.qname( (void*)dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \ memcpy( dct2, dct1, w*w*2 ); \ qf_c.dqname( (void*)dct1, h->dequant##w##_mf[block], qp ); \ qf_a.dqname( (void*)dct2, h->dequant##w##_mf[block], qp ); \ if( memcmp( dct1, dct2, w*w*2 ) ) \ { \ oks[1] = 0; \ fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \ break; \ } \ } \ } TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8IY, 8 ); TEST_DEQUANT( quant_8x8, dequant_8x8, CQM_8PY, 8 ); TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4IY, 4 ); TEST_DEQUANT( quant_4x4, dequant_4x4, CQM_4PY, 4 ); } ok = oks[0]; used_asm = used_asms[0]; report( "quant :" ); ok = oks[1]; used_asm = used_asms[1]; report( "dequant :" ); return ret;}static int check_intra( int cpu_ref, int cpu_new ){ int ret = 0, ok = 1, used_asm = 0; int i; DECLARE_ALIGNED( uint8_t, edge[33], 8 ); struct { x264_predict_t predict_16x16[4+3]; x264_predict_t predict_8x8c[4+3]; x264_predict8x8_t predict_8x8[9+3]; x264_predict_t predict_4x4[9+3]; } ip_c, ip_ref, ip_a; x264_predict_16x16_init( 0, ip_c.predict_16x16 ); x264_predict_8x8c_init( 0, ip_c.predict_8x8c ); x264_predict_8x8_init( 0, ip_c.predict_8x8 ); x264_predict_4x4_init( 0, ip_c.predict_4x4 ); x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 ); x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c ); x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 ); x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 ); x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 ); x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c ); x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 ); x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 ); x264_predict_8x8_filter( buf1+48, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );#define INTRA_TEST( name, dir, ... ) \ if( ip_a.name[dir] != ip_ref.name[dir] )\ { \ used_asm = 1; \ memcpy( buf3, buf1, 32*20 );\ memcpy( buf4, buf1, 32*20 );\ ip_c.name[dir]( buf3+48, ##__VA_ARGS__ );\ ip_a.name[dir]( buf4+48, ##__VA_ARGS__ );\ if( memcmp( buf3, buf4, 32*20 ) )\ {\ fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\ ok = 0;\ int j,k;\ for(k=-1; k<16; k++)\ printf("%2x ", edge[16+k]);\ printf("\n");\ for(j=0; j<8; j++){\ printf("%2x ", edge[j]);\ for(k=0; k<8; k++)\ printf("%2x ", buf4[48+k+j*32]);\ printf("\n");\ }\ printf("\n");\ for(j=0; j<8; j++){\ printf(" ");\ for(k=0; k<8; k++)\ printf("%2x ", buf3[48+k+j*32]);\ printf("\n");\ }\ }\ } for( i = 0; i < 12; i++ ) INTRA_TEST( predict_4x4, i ); for( i = 0; i < 7; i++ ) INTRA_TEST( predict_8x8c, i ); for( i = 0; i < 7; i++ ) INTRA_TEST( predict_16x16, i ); for( i = 0; i < 12; i++ ) INTRA_TEST( predict_8x8, i, edge ); report( "intra pred :" ); return ret;}int check_all( int cpu_ref, int cpu_new ){ return check_pixel( cpu_ref, cpu_new ) + check_dct( cpu_ref, cpu_new ) + check_mc( cpu_ref, cpu_new ) + check_intra( cpu_ref, cpu_new ) + check_deblock( cpu_ref, cpu_new ) + check_quant( cpu_ref, cpu_new );}int main(int argc, char *argv[]){ int ret = 0; int cpu0 = 0, cpu1 = 0; int i; buf1 = x264_malloc( 1024 ); /* 32 x 32 */ buf2 = x264_malloc( 1024 ); buf3 = x264_malloc( 1024 ); buf4 = x264_malloc( 1024 ); buf5 = x264_malloc( 1024 ); i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate(); fprintf( stderr, "x264: using random seed %u\n", i ); srand( i ); for( i = 0; i < 1024; i++ ) { buf1[i] = rand() & 0xFF; buf2[i] = rand() & 0xFF; buf3[i] = buf4[i] = 0; }#ifdef HAVE_MMX fprintf( stderr, "x264: MMXEXT against C\n" ); cpu1 = X264_CPU_MMX | X264_CPU_MMXEXT; ret = check_all( 0, cpu1 ); if( x264_cpu_detect() & X264_CPU_SSE2 ) { fprintf( stderr, "\nx264: SSE2 against C\n" ); cpu0 = cpu1; cpu1 |= X264_CPU_SSE | X264_CPU_SSE2; ret |= check_all( cpu0, cpu1 ); if( x264_cpu_detect() & X264_CPU_SSSE3 ) { fprintf( stderr, "\nx264: SSSE3 against C\n" ); cpu0 = cpu1; cpu1 |= X264_CPU_SSE3 | X264_CPU_SSSE3; ret |= check_all( cpu0, cpu1 ); } }#elif ARCH_PPC if( x264_cpu_detect() & X264_CPU_ALTIVEC ) { fprintf( stderr, "x264: ALTIVEC against C\n" ); ret = check_all( 0, X264_CPU_ALTIVEC ); }#endif if( ret == 0 ) { fprintf( stderr, "x264: All tests passed Yeah :)\n" ); return 0; } fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" ); return -1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -