📄 checkasm.c.svn-base
字号:
memcpy( buf3, buf1, 1024 ); \ memcpy( buf4, buf1, 1024 ); \ if( db_a.name != db_ref.name ) \ { \ used_asm = 1; \ db_c.name( &buf3[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \ db_a.name( &buf4[8*32], 32, alphas[i], betas[i], ##__VA_ARGS__ ); \ if( memcmp( buf3, buf4, 1024 ) ) \ { \ ok = 0; \ fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \ break; \ } \ } \ } TEST_DEBLOCK( deblock_h_luma, tcs[i] ); TEST_DEBLOCK( deblock_v_luma, tcs[i] ); TEST_DEBLOCK( deblock_h_chroma, tcs[i] ); TEST_DEBLOCK( deblock_v_chroma, tcs[i] ); TEST_DEBLOCK( deblock_h_luma_intra ); TEST_DEBLOCK( deblock_v_luma_intra ); TEST_DEBLOCK( deblock_h_chroma_intra ); TEST_DEBLOCK( deblock_v_chroma_intra ); report( "deblock :" ); return ret;}static int check_quant( int cpu_ref, int cpu_new ){ x264_quant_function_t qf_c; x264_quant_function_t qf_ref; x264_quant_function_t qf_a; int16_t dct1[64], dct2[64]; uint8_t cqm_buf[64]; int ret = 0, ok, used_asm; int oks[2] = {1,1}, used_asms[2] = {0,0}; int i, i_cqm; x264_t h_buf; x264_t *h = &h_buf; h->pps = h->pps_array; x264_param_default( &h->param ); for( i_cqm = 0; i_cqm < 4; i_cqm++ ) { if( i_cqm == 0 ) for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = x264_cqm_flat16; else if( i_cqm == 1 ) for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = x264_cqm_jvt[i]; else { if( i_cqm == 2 ) for( i = 0; i < 64; i++ ) cqm_buf[i] = 10 + rand() % 246; else for( i = 0; i < 64; i++ ) cqm_buf[i] = 1; for( i = 0; i < 6; i++ ) h->pps->scaling_list[i] = cqm_buf; } x264_cqm_init( h ); x264_quant_init( h, 0, &qf_c ); x264_quant_init( h, cpu_ref, &qf_ref ); x264_quant_init( h, cpu_new, &qf_a );#define INIT_QUANT8() \ { \ static const int scale1d[8] = {32,31,24,31,32,31,24,31}; \ int x, y; \ for( y = 0; y < 8; y++ ) \ for( x = 0; x < 8; x++ ) \ { \ unsigned int scale = (255*scale1d[y]*scale1d[x])/16; \ dct1[y*8+x] = dct2[y*8+x] = (rand()%(2*scale+1))-scale; \ } \ }#define INIT_QUANT4() \ { \ static const int scale1d[4] = {4,6,4,6}; \ int x, y; \ for( y = 0; y < 4; y++ ) \ for( x = 0; x < 4; x++ ) \ { \ unsigned int scale = 255*scale1d[y]*scale1d[x]; \ dct1[y*4+x] = dct2[y*4+x] = (rand()%(2*scale+1))-scale; \ } \ }#define TEST_QUANT( name, cqm ) \ if( qf_a.name != qf_ref.name ) \ { \ used_asms[0] = 1; \ for( i = 0; i < 64; i++ ) \ dct1[i] = dct2[i] = (rand() & 0x1fff) - 0xfff; \ qf_c.name( (void*)dct1, cqm, 20, (1<<20)/6 ); \ qf_a.name( (void*)dct2, cqm, 20, (1<<20)/6 ); \ if( memcmp( dct1, dct2, 64*2 ) ) \ { \ oks[0] = 0; \ fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \ } \ }#define TEST_QUANT8( qname, cqm, shift, divider ) \ if( qf_a.qname != qf_ref.qname ) \ { \ int qp; \ used_asms[0] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT8() \ qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ if( memcmp( dct1, dct2, 64*2 ) ) \ { \ oks[0] = 0; \ fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \ break; \ } \ } \ }#define TEST_QUANT4( qname, cqm, shift, divider ) \ if( qf_a.qname != qf_ref.qname ) \ { \ int qp; \ used_asms[0] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT4() \ qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ qf_a.qname( (void*)dct2, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ if( memcmp( dct1, dct2, 16*2 ) ) \ { \ oks[0] = 0; \ fprintf( stderr, #qname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \ break; \ } \ } \ } TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8IY], 16, 3 ); TEST_QUANT8( quant_8x8_core, h->quant8_mf[CQM_8PY], 16, 6 ); TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4IY], 15, 3 ); TEST_QUANT4( quant_4x4_core, h->quant4_mf[CQM_4PY], 15, 6 ); TEST_QUANT( quant_4x4_dc_core, ***h->quant4_mf[CQM_4IY] ); TEST_QUANT( quant_2x2_dc_core, ***h->quant4_mf[CQM_4IC] );#define TEST_DEQUANT8( qname, dqname, cqm, dqm, shift, divider ) \ if( qf_a.dqname != qf_ref.dqname ) \ { \ int qp; \ used_asms[1] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT8() \ qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ memcpy( dct2, dct1, 64*2 ); \ qf_c.dqname( (void*)dct1, dqm, qp ); \ qf_a.dqname( (void*)dct2, dqm, qp ); \ if( memcmp( dct1, dct2, 64*2 ) ) \ { \ oks[1] = 0; \ fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \ break; \ } \ } \ }#define TEST_DEQUANT4( qname, dqname, cqm, dqm, shift, divider ) \ if( qf_a.dqname != qf_ref.dqname ) \ { \ int qp; \ used_asms[1] = 1; \ for( qp = 51; qp > 0; qp-- ) \ { \ INIT_QUANT4() \ qf_c.qname( (void*)dct1, cqm[qp%6], shift+qp/6, (1<<(shift+qp/6))/divider ); \ memcpy( dct2, dct1, 16*2 ); \ qf_c.dqname( (void*)dct1, dqm, qp ); \ qf_a.dqname( (void*)dct2, dqm, qp ); \ if( memcmp( dct1, dct2, 16*2 ) ) \ { \ oks[1] = 0; \ fprintf( stderr, #dqname "(qp=%d, cqm=%d, intra=%d): [FAILED]\n", qp, i_cqm, divider==3 ); \ break; \ } \ } \ } TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8IY], h->dequant8_mf[CQM_8IY], 16, 3 ); TEST_DEQUANT8( quant_8x8_core, dequant_8x8, h->quant8_mf[CQM_8PY], h->dequant8_mf[CQM_8PY], 16, 6 ); TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4IY], h->dequant4_mf[CQM_4IY], 15, 3 ); TEST_DEQUANT4( quant_4x4_core, dequant_4x4, h->quant4_mf[CQM_4PY], h->dequant4_mf[CQM_4PY], 15, 6 ); } ok = oks[0]; used_asm = used_asms[0]; report( "quant :" ); ok = oks[1]; used_asm = used_asms[1]; report( "dequant :" ); return ret;}static int check_intra( int cpu_ref, int cpu_new ){ int ret = 0, ok = 1, used_asm = 0; int i; struct { x264_predict_t predict_16x16[4+3]; x264_predict_t predict_8x8c[4+3]; x264_predict8x8_t predict_8x8[9+3]; x264_predict_t predict_4x4[9+3]; } ip_c, ip_ref, ip_a; x264_predict_16x16_init( 0, ip_c.predict_16x16 ); x264_predict_8x8c_init( 0, ip_c.predict_8x8c ); x264_predict_8x8_init( 0, ip_c.predict_8x8 ); x264_predict_4x4_init( 0, ip_c.predict_4x4 ); x264_predict_16x16_init( cpu_ref, ip_ref.predict_16x16 ); x264_predict_8x8c_init( cpu_ref, ip_ref.predict_8x8c ); x264_predict_8x8_init( cpu_ref, ip_ref.predict_8x8 ); x264_predict_4x4_init( cpu_ref, ip_ref.predict_4x4 ); x264_predict_16x16_init( cpu_new, ip_a.predict_16x16 ); x264_predict_8x8c_init( cpu_new, ip_a.predict_8x8c ); x264_predict_8x8_init( cpu_new, ip_a.predict_8x8 ); x264_predict_4x4_init( cpu_new, ip_a.predict_4x4 );#define INTRA_TEST( name, dir, ... ) \ if( ip_a.name[dir] != ip_ref.name[dir] )\ { \ used_asm = 1; \ memcpy( buf3, buf1, 32*20 );\ memcpy( buf4, buf1, 32*20 );\ ip_c.name[dir]( buf3+48, 32, ##__VA_ARGS__ );\ ip_a.name[dir]( buf4+48, 32, ##__VA_ARGS__ );\ if( memcmp( buf3, buf4, 32*20 ) )\ {\ fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\ ok = 0;\ }\ } for( i = 0; i < 12; i++ ) INTRA_TEST( predict_4x4, i ); for( i = 0; i < 7; i++ ) INTRA_TEST( predict_8x8c, i ); for( i = 0; i < 7; i++ ) INTRA_TEST( predict_16x16, i ); for( i = 0; i < 12; i++ ) INTRA_TEST( predict_8x8, i, 0xf ); INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP ); INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP ); INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP|MB_TOPLEFT ); INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP|MB_TOPLEFT ); INTRA_TEST( predict_8x8, I_PRED_8x8_V, MB_LEFT|MB_TOP|MB_TOPRIGHT ); INTRA_TEST( predict_8x8, I_PRED_8x8_DC, MB_LEFT|MB_TOP|MB_TOPRIGHT ); report( "intra pred :" ); return ret;}int check_all( int cpu_ref, int cpu_new ){ return check_pixel( cpu_ref, cpu_new ) + check_dct( cpu_ref, cpu_new ) + check_mc( cpu_ref, cpu_new ) + check_intra( cpu_ref, cpu_new ) + check_deblock( cpu_ref, cpu_new ) + check_quant( cpu_ref, cpu_new );}int main(int argc, char *argv[]){ int ret = 0; int i; buf1 = x264_malloc( 1024 ); /* 32 x 32 */ buf2 = x264_malloc( 1024 ); buf3 = x264_malloc( 1024 ); buf4 = x264_malloc( 1024 ); buf5 = x264_malloc( 1024 ); i = ( argc > 1 ) ? atoi(argv[1]) : x264_mdate(); fprintf( stderr, "x264: using random seed %u\n", i ); srand( i ); for( i = 0; i < 1024; i++ ) { buf1[i] = rand() & 0xFF; buf2[i] = rand() & 0xFF; buf3[i] = buf4[i] = 0; }#ifdef HAVE_MMXEXT fprintf( stderr, "x264: MMXEXT against C\n" ); ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );#ifdef HAVE_SSE2 if( x264_cpu_detect() & X264_CPU_SSE2 ) { fprintf( stderr, "\nx264: SSE2 against C\n" ); ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT, X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 ); }#endif#elif ARCH_PPC fprintf( stderr, "x264: ALTIVEC against C\n" ); ret = check_all( 0, X264_CPU_ALTIVEC );#endif if( ret == 0 ) { fprintf( stderr, "x264: All tests passed Yeah :)\n" ); return 0; } fprintf( stderr, "x264: at least one test has failed. Go and fix that Right Now!\n" ); return -1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -