📄 test_dsp.cpp
字号:
Dsp.Name, tm.Get_Sec(), Crc, Dsp.CALL); \ /*PRINT16x16(Dst);*/ \ CHECKI( Crc, CRC0 ); \ }#define TEST82(S, CNT, CALL, CRC0) \ printf("== %s ==\n", S); \ for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { \ Skl_Init_Mb_DSP( &Dsp, *Cpu ); \ tm.Reset(); \ for(n=0; n<CNT*10; ++n) { \ for(i=0; i<16*WIDTH; ++i) Dst[i] = (i*37+23)&0xff; \ for(i=0; i<16; i+=2) \ Dsp.HFilter_31(Dst+i*WIDTH,Dst+(i+1)*WIDTH,WIDTH/8); \ for(i=0; i<WIDTH; i+=2) \ Dsp.VFilter_31(Dst+i,Dst+1+i,WIDTH,2); \ } \ Dsp.Switch_Off(); \ tm.Stop(); \ for(Crc=0, j=0; j<16; ++j) for(i=0; i<16; ++i) \ Crc += Dst[i+j*WIDTH]^i^j; \ printf( "%s:\t%.3f s \tCrc: %d \tFunc:%p\n", \ Dsp.Name, tm.Get_Sec(), Crc, Dsp.CALL); \ /*PRINT16x16(Dst);*/ \ CHECKI( Crc, CRC0 ); \ }#define TEST9(S, CNT, CALL, CRC0) \ printf("== %s ==\n", S); \ for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { \ Skl_Init_Mb_DSP( &Dsp, *Cpu ); \ tm.Reset(); \ for(n=0; n<CNT*200; ++n) { \ for(i=0; i<64; ++i) Dst16[i] = ((i*73+31)&0x3ff)-512; \ Dsp.CALL(Dst16, Src+1+WIDTH, WIDTH); \ /*if (n==0) { PRINT_NxN(Src,18,18,WIDTH);PRINT_NxN(Dst16,8,8,8); }*/\ } \ Dsp.Switch_Off(); \ tm.Stop(); \ for(Crc=0, i=0; i<64; ++i) Crc += Dst16[i]^i; \ printf( "%s:\t%.3f s \tCrc: %d \tFunc:%p\n", \ Dsp.Name, tm.Get_Sec(), Crc, Dsp.CALL); \ CHECKI( Crc, CRC0 ); \ }#define TEST10(S, CNT, CALL, SAD0, SAD1, SAD2) \ printf("== %s ==\n", S); \ for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { \ Skl_Init_Mb_DSP( &Dsp, *Cpu ); \ SKL_UINT32 Sad[3]; \ for(i=0; i<16*WIDTH; ++i) Dst[i] = (i*37+23)&0xff; \ tm.Reset(); \ for(n=0; n<CNT*150; ++n) \ Dsp.CALL(Dst, Src, WIDTH, Sad); \ Dsp.Switch_Off(); \ tm.Stop(); \ printf( "%s: \t%.3f s Sad:H:0x%x V:0x%x HV:0x%x Func:%p\n", \ Dsp.Name, tm.Get_Sec(), Sad[0], Sad[1], Sad[2], Dsp.CALL); \ CHECKUI( Sad[0], SAD0 ); \ CHECKUI( Sad[1], SAD1 ); \ CHECKUI( Sad[2], SAD2 ); \ }TEST_FUNC(Test_Mb_DSP){ SKL_CPU_FEATURE Cpu_List[] = { SKL_CPU_C, SKL_CPU_X86, SKL_CPU_MMX, SKL_CPU_SSE, SKL_CPU_LAST }; SKL_MB_DSP Dsp; SKL_RANDOM Rnd; int i, j, n; const int CNT1 = 150; const int WIDTH = 128; SKL_BYTE Src[WIDTH*WIDTH], Dst[16*WIDTH]; SKL_INT16 Src16[WIDTH*WIDTH]; SKL_INT16 Src6[6*64], Dst16[2*64]; SKL_UINT32 Crc; SKL_PTIMER tm; SKL_CPU_FEATURE *Cpu; for(i=0; i<WIDTH*WIDTH; ++i) Src[i] = (SKL_BYTE)Rnd.Get_Int(256); for(i=0; i<WIDTH*WIDTH; ++i) Src16[i] = (SKL_INT16)Rnd.Get_Int(256); for(i=0; i<6*64; ++i) Src6[i] = (SKL_INT16)(Rnd.Get_Int(512)-128); TEST2("Add-FF-16 ", CNT1, Add->HP_16x8[0], 16796); TEST2("Add-FH-16 Rnd0 ", CNT1, Add->HP_16x8[1], 16836); TEST2("Add-HF-16 Rnd0 ", CNT1, Add->HP_16x8[2], 16720); TEST2("Add-HH-16 Rnd0 ", CNT1, Add->HP_16x8[3], 16697); TEST2("Add-FF-8 ", CNT1, Add->HP_8x8 [0], 16164); TEST2("Add-FH-8 Rnd0 ", CNT1, Add->HP_8x8 [1], 16289); TEST2("Add-HF-8 Rnd0 ", CNT1, Add->HP_8x8 [2], 16183); TEST2("Add-HH-8 Rnd0 ", CNT1, Add->HP_8x8 [3], 16268); TEST2("Add-FF-4 ", CNT1, Add->HP_8x4 [0], 15760); TEST2("Add-FH-4 Rnd0 ", CNT1, Add->HP_8x4 [1], 15715); TEST2("Add-HF-4 Rnd0 ", CNT1, Add->HP_8x4 [2], 15797); TEST2("Add-HH-4 Rnd0 ", CNT1, Add->HP_8x4 [3], 15765); TEST2("Copy-FF-16 ", CNT1, Copy[1]->HP_16x8[0], 16726); TEST2("Copy-FF-16 ", CNT1, Copy[0]->HP_16x8[0], 16726); TEST2("Copy-FH-16 Rnd1 ", CNT1, Copy[1]->HP_16x8[1], 16709); TEST2("Copy-FH-16 Rnd0 ", CNT1, Copy[0]->HP_16x8[1], 16780); TEST2("Copy-HF-16 Rnd1 ", CNT1, Copy[1]->HP_16x8[2], 16583); TEST2("Copy-HF-16 Rnd0 ", CNT1, Copy[0]->HP_16x8[2], 16653); TEST2("Copy-HH-16 Rnd1 ", CNT1, Copy[1]->HP_16x8[3], 16607); TEST2("Copy-HH-16 Rnd0 ", CNT1, Copy[0]->HP_16x8[3], 16643); TEST2("Copy-FF-8 ", CNT1, Copy[1]->HP_8x8 [0], 16199); TEST2("Copy-FF-8 ", CNT1, Copy[0]->HP_8x8 [0], 16199); TEST2("Copy-FH-8 Rnd1 ", CNT1, Copy[1]->HP_8x8 [1], 16381); TEST2("Copy-FH-8 Rnd0 ", CNT1, Copy[0]->HP_8x8 [1], 16412); TEST2("Copy-HF-8 Rnd1 ", CNT1, Copy[1]->HP_8x8 [2], 16174); TEST2("Copy-HF-8 Rnd0 ", CNT1, Copy[0]->HP_8x8 [2], 16214); TEST2("Copy-HH-8 Rnd1 ", CNT1, Copy[1]->HP_8x8 [3], 16351); TEST2("Copy-HH-8 Rnd0 ", CNT1, Copy[0]->HP_8x8 [3], 16370); TEST2("Copy-FF-4 ", CNT1, Copy[1]->HP_8x4 [0], 15714); TEST2("Copy-FF-4 ", CNT1, Copy[0]->HP_8x4 [0], 15714); TEST2("Copy-FH-4 Rnd1 ", CNT1, Copy[1]->HP_8x4 [1], 15631); TEST2("Copy-FH-4 Rnd0 ", CNT1, Copy[0]->HP_8x4 [1], 15643); TEST2("Copy-HF-4 Rnd1 ", CNT1, Copy[1]->HP_8x4 [2], 15772); TEST2("Copy-HF-4 Rnd0 ", CNT1, Copy[0]->HP_8x4 [2], 15791); TEST2("Copy-HH-4 Rnd1 ", CNT1, Copy[1]->HP_8x4 [3], 15723); TEST2("Copy-HH-4 Rnd0 ", CNT1, Copy[0]->HP_8x4 [3], 15733); TEST4("Copy-16x8_8To16 ", CNT1, Copy_16x8_8To16, 16726); TEST4("Copy-8x8_8To16 ", CNT1, Copy_8x8_8To16, 14670); TEST5("Diff-16x8_8To16 ", CNT1, Diff_16x8_8To16, 770213); TEST5("Diff-8x8_8To16 ", CNT1, Diff_8x8_8To16, 1659376); TEST6("Diff-16x8_88To16", CNT1, Diff_16x8_88To16, -1011); TEST6("Diff-8x8_88To16 ", CNT1, Diff_8x8_88To16, 1273630); TEST71("SAD-16x7-Frame ", CNT1, SAD_16x7_Frame, 1792); TEST71("SAD-16x7-Field ", CNT1, SAD_16x7_Field, 5632); TEST72("Reorder-Frame ", CNT1, Reorder_Frame_16x16, 23552); TEST81("Copy-Up8x8_16To8", CNT1*10, Copy_Upsampled_8x8_16To8, 57216, 255); TEST81("Add-Up8x8_16To8 ", CNT1*10, Add_Upsampled_8x8_16To8, 24897, 0); TEST82("H/V Filter_31 ", CNT1*10, HFilter_31, 31872); TEST9("DownFilt_31 ", CNT1*10, Filter_18x18_To_8x8, 8326); TEST9("DownFilt_Diff_31 ", CNT1*10, Filter_Diff_18x18_To_8x8, -10774); TEST10("SAD-16-HP-Rnd0 ", CNT1* 4, Copy[0]->SAD_HP_16x16, 0x4b2e, 0x4a64, 0x46d6); TEST10("SAD-16-HP-Rnd1 ", CNT1* 4, Copy[1]->SAD_HP_16x16, 0x4b18, 0x4a58, 0x46d7); TEST10("SAD- 8-HP-Rnd0 ", CNT1*16, Copy[0]->SAD_HP_8x8 , 0x1497, 0x12a8, 0x1118); TEST10("SAD- 8-HP-Rnd1 ", CNT1*16, Copy[1]->SAD_HP_8x8 , 0x148e, 0x12a3, 0x111a);}END_FUNC////////////////////////////////////////////////////////// // Quarter-pixel teststaticvoid QP_16x16(SKL_BYTE *Dst, const SKL_BYTE *Src, int Quads, SKL_BYTE *YTmp, int BpS, const SKL_MB_FUNCS * const Ops){ switch(Quads) { case 0: Ops->HP_16x8[0](Dst, Src, BpS); Ops->HP_16x8[0](Dst+8*BpS, Src+8*BpS, BpS); break; case 1: Ops->H_Pass_Avrg(Dst, Src, 16, BpS); break; case 2: Ops->H_Pass(Dst, Src, 16, BpS); break; case 3: Ops->H_Pass_Avrg_Up(Dst, Src, 16, BpS); break; case 4: Ops->V_Pass_Avrg(Dst, Src, 16, BpS); break; case 5: Ops->H_Pass_Avrg(YTmp, Src, 17, BpS); Ops->V_Pass_Avrg(Dst, YTmp, 16, BpS); break; case 6: Ops->H_Pass(YTmp, Src, 17, BpS); Ops->V_Pass_Avrg(Dst, YTmp, 16, BpS); break; case 7: Ops->H_Pass_Avrg_Up(YTmp, Src, 17, BpS); Ops->V_Pass_Avrg(Dst, YTmp, 16, BpS); break; case 8: Ops->V_Pass(Dst, Src, 16, BpS); break; case 9: Ops->H_Pass_Avrg(YTmp, Src, 17, BpS); Ops->V_Pass(Dst, YTmp, 16, BpS); break; case 10: Ops->H_Pass( YTmp, Src, 17, BpS); Ops->V_Pass(Dst, YTmp, 16, BpS); break; case 11: Ops->H_Pass_Avrg_Up(YTmp, Src, 17, BpS); Ops->V_Pass(Dst, YTmp, 16, BpS); break; case 12: Ops->V_Pass_Avrg_Up(Dst, Src, 16, BpS); break; case 13: Ops->H_Pass_Avrg(YTmp, Src, 17, BpS); Ops->V_Pass_Avrg_Up(Dst, YTmp, 16, BpS); break; case 14: Ops->H_Pass(YTmp, Src,17, BpS); Ops->V_Pass_Avrg_Up( Dst, YTmp, 16, BpS); break; case 15: Ops->H_Pass_Avrg_Up(YTmp, Src, 17, BpS); Ops->V_Pass_Avrg_Up(Dst, YTmp, 16, BpS); break; }}staticvoid QP_8x8(SKL_BYTE *Dst, const SKL_BYTE *Src, int Quads, SKL_BYTE *YTmp, int BpS, const SKL_MB_FUNCS * const Ops){ switch(Quads) { case 0: Ops->HP_8x8[0](Dst, Src, BpS); break; case 1: Ops->H_Pass_Avrg_8(Dst, Src, 8, BpS); break; case 2: Ops->H_Pass_8(Dst, Src, 8, BpS); break; case 3: Ops->H_Pass_Avrg_Up_8(Dst, Src, 8, BpS); break; case 4: Ops->V_Pass_Avrg_8(Dst, Src, 8, BpS); break; case 5: Ops->H_Pass_Avrg_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_8(Dst, YTmp, 8, BpS); break; case 6: Ops->H_Pass_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_8(Dst, YTmp, 8, BpS); break; case 7: Ops->H_Pass_Avrg_Up_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_8(Dst, YTmp, 8, BpS); break; case 8: Ops->V_Pass_8(Dst, Src, 8, BpS); break; case 9: Ops->H_Pass_Avrg_8(YTmp, Src, 9, BpS); Ops->V_Pass_8(Dst, YTmp, 8, BpS); break; case 10: Ops->H_Pass_8(YTmp, Src, 9, BpS); Ops->V_Pass_8(Dst, YTmp, 8, BpS); break; case 11: Ops->H_Pass_Avrg_Up_8(YTmp, Src, 9, BpS); Ops->V_Pass_8(Dst, YTmp, 8, BpS); break; case 12: Ops->V_Pass_Avrg_Up_8(Dst, Src, 8, BpS); break; case 13: Ops->H_Pass_Avrg_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_Up_8(Dst, YTmp, 8, BpS); break; case 14: Ops->H_Pass_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_Up_8( Dst, YTmp, 8, BpS); break; case 15: Ops->H_Pass_Avrg_Up_8(YTmp, Src, 9, BpS); Ops->V_Pass_Avrg_Up_8(Dst, YTmp, 8, BpS); break; }}#if 0#define PRINT(S) \ if (q==0) { \ printf( " == Src ==\n" ); \ for(j=0; j<=(S); ++j) \ { for(i=0; i<=(S); ++i) printf( "%3d ", Src[i+j*WIDTH] ); printf( "\n" ); } \ } \ printf( " == Dst(%d) ==\n", q ); \ for(j=0; j<=(S); ++j) \ { for(i=0; i<=(S); ++i) printf( "%3d ", Dst[i+j*WIDTH] ); printf( "\n" ); }#else#define PRINT(S)#endifTEST_FUNC(Test_Mb_QP_DSP){//#define DONT_DO_16x16//#define DONT_DO_8x8//#define DONT_DO_COPY//#define DONT_DO_ADD SKL_CPU_FEATURE Cpu_List[] = { SKL_CPU_C, SKL_CPU_MMX, SKL_CPU_REF, SKL_CPU_LAST }; SKL_MB_DSP Dsp; SKL_RANDOM Rnd; int i, j, n; const int CNT = 1000 * 100; const int WIDTH = 64; SKL_BYTE Src[17*WIDTH], Dst[17*WIDTH]; SKL_BYTE Tmp[16*17]; SKL_UINT32 Crc; SKL_PTIMER tm; for(i=0; i<17*WIDTH; ++i) Src[i] = (SKL_BYTE)Rnd.Get_Int(256); static int Crcs[2][16][4] = { // [rnd=0/1][q=0..15][copy/add - 16x16/8x8] { {598249, 598172, 593475, 598728}, {596752, 598425, 593556, 598704}, {597568, 599293, 594172, 598697}, {598796, 598076, 594210, 598099}, {598668, 598868, 593975, 599170}, {592954, 599059, 592947, 598786}, {595466, 598931, 592671, 599157}, {594275, 598930, 593278, 598851}, {598323, 597485, 593909, 598789}, {593878, 598707, 592971, 598567}, {594254, 598445, 593021, 599224}, {594198, 598567, 592933, 598858}, {597465, 596799, 593393, 598755}, {593883, 598291, 592874, 599033}, {594904, 598322, 593010, 599033}, {593220, 599312, 592937, 598731} }, { {598249, -1, 593475, -1}, {596718, -1, 593523, -1}, {597565, -1, 594171, -1}, {598540, -1, 594201, -1}, {598620, -1, 593953, -1}, {593800, -1, 593132, -1}, {595183, -1, 592621, -1}, {594211, -1, 593187, -1}, {598325, -1, 593909, -1}, {593806, -1, 593091, -1}, {594260, -1, 593014, -1}, {594137, -1, 592899, -1}, {597898, -1, 593349, -1}, {593804, -1, 592890, -1}, {594704, -1, 592963, -1}, {593122, -1, 592884, -1} }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -