📄 test_dsp.cpp
字号:
}; for(int Rounding=0; Rounding<=1; ++Rounding) { for(int q=0; q<=15; ++q) { for(SKL_CPU_FEATURE *Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { Skl_Init_Mb_DSP( &Dsp, *Cpu );#ifndef DONT_DO_16x16#ifndef DONT_DO_COPY tm.Reset(); for(n=0; n<CNT; ++n) { SKL_BZERO(Dst, sizeof(Dst)); QP_16x16(Dst, Src, q, Tmp, WIDTH, Dsp.Copy[Rounding]); } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<17*WIDTH; ++j) Crc += (Dst[j]^j); printf( "%s: copy 16x16 quads=%d Rounding=%d \t%.3f usec/call \tCrc: %d\n", Dsp.Name, q, Rounding, tm.Get_Sec() * 1.e6f/n, Crc ); CHECKI(Crc, Crcs[Rounding][q][0]); PRINT(16)#endif#ifndef DONT_DO_ADD if (Rounding==0) { tm.Reset(); for(n=0; n<CNT; ++n) { for(j=0; j<17*WIDTH; ++j) Dst[j] = (j*53+22)&0xff; QP_16x16(Dst, Src, q, Tmp, WIDTH, Dsp.Add); } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<17*WIDTH; ++j) Crc += (Dst[j]^j); printf( "%s: add 16x16 quads=%d Rounding=%d \t%.3f usec/call \tCrc: %d\n", Dsp.Name, q, Rounding, tm.Get_Sec() * 1.e6f/n, Crc ); CHECKI(Crc, Crcs[Rounding][q][1]); PRINT(16) }#endif#endif#ifndef DONT_DO_8x8#ifndef DONT_DO_COPY tm.Reset(); for(n=0; n<CNT; ++n) { SKL_BZERO(Dst, sizeof(Dst)); QP_8x8(Dst, Src, q, Tmp, WIDTH, Dsp.Copy[Rounding]); } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<17*WIDTH; ++j) Crc += (Dst[j]^j); printf( "%s: copy 8x8 quads=%d Rounding=%d \t%.3f usec/call \tCrc: %d\n", Dsp.Name, q, Rounding, tm.Get_Sec() * 1.e6f/n, Crc ); CHECKI(Crc, Crcs[Rounding][q][2]); PRINT(8)#endif#ifndef DONT_DO_ADD if (Rounding==0) { tm.Reset(); for(n=0; n<CNT; ++n) { for(j=0; j<17*WIDTH; ++j) Dst[j] = (j*53+22)&0xff; QP_8x8(Dst, Src, q, Tmp, WIDTH, Dsp.Add); } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<17*WIDTH; ++j) Crc += (Dst[j]^j); printf( "%s: add 8x8 quads=%d Rounding=%d \t%.3f usec/call \tCrc: %d\n", Dsp.Name, q, Rounding, tm.Get_Sec() * 1.e6f/n, Crc ); CHECKI(Crc, Crcs[Rounding][q][3]); PRINT(8) }#endif#endif } } }}END_FUNC#undef PRINT//////////////////////////////////////////////////////////TEST_FUNC(Test_GMC_DSP){ SKL_GMC_DSP Dsp; SKL_CPU_FEATURE Cpu_List[] = {// SKL_CPU_C, SKL_CPU_REF, SKL_CPU_LAST }; SKL_RANDOM Rnd; int i, j, n; const int CNT = 1000 * 50; const int WIDTH = 64; SKL_BYTE Src[(WIDTH+8+8)*WIDTH], Dst[WIDTH*WIDTH] = {0}; SKL_UINT32 Crc; int Pts[4][2]; SKL_PTIMER tm; for(i=0; i<(int)sizeof(Src); ++i) Src[i] = 0x57; for(j=0; j<WIDTH; j++) for(i=0; i<WIDTH; i++) Src[i+j*WIDTH] = Rnd.Get_Int(256); SKL_BYTE *S = Src + 8 +8*WIDTH; SKL_UINT32 Crcs[3][4][2][2] = { {{{8393568,8388819},{8393399,8388982}},{{8393067,8389288},{8394034,8388660}}, {{8394324,8389557},{8392501,8388292}},{{8391667,8388039},{8391581,8387852}}}, {{{8392223,8388635},{8392433,8388450}},{{8393452,8389064},{8392916,8388292}}, {{8392911,8388341},{8394027,8389546}},{{8393499,8388410},{8391674,8388356}}}, {{{8392360,8388039},{8394520,8389246}},{{8393808,8388341},{8392711,8389546}}, {{8393138,8388510},{8392848,8388274}},{{8393739,8388628},{8392100,8387880}}} }; int MV_Crcs[3][4][2][4] = { {{{0,0,0,0},{0,0,0,1}},{{0,0,1,1},{0,-1,1,-1}}, {{1,1,1,2},{1,0,1,0}},{{1,0,2,0},{1,0,2,0}}}, {{{-7,-4,-14,-8},{5,9,10,18}},{{5,10,10,21},{5,-4,10,-8}}, {{-3,-1,-6,-2},{1,5,2,10}},{{-7,-8,-14,-17},{5,-10,11,-20}}}, {{{1,8,2,17},{1,-9,2,-18}},{{1,-1,2,-2},{-7,-3,-14,-6}}, {{5,10,10,21},{-7,-9,-14,-19}},{{-3,8,-6,17},{5,-8,10,-17}}} }; for(int Nb_Pts=1; Nb_Pts<=3; ++Nb_Pts) { for(int Acc=0; Acc<=3; ++Acc) { for(int Rounding=0; Rounding<=1; ++Rounding) { Rnd.Set_Seed( Rounding + Acc*57 + Nb_Pts*39 + 3 ); for(int k=0; k<4; ++k) { Pts[k][0] = Rnd.Get_SInt(-2,2); Pts[k][1] = Rnd.Get_SInt(-2,2); } for(SKL_CPU_FEATURE *Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { int mv[2][2]; int x, y; Skl_Init_GMC_DSP( &Dsp, *Cpu ); Dsp.Setup(WIDTH,WIDTH, Pts, Nb_Pts, Acc); for(i=0; i<(int)sizeof(Dst); ++i) Dst[i] = 0; tm.Reset(); for(n=0; n<CNT; ++n) { for(y=0; y<WIDTH/16; y+=16) { for(x=0; x<WIDTH/16; x+=16) Dsp.Predict_16x16(&Dsp, Dst+x+y*WIDTH, S, WIDTH,x,y, Rounding); Dsp.Get_Average_MV(mv[0], x,y,0); Dsp.Get_Average_MV(mv[1], x,y,1); } } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<(int)sizeof(Dst); ++j) Crc += (Dst[j]^j); printf( "%s: 16x16 Nb:%d Acc=%d Rounding=%d \t%.3f ms \tCrc: %d\n", Dsp.Name, Nb_Pts, Acc, Rounding, tm.Get_mSec(), Crc ); CHECKI(Crc, Crcs[Nb_Pts-1][Acc][Rounding][0]); CHECKI(mv[0][0], MV_Crcs[Nb_Pts-1][Acc][Rounding][0]); CHECKI(mv[0][1], MV_Crcs[Nb_Pts-1][Acc][Rounding][1]); CHECKI(mv[1][0], MV_Crcs[Nb_Pts-1][Acc][Rounding][2]); CHECKI(mv[1][1], MV_Crcs[Nb_Pts-1][Acc][Rounding][3]); for(i=0; i<(int)sizeof(Dst); ++i) Dst[i] = 0; tm.Reset(); for(n=0; n<CNT*2; ++n) { for(y=0; y<WIDTH/32; y+=8) { for(x=0; x<WIDTH/32; x+=8) Dsp.Predict_8x8(&Dsp, Dst+x+y*WIDTH, S, WIDTH/2, WIDTH,x,y,Rounding); } } Dsp.Switch_Off(); tm.Stop(); for(Crc=0,j=0; j<(int)sizeof(Dst); ++j) Crc += (Dst[j]^j); printf( "%s: 8x8 Nb:%d Acc=%d Rounding=%d \t%.3f ms \tCrc: %d\n", Dsp.Name, Nb_Pts, Acc, Rounding, tm.Get_mSec(), Crc ); CHECKI(Crc, Crcs[Nb_Pts-1][Acc][Rounding][1]); } } } }}END_FUNC//////////////////////////////////////////////////////////// Quant tests#define TEST_Q1(S, I, CNT, CALL, CRC0) \ Dsp.Init_Quantizer(Q,M, 0L, I); \ tm.Reset(); \ for(n=0; n<CNT; ++n) \ for(Crc=0, q=MIN_Q; q<=MAX_Q; q++) { \ Dsp.CALL(Dst, Src, Q, q, 1+q*2); \ for(i=0; i<8*8; ++i) Crc += Dst[i]^i; \ } \ Dsp.Switch_Off(); \ tm.Stop(); \ printf( "%s:\t%.3f s \tCrc: %d\n", \ S, tm.Get_Sec(), Crc); \ CHECKI(Crc, CRC0)#define TEST_Q2(S, I, CNT, CALL, CRC0) \ Dsp.Init_Quantizer(Q,M, 0L,I); \ tm.Reset(); \ for(n=0; n<CNT; ++n) \ for(Crc=0, q=MIN_Q; q<=MAX_Q; ++q) { \ Dsp.CALL(Dst, Src, Q, q); \ for(i=0; i<8*8; ++i) Crc += Dst[i]^i; \ } \ Dsp.Switch_Off(); \ tm.Stop(); \ printf( "%s:\t%.3f s \tCrc: %d\n", \ S, tm.Get_Sec(), Crc); \ CHECKI(Crc, CRC0)#define TEST_Q3(S, I, CNT, CALL, CRC0) \ Dsp.Init_Quantizer(Q,M, 0L,I); \ tm.Reset(); \ for(n=0; n<CNT; ++n) \ for(Crc=0, q=MIN_Q; q<=MAX_Q; ++q) { \ Dsp.CALL(Dst, Src, Q, q, 0xff); \ for(i=0; i<8*8; ++i) Crc += Dst[i]^i; \ } \ Dsp.Switch_Off(); \ tm.Stop(); \ printf( "%s:\t%.3f s \tCrc: %d\n", \ S, tm.Get_Sec(), Crc); \ CHECKI(Crc, CRC0)#define TEST_Q4(S, FUNC, SIZE, CNT) \ for(i=0; i<SIZE; ++i) Dst[i] = 0xdead; \ tm.Reset(); \ for(n=0; n<CNT; ++n) Dsp.FUNC(Dst); \ Dsp.Switch_Off(); \ tm.Stop(); \ for(i=0; i<SIZE; ++i) CHECK(Dst[i]==0); \ printf( "%s:\t%.3f s\n", S, tm.Get_Sec())TEST_FUNC(Test_Quant_DSP){ SKL_CPU_FEATURE Cpu_List[] = { SKL_CPU_C, // SKL_CPU_X86, SKL_CPU_MMX, SKL_CPU_SSE,// SKL_CPU_SSE2, SKL_CPU_REF, // SKL_CPU_ALT, SKL_CPU_LAST }; SKL_INT16 Src[8*8], Dst[8*8]; SKL_BYTE M[64]; int i, q, n; const int MAX_Q = 31; const int MIN_Q = 1; const int CNT1 = 7000; SKL_UINT32 Crc; SKL_PTIMER tm; SKL_CPU_FEATURE *Cpu; for(i=0; i<64; ++i) M[i] = (i+1)*255/64; // (i<8) ? 8 : i; for(i=0; i<8*8; ++i) { Src[i] = 1 + (i-32) * (i&6); Dst[i] = 0; } Src[0] = 129; SKL_INT16 Q[4][31][2][64]; SKL_QUANT_DSP Dsp; printf( "== Quant/Dequant H263 (CNT=%d) ==\n", CNT1 ); for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { Skl_Init_Quant_DSP( &Dsp, *Cpu, 0 ); printf( "--- Impl:%s ---\n", Dsp.Name); TEST_Q4(" -Zero-Coeffs-", Zero, 64, 150*CNT1); TEST_Q4(" -Zero16-Coeffs-", Zero16, 16, 16*150*CNT1); TEST_Q1("Quant -Intra-H263 ", 1, CNT1, Quant_Intra, 47876); TEST_Q2("Quant -Inter-H263 ", 0, CNT1, Quant_Inter, 47901); TEST_Q1("Dequant-Intra-H263 ", 1, CNT1, Dequant_Intra, 146631); TEST_Q3("Dequant-Inter-H263 ", 0, CNT1, Dequant_Inter, 145753); } printf( "== Quant/Dequant MPEG4 (CNT=%d) ==\n", CNT1 ); for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { Skl_Init_Quant_DSP( &Dsp, *Cpu, 1 ); printf( "--- Impl:%s ---\n", Dsp.Name);// C/MMX MPEG-Quantization is BROKEN for now ('coz of precision)// TEST_Q1("Quant -Intra-MPEG4", 1, CNT1, Quant_Intra, 54922);// TEST_Q2("Quant -Inter-MPEG4", 0, CNT1, Quant_Inter, 59144); TEST_Q1("Dequant-Intra-MPEG4", 1, CNT1, Dequant_Intra, 282711); TEST_Q3("Dequant-Inter-MPEG4", 0, CNT1, Dequant_Inter, 304735); } printf( "== Quant/Dequant MPEG2 (CNT=%d) ==\n", CNT1 ); for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { Skl_Init_Quant_DSP( &Dsp, *Cpu, 2 ); printf( "--- Impl:%s ---\n", Dsp.Name); // MMX hack does not perform mismatch control. => different CRC int Crc = (!strcmp(Dsp.Name,"MPEG2-Ref")) ? 354040 : 354022; TEST_Q1("Dequant-Intra-MPEG2", 1, CNT1, Dequant_Intra, Crc); TEST_Q3("Dequant-Inter-MPEG2", 0, CNT1, Dequant_Inter, 333603); }}END_FUNCTEST_FUNC(Test_Quant_Sparse){ SKL_QUANT_DSP Dsp; SKL_CPU_FEATURE *Cpu; const int CNT = 2000; SKL_PTIMER tm; SKL_CPU_FEATURE Cpu_List[] = { SKL_CPU_C, SKL_CPU_MMX, SKL_CPU_SSE, SKL_CPU_LAST }; SKL_INT16 Src0[256][64], Src1[64], Src2[64]; int i,j,k,n, Rows; SKL_RANDOM Rnd(7641452); printf( "=== Sparse_8x8 tests ===\n" ); for(Rows = 0x00; Rows<=0xff; ++Rows) { for(j=0; j<8; ++j) if (Rows & (1<<8)) for(i=0; i<8; ++i) Src0[Rows][j*8+i] = Rnd.Get_SInt(-2048, 2047); } for(Cpu=Cpu_List; *Cpu != SKL_CPU_LAST; Cpu++) { Skl_Init_Quant_DSP( &Dsp, *Cpu, 0 ); tm.Reset(); for(n=0; n<CNT; ++n) { for(Rows = 0x00; Rows<=0xff; ++Rows) { for(k=0; k<64; ++k) Src1[k] = Src2[k] = Src0[Rows][k]; Dsp.IDct(Src1); Dsp.IDct_Sparse(Src2); for(i=0; i<64; ++i) CHECKI(Src2[i], Src1[i]); } } Dsp.Switch_Off(); tm.Stop(); printf( "- %s - \tSparse : %.3f s", Dsp.Name, tm.Get_Sec() ); tm.Reset(); for(n=0; n<CNT; ++n) { for(Rows = 0x00; Rows<=0xff; ++Rows) { SKL_BYTE Dst[64]; for(k=0; k<64; ++k) Src1[k] = Src2[k] = Src0[Rows][k];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -