📄 skl_mb_c.cpp
字号:
ADD(Dst[0], Src[0]); for(x=0; x<7; ++x) Filter_Add_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1); ADD(Dst[15], Src[7]); Dst += BpS; for(y=0; y<7; ++y) { SKL_BYTE *const Dst2 = Dst + BpS; Filter_Add_31(Dst, Dst2, Src, Src+8); for(x=0; x<7; ++x) Filter_Add_9331(Dst+2*x+1, Dst2+2*x+1, Src+x, Src+x+8); Filter_Add_31(Dst+15, Dst2+15, Src+7, Src+7+8); Src += 8; Dst += 2*BpS; } ADD(Dst[0], Src[0]); for(x=0; x<7; ++x) Filter_Add_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1); ADD(Dst[15], Src[7]);}void Skl_HFilter_31_C(SKL_BYTE *Src1, SKL_BYTE *Src2, int Nb_Blks){ Nb_Blks *= 8; while(Nb_Blks-->0) { SKL_BYTE a = ( 3*Src1[0] + 1*Src2[0] + 2 ) >> 2; SKL_BYTE b = ( 1*Src1[0] + 3*Src2[0] + 2 ) >> 2; *Src1++ = a; *Src2++ = b; }}void Skl_VFilter_31_C(SKL_BYTE *Src1, SKL_BYTE *Src2, const int BpS, int Nb_Blks){ Nb_Blks *= 8; while(Nb_Blks-->0) { SKL_BYTE a = ( 3*Src1[0] + 1*Src2[0] + 2 ) >> 2; SKL_BYTE b = ( 1*Src1[0] + 3*Src2[0] + 2 ) >> 2; *Src1 = a; *Src2 = b; Src1 += BpS; Src2 += BpS; }}//////////////////////////////////////////////////////////// (3,1) downsamplingvoid Skl_Filter_18x18_To_8x8_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ SKL_INT16 *T, Tmp[18*8]; int i, j; T = Tmp; Src -= BpS; for(j=-1; j<17; j++) { for(i=0; i<8; ++i) T[i] = Src[2*i-1] + 3*Src[2*i+0] + 3*Src[2*i+1] + Src[2*i+2]; T += 8; Src += BpS; } T = Tmp + 8; for(j=0; j<8; j++) { for(i=0; i<8; ++i) Dst[i] = ( T[-8+i] + 3*T[0+i] + 3*T[8+i] + T[16+i] + 32 ) / 64; Dst += 8; T += 16; }}void Skl_Filter_Diff_18x18_To_8x8_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ SKL_INT16 *T, Tmp[18*8]; int i, j; T = Tmp; Src -= BpS; for(j=-1; j<17; j++) { for(i=0; i<8; ++i) T[i] = Src[2*i-1] + 3*Src[2*i+0] + 3*Src[2*i+1] + Src[2*i+2]; T += 8; Src += BpS; } T = Tmp; for(j=0; j<8; j++) { for(i=0; i<8; ++i) Dst[i] -= ( T[i] + 3*T[8+i] + 3*T[16+i] + T[24+i] + 32 ) / 64; Dst += 8; T += 16; }}//////////////////////////////////////////////////////////// 8b to 16b transfer opsvoid Skl_Copy_16x8_8To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { Dst[0] = (SKL_INT16)Src[0]; Dst[64+0] = (SKL_INT16)Src[0+8]; Dst[1] = (SKL_INT16)Src[1]; Dst[64+1] = (SKL_INT16)Src[1+8]; Dst[2] = (SKL_INT16)Src[2]; Dst[64+2] = (SKL_INT16)Src[2+8]; Dst[3] = (SKL_INT16)Src[3]; Dst[64+3] = (SKL_INT16)Src[3+8]; Dst[4] = (SKL_INT16)Src[4]; Dst[64+4] = (SKL_INT16)Src[4+8]; Dst[5] = (SKL_INT16)Src[5]; Dst[64+5] = (SKL_INT16)Src[5+8]; Dst[6] = (SKL_INT16)Src[6]; Dst[64+6] = (SKL_INT16)Src[6+8]; Dst[7] = (SKL_INT16)Src[7]; Dst[64+7] = (SKL_INT16)Src[7+8]; Src += BpS; Dst += 8; }}void Skl_Copy_8x8_8To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { Dst[0] = (SKL_INT16)Src[0]; Dst[1] = (SKL_INT16)Src[1]; Dst[2] = (SKL_INT16)Src[2]; Dst[3] = (SKL_INT16)Src[3]; Dst[4] = (SKL_INT16)Src[4]; Dst[5] = (SKL_INT16)Src[5]; Dst[6] = (SKL_INT16)Src[6]; Dst[7] = (SKL_INT16)Src[7]; Src += BpS; Dst += 8; }}void Skl_Diff_16x8_8To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { Dst[0] -= (SKL_INT16)Src[0]; Dst[64+0] -= (SKL_INT16)Src[0+8]; Dst[1] -= (SKL_INT16)Src[1]; Dst[64+1] -= (SKL_INT16)Src[1+8]; Dst[2] -= (SKL_INT16)Src[2]; Dst[64+2] -= (SKL_INT16)Src[2+8]; Dst[3] -= (SKL_INT16)Src[3]; Dst[64+3] -= (SKL_INT16)Src[3+8]; Dst[4] -= (SKL_INT16)Src[4]; Dst[64+4] -= (SKL_INT16)Src[4+8]; Dst[5] -= (SKL_INT16)Src[5]; Dst[64+5] -= (SKL_INT16)Src[5+8]; Dst[6] -= (SKL_INT16)Src[6]; Dst[64+6] -= (SKL_INT16)Src[6+8]; Dst[7] -= (SKL_INT16)Src[7]; Dst[64+7] -= (SKL_INT16)Src[7+8]; Src += BpS; Dst += 8; }}void Skl_Diff_8x8_8To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { Dst[0] -= (SKL_INT16)Src[0]; Dst[1] -= (SKL_INT16)Src[1]; Dst[2] -= (SKL_INT16)Src[2]; Dst[3] -= (SKL_INT16)Src[3]; Dst[4] -= (SKL_INT16)Src[4]; Dst[5] -= (SKL_INT16)Src[5]; Dst[6] -= (SKL_INT16)Src[6]; Dst[7] -= (SKL_INT16)Src[7]; Src += BpS; Dst += 8; }}void Skl_Diff_16x8_88To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS){ for(int y=8; y>0; --y) { Dst[ 0] = (SKL_INT16)Src1[0] -(SKL_INT16)Src2[0]; Dst[ 1] = (SKL_INT16)Src1[1] -(SKL_INT16)Src2[1]; Dst[ 2] = (SKL_INT16)Src1[2] -(SKL_INT16)Src2[2]; Dst[ 3] = (SKL_INT16)Src1[3] -(SKL_INT16)Src2[3]; Dst[ 4] = (SKL_INT16)Src1[4] -(SKL_INT16)Src2[4]; Dst[ 5] = (SKL_INT16)Src1[5] -(SKL_INT16)Src2[5]; Dst[ 6] = (SKL_INT16)Src1[6] -(SKL_INT16)Src2[6]; Dst[ 7] = (SKL_INT16)Src1[7] -(SKL_INT16)Src2[7]; Dst[64+0] = (SKL_INT16)Src1[8+0] -(SKL_INT16)Src2[8+0]; Dst[64+1] = (SKL_INT16)Src1[8+1] -(SKL_INT16)Src2[8+1]; Dst[64+2] = (SKL_INT16)Src1[8+2] -(SKL_INT16)Src2[8+2]; Dst[64+3] = (SKL_INT16)Src1[8+3] -(SKL_INT16)Src2[8+3]; Dst[64+4] = (SKL_INT16)Src1[8+4] -(SKL_INT16)Src2[8+4]; Dst[64+5] = (SKL_INT16)Src1[8+5] -(SKL_INT16)Src2[8+5]; Dst[64+6] = (SKL_INT16)Src1[8+6] -(SKL_INT16)Src2[8+6]; Dst[64+7] = (SKL_INT16)Src1[8+7] -(SKL_INT16)Src2[8+7]; Src1 += BpS; Src2 += BpS; Dst += 8; }}void Skl_Diff_8x8_88To16_C(SKL_INT16 *Dst, const SKL_BYTE *Src1, const SKL_BYTE *Src2, const int BpS){ for(int y=8; y>0; --y) { Dst[ 0] = (SKL_INT16)Src1[0] -(SKL_INT16)Src2[0]; Dst[ 1] = (SKL_INT16)Src1[1] -(SKL_INT16)Src2[1]; Dst[ 2] = (SKL_INT16)Src1[2] -(SKL_INT16)Src2[2]; Dst[ 3] = (SKL_INT16)Src1[3] -(SKL_INT16)Src2[3]; Dst[ 4] = (SKL_INT16)Src1[4] -(SKL_INT16)Src2[4]; Dst[ 5] = (SKL_INT16)Src1[5] -(SKL_INT16)Src2[5]; Dst[ 6] = (SKL_INT16)Src1[6] -(SKL_INT16)Src2[6]; Dst[ 7] = (SKL_INT16)Src1[7] -(SKL_INT16)Src2[7]; Src1 += BpS; Src2 += BpS; Dst += 8; }}// 8b to 8b transfer opsvoid Skl_Move_16x8_C(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { memcpy( Dst, Src, 16 ); Dst += BpS; Src += BpS; }}void Skl_Move_8x8_C(SKL_BYTE *Dst, const SKL_BYTE *Src, const int BpS){ for(int y=8; y>0; --y) { memcpy( Dst, Src, 8 ); Dst += BpS; Src += BpS; }}#undef COPY#undef ADD#undef MIX#undef PRELUDE#undef EPILOG//////////////////////////////////////////////////////////// Edge replicationvoid Skl_Make_Edges_C(SKL_BYTE * const * const YUV, const int Width, const int Height, const int BpS){ int k; SKL_BYTE *sY = YUV[0]; SKL_BYTE *sU = YUV[1]; SKL_BYTE *sV = YUV[2]; for(k=Height/2; k>0; --k) { memset(sY -16, sY[0 ], 16); memset(sY+Width, sY[Width-1 ], 16); memset(sY -16+BpS, sY[0 +BpS], 16); memset(sY+Width+BpS, sY[Width-1+BpS], 16); sY += 2*BpS; memset(sU -8, sU[0 ], 8); memset(sU+Width/2, sU[Width/2-1], 8); memset(sV -8, sV[0 ], 8); memset(sV+Width/2, sV[Width/2-1], 8); sU += BpS; sV += BpS; } SKL_BYTE *dY_Top = YUV[0] - 16 - 16*BpS; SKL_BYTE *sY_Top = YUV[0] - 16; SKL_BYTE *dY_Bot = YUV[0] - 16 + Height*BpS; SKL_BYTE *sY_Bot = YUV[0] - 16 + Height*BpS - BpS; SKL_BYTE *dC_Top = YUV[1] - 8 - 8*BpS; SKL_BYTE *sC_Top = YUV[1] - 8; SKL_BYTE *dC_Bot = YUV[1] - 8 + (Height/2)*BpS; SKL_BYTE *sC_Bot = YUV[1] - 8 + (Height/2)*BpS - BpS; for(k=8;k>0;k--) { memcpy(dY_Top , sY_Top, BpS); memcpy(dY_Top+BpS, sY_Top, BpS); memcpy(dC_Top , sC_Top, BpS); memcpy(dY_Bot , sY_Bot, BpS); memcpy(dY_Bot+BpS, sY_Bot, BpS); memcpy(dC_Bot , sC_Bot, BpS); dY_Top += 2*BpS; dC_Top += BpS; dY_Bot += 2*BpS; dC_Bot += BpS; }}//////////////////////////////////////////////////////////// Frame/Field ops (rather unused)//////////////////////////////////////////////////////////#define ABS(x) ((x)<0 ? -(x) : (x))#define SAD(a,b) \ Tmp = Src[0*64+(a)*8+j+8] - Src[0*64+(b)*8+j+8]; Sum += ABS(Tmp); \ Tmp = Src[1*64+(a)*8+j+8] - Src[1*64+(b)*8+j+8]; Sum += ABS(Tmp);#define SAD2(a,b) SAD((a),(b)); SAD((a)+16, (b)+16);SKL_UINT32 Skl_SAD_16x7_Frame_C(const SKL_INT16 *Src){ SKL_UINT32 Sum = 0; for(int j=-8; j<0; ++j) { SKL_INT32 Tmp; SAD2(0,1); SAD2(1,2); SAD2(2,3); SAD2(3,4); SAD2(4,5); SAD2(5,6); SAD2(6,7); } return Sum;}SKL_UINT32 Skl_SAD_16x7_Field_C(const SKL_INT16 *Src){ SKL_UINT32 Sum = 0; SKL_INT32 Tmp; for(int j=-8; j<0; ++j) { SAD2(0,2); SAD2(2,4); SAD2(2,4); SAD2(3,5); SAD2(4,6); SAD2(5,7); SAD(6,0+16); SAD(7,1+16); } return Sum;}#undef SAD2#undef SAD#undef ABS void Skl_Reorder_Frame_16x16_C(SKL_INT16 *Src) // Src[4*64]{ // the permutation: [01234567|89abcdef] -> [0248ace|13579bdf] // decomposes into the 4 elementary cycles: // 1->2->4->8 ->1 3->6->c->9 ->3 // 7->e->d->b ->7 5->10 ->5 // + the 2 fixed point 0/f, of course. SKL_INT16 Line[8];#define CIRCULATE(a,b,c,d) \ memcpy(Line, Src+(a)*8, sizeof(Line)); \ memcpy(Src+(a)*8, Src+(b)*8, sizeof(Line));\ memcpy(Src+(b)*8, Src+(c)*8, sizeof(Line));\ memcpy(Src+(c)*8, Src+(d)*8, sizeof(Line));\ memcpy(Src+(d)*8, Line, sizeof(Line))#define SWAP(a,b) \ memcpy(Line, Src+(a)*8, sizeof(Line)); \ memcpy(Src+(a)*8, Src+(b)*8, sizeof(Line));\ memcpy(Src+(b)*8, Line, sizeof(Line))#define BLK 8 // jump to next block below CIRCULATE(1,2,4,8 +BLK); CIRCULATE(3,6,12 +BLK,9 +BLK); CIRCULATE(7,14 +BLK,13 +BLK, 11 +BLK); SWAP(5,10 +BLK); Src += 64; CIRCULATE(1,2,4,8 +BLK); CIRCULATE(3,6,12 +BLK,9 +BLK); CIRCULATE(7,14 +BLK,13 +BLK, 11 +BLK); SWAP(5,10 +BLK);#undef BLK#undef SWAP#undef CIRCULATE}//////////////////////////////////////////////////////////} // extern "C"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -