// skl_mb_c.cpp
extern SKL_MB_QP_PASS_SIGNATURE(Skl_V_Pass_Avrg_Up_8_Add_Rnd0_MMX);

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd1_MMX = {
  { Skl_Copy_16x8_FF_MMX, Skl_Copy_16x8_FH_Rnd1_MMX, Skl_Copy_16x8_HF_Rnd1_MMX, Skl_Copy_16x8_HH_Rnd1_MMX },
  { Skl_Copy_8x8_FF_MMX,  Skl_Copy_8x8_FH_Rnd1_MMX,  Skl_Copy_8x8_HF_Rnd1_MMX,  Skl_Copy_8x8_HH_Rnd1_MMX },
  { Skl_Copy_8x4_FF_MMX,  Skl_Copy_8x4_FH_Rnd1_MMX,  Skl_Copy_8x4_HF_Rnd1_MMX,  Skl_Copy_8x4_HH_Rnd1_MMX },
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_V_Pass_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_V_Pass_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_SAD_HP_16x16_Rnd1_C, Skl_SAD_HP_8x8_Rnd1_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd0_MMX = {
  { Skl_Copy_16x8_FF_MMX, Skl_Copy_16x8_FH_Rnd0_MMX, Skl_Copy_16x8_HF_Rnd0_MMX, Skl_Copy_16x8_HH_Rnd0_MMX },
  { Skl_Copy_8x8_FF_MMX,  Skl_Copy_8x8_FH_Rnd0_MMX,  Skl_Copy_8x8_HF_Rnd0_MMX,  Skl_Copy_8x8_HH_Rnd0_MMX },
  { Skl_Copy_8x4_FF_MMX,  Skl_Copy_8x4_FH_Rnd0_MMX,  Skl_Copy_8x4_HF_Rnd0_MMX,  Skl_Copy_8x4_HH_Rnd0_MMX },
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Add_Rnd0_MMX = {
  { Skl_Add_16x8_FF_MMX, Skl_Add_16x8_FH_Rnd0_MMX, Skl_Add_16x8_HF_Rnd0_MMX, Skl_Add_16x8_HH_Rnd0_MMX },
  { Skl_Add_8x8_FF_MMX,  Skl_Add_8x8_FH_Rnd0_MMX,  Skl_Add_8x8_HF_Rnd0_MMX,  Skl_Add_8x8_HH_Rnd0_MMX },
  { Skl_Add_8x4_FF_MMX,  Skl_Add_8x4_FH_Rnd0_MMX,  Skl_Add_8x4_HF_Rnd0_MMX,  Skl_Add_8x4_HH_Rnd0_MMX },
  Skl_H_Pass_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

extern MB_FUNC(Skl_H_Pass_2Taps_MMX);
extern MB_FUNC(Skl_V_Pass_2Taps_MMX);
extern MB_FUNC(Skl_HV_Pass_2Taps_MMX);

const SKL_HV_FILTER Skl_Filter_2_MMX = {
  Skl_H_Pass_2Taps_MMX,
  Skl_V_Pass_2Taps_MMX,
  Skl_HV_Pass_2Taps_MMX
};
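// --- Added illustrative sketch (not from the original file) ----------------
// The FF/FH/HF/HH columns in each row above follow the usual half-pel naming
// (Full/Half pel in each direction), so a dispatcher would typically select
// the entry from the low bits of the motion vector. The exact phase-to-column
// mapping and the field name below are hypothetical; the real SKL_MB_FUNCS
// layout is defined in the codec's headers.
//
//   const int Phase = ((MVy & 1) << 1) | (MVx & 1);  // e.g. 0:FF 1:FH 2:HF 3:HH
//   Funcs.Copy_16x8[Phase](Dst, Src + (MVy >> 1)*BpS + (MVx >> 1), BpS);
// ---------------------------------------------------------------------------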
//////////////////////////////////////////////////////////
// tables sent to ASM code

extern SKL_INT16 Skl_MMX_Expand[256][4];

  // 17 tables, 2K each => 34K
  // Mirroring can be achieved by composing 11 basic tables
  // (for instance: (23,19,-6,3) = (20,20,-6,3) + (3,-1,0,0)).
  // Using symmetries (and bswap) could further reduce
  // the memory to 7 tables (->14K).

extern SKL_INT16 Skl_FIR_1_0_0_0[256][4];
extern SKL_INT16 Skl_FIR_3_1_0_0[256][4];
extern SKL_INT16 Skl_FIR_6_3_1_0[256][4];
extern SKL_INT16 Skl_FIR_14_3_2_1[256][4];
extern SKL_INT16 Skl_FIR_20_6_3_1[256][4];
extern SKL_INT16 Skl_FIR_20_20_6_3[256][4];
extern SKL_INT16 Skl_FIR_23_19_6_3[256][4];
extern SKL_INT16 Skl_FIR_7_20_20_6[256][4];
extern SKL_INT16 Skl_FIR_6_20_20_6[256][4];
extern SKL_INT16 Skl_FIR_6_20_20_7[256][4];
extern SKL_INT16 Skl_FIR_3_6_20_20[256][4];
extern SKL_INT16 Skl_FIR_3_6_19_23[256][4];
extern SKL_INT16 Skl_FIR_1_3_6_20[256][4];
extern SKL_INT16 Skl_FIR_1_2_3_14[256][4];
extern SKL_INT16 Skl_FIR_0_1_3_6[256][4];
extern SKL_INT16 Skl_FIR_0_0_1_3[256][4];
extern SKL_INT16 Skl_FIR_0_0_0_1[256][4];

extern void Skl_Init_QP_MMX();

//////////////////////////////////////////////////////////

SKL_INT16 Skl_FIR_1_0_0_0[256][4];
SKL_INT16 Skl_FIR_3_1_0_0[256][4];
SKL_INT16 Skl_FIR_6_3_1_0[256][4];
SKL_INT16 Skl_FIR_14_3_2_1[256][4];
SKL_INT16 Skl_FIR_20_6_3_1[256][4];
SKL_INT16 Skl_FIR_20_20_6_3[256][4];
SKL_INT16 Skl_FIR_23_19_6_3[256][4];
SKL_INT16 Skl_FIR_7_20_20_6[256][4];
SKL_INT16 Skl_FIR_6_20_20_6[256][4];
SKL_INT16 Skl_FIR_6_20_20_7[256][4];
SKL_INT16 Skl_FIR_3_6_20_20[256][4];
SKL_INT16 Skl_FIR_3_6_19_23[256][4];
SKL_INT16 Skl_FIR_1_3_6_20[256][4];
SKL_INT16 Skl_FIR_1_2_3_14[256][4];
SKL_INT16 Skl_FIR_0_1_3_6[256][4];
SKL_INT16 Skl_FIR_0_0_1_3[256][4];
SKL_INT16 Skl_FIR_0_0_0_1[256][4];
SKL_INT16 Skl_MMX_Expand[256][4];

static void Init_FIR_Table(SKL_INT16 Tab[][4], int A, int B, int C, int D)
{
  for(int i=0; i<256; ++i) {
    Tab[i][0] = i*A;
    Tab[i][1] = i*B;
    Tab[i][2] = i*C;
    Tab[i][3] = i*D;
  }
}

void Skl_Init_QP_MMX()
{
  for(int i=0; i<256; ++i) {
    Skl_MMX_Expand[i][0] = i;
    Skl_MMX_Expand[i][1] = i;
    Skl_MMX_Expand[i][2] = i;
    Skl_MMX_Expand[i][3] = i;
  }
  Init_FIR_Table(Skl_FIR_1_0_0_0,   -1,  0,  0,  0);
  Init_FIR_Table(Skl_FIR_3_1_0_0,    3, -1,  0,  0);
  Init_FIR_Table(Skl_FIR_6_3_1_0,   -6,  3, -1,  0);
  Init_FIR_Table(Skl_FIR_14_3_2_1,  14, -3,  2, -1);
  Init_FIR_Table(Skl_FIR_20_6_3_1,  20, -6,  3, -1);
  Init_FIR_Table(Skl_FIR_20_20_6_3, 20, 20, -6,  3);
  Init_FIR_Table(Skl_FIR_23_19_6_3, 23, 19, -6,  3);
  Init_FIR_Table(Skl_FIR_7_20_20_6, -7, 20, 20, -6);
  Init_FIR_Table(Skl_FIR_6_20_20_6, -6, 20, 20, -6);
  Init_FIR_Table(Skl_FIR_6_20_20_7, -6, 20, 20, -7);
  Init_FIR_Table(Skl_FIR_3_6_20_20,  3, -6, 20, 20);
  Init_FIR_Table(Skl_FIR_3_6_19_23,  3, -6, 19, 23);
  Init_FIR_Table(Skl_FIR_1_3_6_20,  -1,  3, -6, 20);
  Init_FIR_Table(Skl_FIR_1_2_3_14,  -1,  2, -3, 14);
  Init_FIR_Table(Skl_FIR_0_1_3_6,    0, -1,  3, -6);
  Init_FIR_Table(Skl_FIR_0_0_1_3,    0,  0, -1,  3);
  Init_FIR_Table(Skl_FIR_0_0_0_1,    0,  0,  0, -1);
}
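// --- Added sanity-check sketch (not from the original file) ----------------
// Verifies the composition claim from the comment above: the (23,19,-6,3)
// table is the entry-wise sum of the (20,20,-6,3) and (3,-1,0,0) tables.
// Assumes Skl_Init_QP_MMX() has already been called. The guard macro is
// hypothetical, not defined by the project.
#ifdef SKL_SELF_TEST
#include <cassert>
static void Check_FIR_Composition()
{
  for(int i=0; i<256; ++i)
    for(int j=0; j<4; ++j)
      assert( Skl_FIR_23_19_6_3[i][j] ==
              Skl_FIR_20_20_6_3[i][j] + Skl_FIR_3_1_0_0[i][j] );
}
#endif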
//////////////////////////////////////////////////////////
// SSE version

extern SKL_MB_FUNCS Skl_MB_Funcs_Add_SSE;
extern SKL_MB_FUNCS Skl_MB_Funcs_Copy_SSE;

extern MB_FUNC(Skl_Add_8x4_FF_SSE);
extern MB_FUNC(Skl_Add_8x4_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Add_8x4_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Add_8x4_HH_Rnd0_SSE);
extern MB_FUNC(Skl_Add_8x8_FF_SSE);
extern MB_FUNC(Skl_Add_8x8_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Add_8x8_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Add_8x8_HH_Rnd0_SSE);
extern MB_FUNC(Skl_Add_16x8_FF_SSE);
extern MB_FUNC(Skl_Add_16x8_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Add_16x8_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Add_16x8_HH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x4_FH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x4_HF_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x4_HH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x4_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x4_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x4_HH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x8_FH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x8_HF_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x8_HH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_8x8_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x8_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_8x8_HH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_16x8_FH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_16x8_HF_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_16x8_HH_Rnd1_SSE);
extern MB_FUNC(Skl_Copy_16x8_FH_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_16x8_HF_Rnd0_SSE);
extern MB_FUNC(Skl_Copy_16x8_HH_Rnd0_SSE);

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd1_SSE = {
  { Skl_Copy_16x8_FF_MMX, Skl_Copy_16x8_FH_Rnd1_SSE, Skl_Copy_16x8_HF_Rnd1_SSE, Skl_Copy_16x8_HH_Rnd1_SSE },
  { Skl_Copy_8x8_FF_MMX,  Skl_Copy_8x8_FH_Rnd1_SSE,  Skl_Copy_8x8_HF_Rnd1_SSE,  Skl_Copy_8x8_HH_Rnd1_SSE },
  { Skl_Copy_8x4_FF_MMX,  Skl_Copy_8x4_FH_Rnd1_SSE,  Skl_Copy_8x4_HF_Rnd1_SSE,  Skl_Copy_8x4_HH_Rnd1_SSE },
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_V_Pass_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_V_Pass_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_SAD_HP_16x16_Rnd1_C, Skl_SAD_HP_8x8_Rnd1_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd0_SSE = {
  { Skl_Copy_16x8_FF_MMX, Skl_Copy_16x8_FH_Rnd0_SSE, Skl_Copy_16x8_HF_Rnd0_SSE, Skl_Copy_16x8_HH_Rnd0_SSE },
  { Skl_Copy_8x8_FF_MMX,  Skl_Copy_8x8_FH_Rnd0_SSE,  Skl_Copy_8x8_HF_Rnd0_SSE,  Skl_Copy_8x8_HH_Rnd0_SSE },
  { Skl_Copy_8x4_FF_MMX,  Skl_Copy_8x4_FH_Rnd0_SSE,  Skl_Copy_8x4_HF_Rnd0_SSE,  Skl_Copy_8x4_HH_Rnd0_SSE },
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Add_Rnd0_SSE = {
  { Skl_Add_16x8_FF_SSE, Skl_Add_16x8_FH_Rnd0_SSE, Skl_Add_16x8_HF_Rnd0_SSE, Skl_Add_16x8_HH_Rnd0_SSE },
  { Skl_Add_8x8_FF_SSE,  Skl_Add_8x8_FH_Rnd0_SSE,  Skl_Add_8x8_HF_Rnd0_SSE,  Skl_Add_8x8_HH_Rnd0_SSE },
  { Skl_Add_8x4_FF_SSE,  Skl_Add_8x4_FH_Rnd0_SSE,  Skl_Add_8x4_HF_Rnd0_SSE,  Skl_Add_8x4_HH_Rnd0_SSE },
  Skl_H_Pass_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

//////////////////////////////////////////////////////////
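// --- Added note (not from the original file) -------------------------------
// The Rnd0/Rnd1 variants above very likely correspond to MPEG-4's
// rounding-control bit R, which alternates per frame so that the bias of
// half-pel averaging does not accumulate as drift. A scalar sketch of the
// averaging rule (the helper name is hypothetical):
static inline SKL_BYTE Hyp_HalfPel_Avg(SKL_BYTE a, SKL_BYTE b, int R /* 0 or 1 */)
{
  return (SKL_BYTE)((a + b + 1 - R) >> 1);  // R=0 rounds up, R=1 rounds down
}
// ---------------------------------------------------------------------------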
extern MB_FUNC(Skl_H_Pass_2Taps_SSE);
extern MB_FUNC(Skl_V_Pass_2Taps_SSE);
extern MB_FUNC(Skl_HV_Pass_2Taps_SSE);

const SKL_HV_FILTER Skl_Filter_2_SSE = {
  Skl_H_Pass_2Taps_SSE,
  Skl_V_Pass_2Taps_SSE,
  Skl_HV_Pass_2Taps_SSE
};
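// --- Added scalar reference (not from the original file) -------------------
// What the 2-tap passes grouped by SKL_HV_FILTER compute, in plain C:
// horizontal half-pel interpolation averages each pixel with its right
// neighbour (the V pass averages with the pixel below; the HV pass averages
// the four surrounding pixels). The function name and signature below are
// illustrative only; the guard macro is hypothetical.
#ifdef SKL_SELF_TEST
static void Hyp_H_Pass_2Taps_C(SKL_BYTE *Dst, const SKL_BYTE *Src,
                               int W, int H, int BpS)
{
  for(int y=0; y<H; ++y, Dst+=BpS, Src+=BpS)
    for(int x=0; x<W; ++x)
      Dst[x] = (SKL_BYTE)((Src[x] + Src[x+1] + 1) >> 1);
}
#endif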
//////////////////////////////////////////////////////////
// SSE2 version

extern SKL_MB_FUNCS Skl_MB_Funcs_Add_SSE2;
extern SKL_MB_FUNCS Skl_MB_Funcs_Copy_SSE2;

extern MB_FUNC(Skl_Add_16x8_FF_SSE2);
extern MB_FUNC(Skl_Add_16x8_FH_Rnd0_SSE2);
extern MB_FUNC(Skl_Add_16x8_HF_Rnd0_SSE2);
extern MB_FUNC(Skl_Add_16x8_HH_Rnd0_SSE2);
extern MB_FUNC(Skl_Copy_16x8_FF_SSE2);
extern MB_FUNC(Skl_Copy_16x8_FH_Rnd0_SSE2);
extern MB_FUNC(Skl_Copy_16x8_HF_Rnd0_SSE2);
extern MB_FUNC(Skl_Copy_16x8_HH_Rnd0_SSE2);
extern MB_FUNC(Skl_Copy_16x8_FH_Rnd1_SSE2);
extern MB_FUNC(Skl_Copy_16x8_HF_Rnd1_SSE2);
extern MB_FUNC(Skl_Copy_16x8_HH_Rnd1_SSE2);

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd1_SSE2 = {
  { Skl_Copy_16x8_FF_SSE2, Skl_Copy_16x8_FH_Rnd1_SSE2, Skl_Copy_16x8_HF_Rnd1_SSE2, Skl_Copy_16x8_HH_Rnd1_SSE2 },
  { Skl_Copy_8x8_FF_MMX,   Skl_Copy_8x8_FH_Rnd1_SSE,   Skl_Copy_8x8_HF_Rnd1_SSE,   Skl_Copy_8x8_HH_Rnd1_SSE },
  { Skl_Copy_8x4_FF_MMX,   Skl_Copy_8x4_FH_Rnd1_SSE,   Skl_Copy_8x4_HF_Rnd1_SSE,   Skl_Copy_8x4_HH_Rnd1_SSE },
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_V_Pass_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd1_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_H_Pass_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_V_Pass_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd1_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd1_MMX,
  Skl_SAD_HP_16x16_Rnd1_C, Skl_SAD_HP_8x8_Rnd1_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Copy_Rnd0_SSE2 = {
  { Skl_Copy_16x8_FF_SSE2, Skl_Copy_16x8_FH_Rnd0_SSE2, Skl_Copy_16x8_HF_Rnd0_SSE2, Skl_Copy_16x8_HH_Rnd0_SSE2 },
  { Skl_Copy_8x8_FF_MMX,   Skl_Copy_8x8_FH_Rnd0_SSE,   Skl_Copy_8x8_HF_Rnd0_SSE,   Skl_Copy_8x8_HH_Rnd0_SSE },
  { Skl_Copy_8x4_FF_MMX,   Skl_Copy_8x4_FH_Rnd0_SSE,   Skl_Copy_8x4_HF_Rnd0_SSE,   Skl_Copy_8x4_HH_Rnd0_SSE },
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_V_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

SKL_MB_FUNCS Skl_MB_Funcs_Add_Rnd0_SSE2 = {
  { Skl_Add_16x8_FF_SSE2, Skl_Add_16x8_FH_Rnd0_SSE2, Skl_Add_16x8_HF_Rnd0_SSE2, Skl_Add_16x8_HH_Rnd0_SSE2 },
  { Skl_Add_8x8_FF_MMX,   Skl_Add_8x8_FH_Rnd0_SSE,   Skl_Add_8x8_HF_Rnd0_SSE,   Skl_Add_8x8_HH_Rnd0_SSE },
  { Skl_Add_8x4_FF_MMX,   Skl_Add_8x4_FH_Rnd0_SSE,   Skl_Add_8x4_HF_Rnd0_SSE,   Skl_Add_8x4_HH_Rnd0_SSE },
  Skl_H_Pass_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_16_Copy_Rnd0_MMX, Skl_H_Pass_Avrg_Up_16_Copy_Rnd0_MMX,
  Skl_V_Pass_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_16_Add_Rnd0_MMX,  Skl_V_Pass_Avrg_Up_16_Add_Rnd0_MMX,
  Skl_H_Pass_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_H_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_H_Pass_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_8_Copy_Rnd0_MMX,  Skl_H_Pass_Avrg_Up_8_Copy_Rnd0_MMX,
  Skl_V_Pass_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_8_Add_Rnd0_MMX,   Skl_V_Pass_Avrg_Up_8_Add_Rnd0_MMX,
  Skl_SAD_HP_16x16_Rnd0_C, Skl_SAD_HP_8x8_Rnd0_C
};

#endif /* SKL_USE_ASM */

//////////////////////////////////////////////////////////
// Upsampling (3,1) filter

static inline void Filter_31(SKL_BYTE *Dst1, SKL_BYTE *Dst2,
                             const SKL_INT16 *Src1, const SKL_INT16 *Src2)
{
  /* Src[] is assumed to be >=0, so ">>2" can replace "/4" */
  SKL_INT16 a = (3*Src1[0]+   Src2[0] + 2) >> 2;
  SKL_INT16 b = (  Src1[0]+ 3*Src2[0] + 2) >> 2;
  COPY( Dst1[0], a );
  COPY( Dst2[0], b );
}

static inline void Filter_9331(SKL_BYTE *Dst1, SKL_BYTE *Dst2,
                               const SKL_INT16 *Src1, const SKL_INT16 *Src2)
{
  /* Src[] is assumed to be >=0, so ">>4" can replace "/16" */
  SKL_INT16 a = (9*Src1[0]+ 3*Src1[1]+ 3*Src2[0] + 1*Src2[1] + 8) >> 4;
  SKL_INT16 b = (3*Src1[0]+ 9*Src1[1]+ 1*Src2[0] + 3*Src2[1] + 8) >> 4;
  SKL_INT16 c = (3*Src1[0]+ 1*Src1[1]+ 9*Src2[0] + 3*Src2[1] + 8) >> 4;
  SKL_INT16 d = (1*Src1[0]+ 3*Src1[1]+ 3*Src2[0] + 9*Src2[1] + 8) >> 4;
  COPY( Dst1[0], a );
  COPY( Dst1[1], b );
  COPY( Dst2[0], c );
  COPY( Dst2[1], d );
}

void Skl_Copy_Upsampled_8x8_16To8_C(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS)
{
  int x, y;

  COPY( Dst[0], Src[0] );
  for(x=0; x<7; ++x) Filter_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1);
  COPY( Dst[15], Src[7] );
  Dst += BpS;

  for(y=0; y<7; ++y)
  {
    SKL_BYTE *const Dst2 = Dst + BpS;
    Filter_31(Dst, Dst2, Src, Src+8);
    for(x=0; x<7; ++x)
      Filter_9331(Dst+2*x+1, Dst2+2*x+1, Src+x, Src+x+8);
    Filter_31(Dst+15, Dst2+15, Src+7, Src+7+8);
    Src += 8;
    Dst += 2*BpS;
  }

  COPY( Dst[0], Src[0] );
  for(x=0; x<7; ++x) Filter_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1);
  COPY( Dst[15], Src[7] );
}

static inline void Filter_Add_31(SKL_BYTE *Dst1, SKL_BYTE *Dst2,
                                 const SKL_INT16 *Src1, const SKL_INT16 *Src2)
{
  /* Here we must use "/4", since Src[] is in [-256, 255] */
  SKL_INT16 a = (3*Src1[0]+   Src2[0] + 2) / 4;
  SKL_INT16 b = (  Src1[0]+ 3*Src2[0] + 2) / 4;
  ADD( Dst1[0], a );
  ADD( Dst2[0], b );
}

static inline void Filter_Add_9331(SKL_BYTE *Dst1, SKL_BYTE *Dst2,
                                   const SKL_INT16 *Src1, const SKL_INT16 *Src2)
{
  SKL_INT16 a = (9*Src1[0]+ 3*Src1[1]+ 3*Src2[0] + 1*Src2[1] + 8) / 16;
  SKL_INT16 b = (3*Src1[0]+ 9*Src1[1]+ 1*Src2[0] + 3*Src2[1] + 8) / 16;
  SKL_INT16 c = (3*Src1[0]+ 1*Src1[1]+ 9*Src2[0] + 3*Src2[1] + 8) / 16;
  SKL_INT16 d = (1*Src1[0]+ 3*Src1[1]+ 3*Src2[0] + 9*Src2[1] + 8) / 16;
  ADD( Dst1[0], a );
  ADD( Dst1[1], b );
  ADD( Dst2[0], c );
  ADD( Dst2[1], d );
}

void Skl_Add_Upsampled_8x8_16To8_C(SKL_BYTE *Dst, const SKL_INT16 *Src, const int BpS)
{
  int x, y;

  /* same traversal as Skl_Copy_Upsampled_8x8_16To8_C, using the Add filters */
  ADD( Dst[0], Src[0] );
  for(x=0; x<7; ++x) Filter_Add_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1);
  ADD( Dst[15], Src[7] );
  Dst += BpS;

  for(y=0; y<7; ++y)
  {
    SKL_BYTE *const Dst2 = Dst + BpS;
    Filter_Add_31(Dst, Dst2, Src, Src+8);
    for(x=0; x<7; ++x)
      Filter_Add_9331(Dst+2*x+1, Dst2+2*x+1, Src+x, Src+x+8);
    Filter_Add_31(Dst+15, Dst2+15, Src+7, Src+7+8);
    Src += 8;
    Dst += 2*BpS;
  }

  ADD( Dst[0], Src[0] );
  for(x=0; x<7; ++x) Filter_Add_31(Dst+2*x+1, Dst+2*x+2, Src+x, Src+x+1);
  ADD( Dst[15], Src[7] );
}
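// --- Added worked example (not from the original file) ---------------------
// Demonstrates the (3,1) upsampling arithmetic on one toy row: feeding the
// ramp {0,40,80,120} through the Filter_31 rule yields the in-between samples
// 10,30 / 50,70 / 90,110, i.e. a smooth 2x-upsampled ramp. The guard macro is
// hypothetical, not defined by the project.
#ifdef SKL_SELF_TEST
#include <cstdio>
static void Demo_Filter_31_Row()
{
  static const SKL_INT16 Src[4] = { 0, 40, 80, 120 };
  for(int x=0; x<3; ++x) {
    const SKL_INT16 a = (3*Src[x] +   Src[x+1] + 2) >> 2;
    const SKL_INT16 b = (  Src[x] + 3*Src[x+1] + 2) >> 2;
    std::printf("%d %d ", a, b);   // prints: 10 30 50 70 90 110
  }
  std::printf("\n");
}
#endif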