qpel.c
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_x86_64);
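/* Dispatch tables for the x86_64 quarter-pel passes: the 16-wide horizontal and
 * vertical passes, then the 8-wide ones, each in plain / Avrg / Avrg_Up order;
 * the second table holds the corresponding *_Add flavours. */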
XVID_QP_FUNCS xvid_QP_Funcs_x86_64 = {
xvid_H_Pass_16_x86_64, xvid_H_Pass_Avrg_16_x86_64, xvid_H_Pass_Avrg_Up_16_x86_64,
xvid_V_Pass_16_x86_64, xvid_V_Pass_Avrg_16_x86_64, xvid_V_Pass_Avrg_Up_16_x86_64,
xvid_H_Pass_8_x86_64, xvid_H_Pass_Avrg_8_x86_64, xvid_H_Pass_Avrg_Up_8_x86_64,
xvid_V_Pass_8_x86_64, xvid_V_Pass_Avrg_8_x86_64, xvid_V_Pass_Avrg_Up_8_x86_64
};
XVID_QP_FUNCS xvid_QP_Add_Funcs_x86_64 = {
xvid_H_Pass_Add_16_x86_64, xvid_H_Pass_Avrg_Add_16_x86_64, xvid_H_Pass_Avrg_Up_Add_16_x86_64,
xvid_V_Pass_Add_16_x86_64, xvid_V_Pass_Avrg_Add_16_x86_64, xvid_V_Pass_Avrg_Up_Add_16_x86_64,
xvid_H_Pass_8_Add_x86_64, xvid_H_Pass_Avrg_8_Add_x86_64, xvid_H_Pass_Avrg_Up_8_Add_x86_64,
xvid_V_Pass_8_Add_x86_64, xvid_V_Pass_Avrg_8_Add_x86_64, xvid_V_Pass_Avrg_Up_8_Add_x86_64,
};
#endif /* ARCH_IS_X86_64 */
/*****************************************************************************
 * tables for ASM
 ****************************************************************************/
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
/* These symbols will be used outside this file, so tell the compiler
 * they're global. Only ia32 will define them in this file; x86_64
 * defines them in the assembly files */
extern uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */
extern int16_t xvid_FIR_1_0_0_0[256][4];
extern int16_t xvid_FIR_3_1_0_0[256][4];
extern int16_t xvid_FIR_6_3_1_0[256][4];
extern int16_t xvid_FIR_14_3_2_1[256][4];
extern int16_t xvid_FIR_20_6_3_1[256][4];
extern int16_t xvid_FIR_20_20_6_3[256][4];
extern int16_t xvid_FIR_23_19_6_3[256][4];
extern int16_t xvid_FIR_7_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_6[256][4];
extern int16_t xvid_FIR_6_20_20_7[256][4];
extern int16_t xvid_FIR_3_6_20_20[256][4];
extern int16_t xvid_FIR_3_6_19_23[256][4];
extern int16_t xvid_FIR_1_3_6_20[256][4];
extern int16_t xvid_FIR_1_2_3_14[256][4];
extern int16_t xvid_FIR_0_1_3_6[256][4];
extern int16_t xvid_FIR_0_0_1_3[256][4];
extern int16_t xvid_FIR_0_0_0_1[256][4];
#endif
/* Array definitions, according to the target platform */
#ifdef ARCH_IS_IA32
uint16_t xvid_Expand_mmx[256][4]; /* 8b -> 64b expansion table */
#endif
#if !defined(ARCH_IS_X86_64)
/* Only ia32 will use these tables outside this file so mark them
* static for all other archs */
#if defined(ARCH_IS_IA32)
#define __SCOPE
#else
#define __SCOPE static
#endif
__SCOPE int16_t xvid_FIR_1_0_0_0[256][4];
__SCOPE int16_t xvid_FIR_3_1_0_0[256][4];
__SCOPE int16_t xvid_FIR_6_3_1_0[256][4];
__SCOPE int16_t xvid_FIR_14_3_2_1[256][4];
__SCOPE int16_t xvid_FIR_20_6_3_1[256][4];
__SCOPE int16_t xvid_FIR_20_20_6_3[256][4];
__SCOPE int16_t xvid_FIR_23_19_6_3[256][4];
__SCOPE int16_t xvid_FIR_7_20_20_6[256][4];
__SCOPE int16_t xvid_FIR_6_20_20_6[256][4];
__SCOPE int16_t xvid_FIR_6_20_20_7[256][4];
__SCOPE int16_t xvid_FIR_3_6_20_20[256][4];
__SCOPE int16_t xvid_FIR_3_6_19_23[256][4];
__SCOPE int16_t xvid_FIR_1_3_6_20[256][4];
__SCOPE int16_t xvid_FIR_1_2_3_14[256][4];
__SCOPE int16_t xvid_FIR_0_1_3_6[256][4];
__SCOPE int16_t xvid_FIR_0_0_1_3[256][4];
__SCOPE int16_t xvid_FIR_0_0_0_1[256][4];
#endif
static void Init_FIR_Table(int16_t Tab[][4],
int A, int B, int C, int D)
{
int i;
for(i=0; i<256; ++i) {
Tab[i][0] = i*A;
Tab[i][1] = i*B;
Tab[i][2] = i*C;
Tab[i][3] = i*D;
}
}
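/* Each generated table maps a pixel value v to { v*A, v*B, v*C, v*D }, i.e.
 * the products of one source sample with four filter taps, so the assembly
 * can presumably trade per-sample multiplies for table lookups (cf. the
 * USE_TABLES remark below). */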
void xvid_Init_QP(void)
{
#ifdef ARCH_IS_IA32
int i;
for(i=0; i<256; ++i) {
xvid_Expand_mmx[i][0] = i;
xvid_Expand_mmx[i][1] = i;
xvid_Expand_mmx[i][2] = i;
xvid_Expand_mmx[i][3] = i;
}
#endif
/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
Init_FIR_Table(xvid_FIR_1_0_0_0, -1, 0, 0, 0);
Init_FIR_Table(xvid_FIR_3_1_0_0, 3, -1, 0, 0);
Init_FIR_Table(xvid_FIR_6_3_1_0, -6, 3, -1, 0);
Init_FIR_Table(xvid_FIR_14_3_2_1, 14, -3, 2, -1);
Init_FIR_Table(xvid_FIR_20_6_3_1, 20, -6, 3, -1);
Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6, 3);
Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6, 3);
Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
Init_FIR_Table(xvid_FIR_3_6_20_20, 3, -6, 20, 20);
Init_FIR_Table(xvid_FIR_3_6_19_23, 3, -6, 19, 23);
Init_FIR_Table(xvid_FIR_1_3_6_20, -1, 3, -6, 20);
Init_FIR_Table(xvid_FIR_1_2_3_14, -1, 2, -3, 14);
Init_FIR_Table(xvid_FIR_0_1_3_6, 0, -1, 3, -6);
Init_FIR_Table(xvid_FIR_0_0_1_3, 0, 0, -1, 3);
Init_FIR_Table(xvid_FIR_0_0_0_1, 0, 0, 0, -1);
}
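/* Illustrative, self-contained helper (hypothetical name, not used anywhere
 * in this file): the interior outputs of the passes below apply the 8-tap
 * kernel (-1, 3, -6, 20, 20, -6, 3, -1), add 16-Rnd, shift right by 5 and
 * clamp to 8 bits (cf. the interior rows of the fast FUNC_H further down).
 * Here s points at the fourth sample of the 8-sample window. */
static uint8_t qpel_filter_sample(const uint8_t *s, int32_t rnd)
{
	int32_t c =   -s[-3] + 3*s[-2] - 6*s[-1] + 20*s[0]
	            + 20*s[1] - 6*s[2] + 3*s[3]  -    s[4];
	c = (c + 16 - rnd) >> 5;   /* same rounding and shift as the passes below */
	if (c < 0)   c = 0;        /* clamp to the 8-bit pixel range */
	if (c > 255) c = 255;
	return (uint8_t)c;
}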
#endif /* !XVID_AUTO_INCLUDE */
#if defined(XVID_AUTO_INCLUDE) && defined(REFERENCE_CODE)
/*****************************************************************************
* "reference" filters impl. in plain C
****************************************************************************/
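/* Note: FUNC_H, FUNC_V, FUNC_HA, FUNC_VA, SIZE, TABLE and STORE are macro
 * parameters, not symbols defined here; the XVID_AUTO_INCLUDE guard suggests
 * this block is compiled several times with SIZE set to 8 or 16, TABLE bound
 * to the matching coefficient matrix, and STORE selecting how the result is
 * written (plain store vs. the *_Add behaviour). */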
static
void FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
{
while(H-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
for(i=0; i<=SIZE; ++i)
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * Src[i];
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
if (C<0) C = 0; else if (C>255) C = 255;
STORE(Dst[i], C);
}
Src += BpS;
Dst += BpS;
}
}
static
void FUNC_V(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
{
while(W-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
const uint8_t *S = Src++;
uint8_t *D = Dst++;
for(i=0; i<=SIZE; ++i) {
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * S[0];
S += BpS;
}
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd )>>5;
if (C<0) C = 0; else if (C>255) C = 255;
STORE(D[0], C);
D += BpS;
}
}
}
static
void FUNC_HA(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
{
while(H-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
for(i=0; i<=SIZE; ++i)
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * Src[i];
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
if (C<0) C = 0; else if (C>255) C = 255;
C = (C+Src[i]+1-Rnd) >> 1;
STORE(Dst[i], C);
}
Src += BpS;
Dst += BpS;
}
}
static
void FUNC_HA_UP(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
{
while(H-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
for(i=0; i<=SIZE; ++i)
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * Src[i];
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd ) >> 5;
if (C<0) C = 0; else if (C>255) C = 255;
C = (C+Src[i+1]+1-Rnd) >> 1;
STORE(Dst[i], C);
}
Src += BpS;
Dst += BpS;
}
}
static
void FUNC_VA(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
{
while(W-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
const uint8_t *S = Src;
uint8_t *D = Dst;
for(i=0; i<=SIZE; ++i) {
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * S[0];
S += BpS;
}
S = Src;
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd )>>5;
if (C<0) C = 0; else if (C>255) C = 255;
C = ( C+S[0]+1-Rnd ) >> 1;
STORE(D[0], C);
D += BpS;
S += BpS;
}
Src++;
Dst++;
}
}
static
void FUNC_VA_UP(uint8_t *Dst, const uint8_t *Src, int32_t W, int32_t BpS, int32_t Rnd)
{
while(W-->0) {
int32_t i, k;
int32_t Sums[SIZE] = { 0 };
const uint8_t *S = Src;
uint8_t *D = Dst;
for(i=0; i<=SIZE; ++i) {
for(k=0; k<SIZE; ++k)
Sums[k] += TABLE[i][k] * S[0];
S += BpS;
}
S = Src + BpS;
for(i=0; i<SIZE; ++i) {
int32_t C = ( Sums[i] + 16-Rnd )>>5;
if (C<0) C = 0; else if (C>255) C = 255;
C = ( C+S[0]+1-Rnd ) >> 1;
STORE(D[0], C);
D += BpS;
S += BpS;
}
Dst++;
Src++;
}
}
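/* The *_UP variants differ from the plain Avrg ones only in which sample they
 * average with: the next source pixel (Src[i+1]) in the horizontal case and
 * the next source row (Src + BpS) in the vertical case. */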
#undef STORE
#undef FUNC_H
#undef FUNC_V
#undef FUNC_HA
#undef FUNC_VA
#undef FUNC_HA_UP
#undef FUNC_VA_UP
#elif defined(XVID_AUTO_INCLUDE) && !defined(REFERENCE_CODE)
/*****************************************************************************
* "fast" filters impl. in plain C
****************************************************************************/
#define CLIP_STORE(D,C) \
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
STORE(D, C)
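/* C is the un-normalised filter sum (the taps add up to 32), so the clamp is
 * done against 255<<5 and only the in-range case pays for the final >>5. */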
static void
FUNC_H(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t RND)
{
#if (SIZE==16)
while(H-->0) {
int C;
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
CLIP_STORE(Dst[ 0],C);
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
CLIP_STORE(Dst[ 1],C);
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
CLIP_STORE(Dst[ 2],C);
C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
CLIP_STORE(Dst[ 3],C);
C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
CLIP_STORE(Dst[ 4],C);
C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
CLIP_STORE(Dst[ 5],C);
C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
CLIP_STORE(Dst[ 6],C);
C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
CLIP_STORE(Dst[ 7],C);
C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
CLIP_STORE(Dst[ 8],C);
C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
CLIP_STORE(Dst[ 9],C);
C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
CLIP_STORE(Dst[10],C);
C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
CLIP_STORE(Dst[11],C);
C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
CLIP_STORE(Dst[12],C);
C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
CLIP_STORE(Dst[13],C);
C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
CLIP_STORE(Dst[14],C);
C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
CLIP_STORE(Dst[15],C);
Src += BpS;
Dst += BpS;
}
#else
while(H-->0) {
int C;
C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] - Src[4];
CLIP_STORE(Dst[0],C);
C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
CLIP_STORE(Dst[1],C);
C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
CLIP_STORE(Dst[2],C);
C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
CLIP_STORE(Dst[3],C);
C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
CLIP_STORE(Dst[4],C);
C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
CLIP_STORE(Dst[5],C);
C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
CLIP_STORE(Dst[6],C);
C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
CLIP_STORE(Dst[7],C);
Src += BpS;
Dst += BpS;
}
#endif
}
#undef CLIP_STORE
#define CLIP_STORE(i,C) \
if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5; \
C = (C+Src[i]+1-RND) >> 1; \
STORE(Dst[i], C)
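/* Same clamp as above, followed by the rounded average with the untouched
 * source pixel, i.e. the "Avrg" behaviour of the reference FUNC_HA; this form
 * is presumably used by the Avrg flavours of the fast horizontal pass. */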