📄 qpel.c
字号:
/*****************************************************************************
*
* XVID MPEG-4 VIDEO CODEC
* - QPel interpolation -
*
* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
*
* This program is free software ; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id: qpel.c,v 1.8 2005/11/22 10:23:01 suxen_drol Exp $
*
****************************************************************************/
#ifndef XVID_AUTO_INCLUDE
#include <stdio.h>
#include "../portab.h"
#include "qpel.h"
/* Quarterpel FIR definition
****************************************************************************/
/* Quarter-pel FIR coefficients for the 8-wide filters: 9 rows x 8 columns.
 * Every column sums to 32, and the table is point-symmetric
 * (entry [r][c] equals entry [8-r][7-c]).
 * NOTE(review): presumably indexed as TABLE[input sample][output sample] by
 * the filter passes, which are outside this excerpt -- confirm. */
static const int32_t FIR_Tab_8[9][8] = {
    /* row 0 */ { 14, -3,  2, -1,  0,  0,  0,  0 },
    /* row 1 */ { 23, 19, -6,  3, -1,  0,  0,  0 },
    /* row 2 */ { -7, 20, 20, -6,  3, -1,  0,  0 },
    /* row 3 */ {  3, -6, 20, 20, -6,  3, -1,  0 },
    /* row 4 */ { -1,  3, -6, 20, 20, -6,  3, -1 },
    /* row 5 */ {  0, -1,  3, -6, 20, 20, -6,  3 },
    /* row 6 */ {  0,  0, -1,  3, -6, 20, 20, -7 },
    /* row 7 */ {  0,  0,  0, -1,  3, -6, 19, 23 },
    /* row 8 */ {  0,  0,  0,  0, -1,  2, -3, 14 }
};
/* Quarter-pel FIR coefficients for the 16-wide filters: 17 rows x 16 columns.
 * Every column sums to 32, and the table is point-symmetric
 * (entry [r][c] equals entry [16-r][15-c]). Interior rows carry the same
 * 8-tap pattern (-1, 3, -6, 20, 20, -6, 3, -1) shifted one column per row;
 * the first and last four rows use adjusted border coefficients.
 * NOTE(review): presumably indexed as TABLE[input sample][output sample] by
 * the filter passes, which are outside this excerpt -- confirm. */
static const int32_t FIR_Tab_16[17][16] = {
    /* row  0 */ { 14, -3,  2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row  1 */ { 23, 19, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row  2 */ { -7, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row  3 */ {  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row  4 */ { -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0,  0 },
    /* row  5 */ {  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0,  0 },
    /* row  6 */ {  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0,  0 },
    /* row  7 */ {  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0,  0 },
    /* row  8 */ {  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0,  0 },
    /* row  9 */ {  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0,  0 },
    /* row 10 */ {  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0,  0 },
    /* row 11 */ {  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1,  0 },
    /* row 12 */ {  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3, -1 },
    /* row 13 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -6,  3 },
    /* row 14 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 20, 20, -7 },
    /* row 15 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  3, -6, 19, 23 },
    /* row 16 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  2, -3, 14 }
};
/* Implementation
****************************************************************************/
/* From here on, every `#include "qpel.c"` is a nested inclusion of this very
 * file: with XVID_AUTO_INCLUDE defined, the `#ifndef XVID_AUTO_INCLUDE`
 * section we are currently in is skipped on re-entry.
 * NOTE(review): the part of the file that IS compiled on re-entry lies outside
 * this excerpt; presumably it emits one function per FUNC_* name defined
 * before each include, parameterized by SIZE, TABLE and STORE -- confirm. */
#define XVID_AUTO_INCLUDE
/* First, self-include this file to generate the reference code for the SIMD
 * versions. This set of functions is good for educational purposes because
 * it is straightforward to understand (plain loops and so on), but it is
 * obviously slow. */
#define REFERENCE_CODE
/* 16x? filters */
#define SIZE 16
#define TABLE FIR_Tab_16
/* Plain variant: STORE simply assigns the computed value to the destination. */
#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_16_C_ref
#define FUNC_V V_Pass_16_C_ref
#define FUNC_HA H_Pass_Avrg_16_C_ref
#define FUNC_VA V_Pass_Avrg_16_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C_ref
#include "qpel.c" /* self-include: instantiate with the macros above */
/* "Add" variant: STORE writes the rounded-up average of the computed value
 * and the value already in the destination.
 * note: B-frame always uses Rnd=0...
 * NOTE(review): STORE and FUNC_* are redefined here with different bodies and
 * no visible #undef; presumably the included section #undefs them at its
 * end -- confirm. */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_16_Add_C_ref
#define FUNC_V V_Pass_16_Add_C_ref
#define FUNC_HA H_Pass_Avrg_16_Add_C_ref
#define FUNC_VA V_Pass_Avrg_16_Add_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C_ref
#include "qpel.c" /* self-include: instantiate with the macros above */
/* Drop the 16x parameters so the next section can set its own. */
#undef SIZE
#undef TABLE
/* 8x? filters (still under REFERENCE_CODE): same pair of instantiations as
 * for 16x, using SIZE 8 and the 8-wide coefficient table. */
#define SIZE 8
#define TABLE FIR_Tab_8
/* Plain variant: STORE simply assigns the computed value to the destination. */
#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_8_C_ref
#define FUNC_V V_Pass_8_C_ref
#define FUNC_HA H_Pass_Avrg_8_C_ref
#define FUNC_VA V_Pass_Avrg_8_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C_ref
#include "qpel.c" /* self-include: instantiate with the macros above */
/* "Add" variant: STORE writes the rounded-up average of the computed value
 * and the value already in the destination.
 * note: B-frame always uses Rnd=0...
 * NOTE(review): relies on STORE and FUNC_* having been #undef'd by the
 * included section (not visible in this excerpt) -- confirm. */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_8_Add_C_ref
#define FUNC_V V_Pass_8_Add_C_ref
#define FUNC_HA H_Pass_Avrg_8_Add_C_ref
#define FUNC_VA V_Pass_Avrg_8_Add_C_ref
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C_ref
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C_ref
#include "qpel.c" /* self-include: instantiate with the macros above */
/* Drop the 8x parameters so the next section can set its own. */
#undef SIZE
#undef TABLE
/* Then we define a more optimized C version in which the loops are unrolled
 * and the FIR coefficients are not read from memory but hardcoded in the
 * instructions. It should be faster. */
#undef REFERENCE_CODE
/* 16x? filters
 * NOTE(review): unlike the 8x section that follows, TABLE is not defined
 * here, so the `#undef TABLE` at the end of this section is a no-op unless
 * the included section defines it -- confirm this asymmetry is intended. */
#define SIZE 16
/* Plain variant: STORE simply assigns the computed value to the destination. */
#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_16_C
#define FUNC_V V_Pass_16_C
#define FUNC_HA H_Pass_Avrg_16_C
#define FUNC_VA V_Pass_Avrg_16_C
#define FUNC_HA_UP H_Pass_Avrg_Up_16_C
#define FUNC_VA_UP V_Pass_Avrg_Up_16_C
#include "qpel.c" /* self-include: instantiate with the macros above */
/* "Add" variant: STORE writes the rounded-up average of the computed value
 * and the value already in the destination.
 * note: B-frame always uses Rnd=0...
 * NOTE(review): relies on STORE and FUNC_* having been #undef'd by the
 * included section (not visible in this excerpt) -- confirm. */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_16_Add_C
#define FUNC_V V_Pass_16_Add_C
#define FUNC_HA H_Pass_Avrg_16_Add_C
#define FUNC_VA V_Pass_Avrg_16_Add_C
#define FUNC_HA_UP H_Pass_Avrg_Up_16_Add_C
#define FUNC_VA_UP V_Pass_Avrg_Up_16_Add_C
#include "qpel.c" /* self-include: instantiate with the macros above */
#undef SIZE
#undef TABLE
/* 8x? filters (REFERENCE_CODE is undefined here, i.e. the optimized C
 * variant): same pair of instantiations as for 16x, using SIZE 8. */
#define SIZE 8
#define TABLE FIR_Tab_8
/* Plain variant: STORE simply assigns the computed value to the destination. */
#define STORE(d,s) (d) = (s)
#define FUNC_H H_Pass_8_C
#define FUNC_V V_Pass_8_C
#define FUNC_HA H_Pass_Avrg_8_C
#define FUNC_VA V_Pass_Avrg_8_C
#define FUNC_HA_UP H_Pass_Avrg_Up_8_C
#define FUNC_VA_UP V_Pass_Avrg_Up_8_C
#include "qpel.c" /* self-include: instantiate with the macros above */
/* "Add" variant: STORE writes the rounded-up average of the computed value
 * and the value already in the destination.
 * note: B-frame always uses Rnd=0...
 * NOTE(review): relies on STORE and FUNC_* having been #undef'd by the
 * included section (not visible in this excerpt) -- confirm. */
#define STORE(d,s) (d) = ( (s)+(d)+1 ) >> 1
#define FUNC_H H_Pass_8_Add_C
#define FUNC_V V_Pass_8_Add_C
#define FUNC_HA H_Pass_Avrg_8_Add_C
#define FUNC_VA V_Pass_Avrg_8_Add_C
#define FUNC_HA_UP H_Pass_Avrg_Up_8_Add_C
#define FUNC_VA_UP V_Pass_Avrg_Up_8_Add_C
#include "qpel.c" /* self-include: instantiate with the macros above */
#undef SIZE
#undef TABLE
/* All instantiations are done: leave self-include mode. */
#undef XVID_AUTO_INCLUDE
/* Global scope hooks
****************************************************************************/
/* Global pointers to a function table each, both initialized to NULL.
 * Nothing in the visible part of this file assigns them.
 * NOTE(review): presumably pointed at one of the xvid_QP_Funcs_* and
 * xvid_QP_Add_Funcs_* tables respectively during initialization elsewhere
 * -- confirm against the callers. */
XVID_QP_FUNCS *xvid_QP_Funcs = NULL;
XVID_QP_FUNCS *xvid_QP_Add_Funcs = NULL;
/* Reference plain C impl. declaration
****************************************************************************/
/* Table of the reference C passes (the non-Add `*_C_ref` names instantiated
 * earlier in this file). Entries are grouped H/16, V/16, H/8, V/8, and each
 * group lists the plain, Avrg and Avrg_Up function in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Funcs_C_ref = {
    /* H, 16 */
    H_Pass_16_C_ref,
    H_Pass_Avrg_16_C_ref,
    H_Pass_Avrg_Up_16_C_ref,
    /* V, 16 */
    V_Pass_16_C_ref,
    V_Pass_Avrg_16_C_ref,
    V_Pass_Avrg_Up_16_C_ref,
    /* H, 8 */
    H_Pass_8_C_ref,
    H_Pass_Avrg_8_C_ref,
    H_Pass_Avrg_Up_8_C_ref,
    /* V, 8 */
    V_Pass_8_C_ref,
    V_Pass_Avrg_8_C_ref,
    V_Pass_Avrg_Up_8_C_ref
};
/* Table of the reference C "Add" passes (the `*_Add_C_ref` names instantiated
 * earlier in this file). Entries are grouped H/16, V/16, H/8, V/8, and each
 * group lists the plain, Avrg and Avrg_Up function in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_C_ref = {
    /* H, 16 */
    H_Pass_16_Add_C_ref,
    H_Pass_Avrg_16_Add_C_ref,
    H_Pass_Avrg_Up_16_Add_C_ref,
    /* V, 16 */
    V_Pass_16_Add_C_ref,
    V_Pass_Avrg_16_Add_C_ref,
    V_Pass_Avrg_Up_16_Add_C_ref,
    /* H, 8 */
    H_Pass_8_Add_C_ref,
    H_Pass_Avrg_8_Add_C_ref,
    H_Pass_Avrg_Up_8_Add_C_ref,
    /* V, 8 */
    V_Pass_8_Add_C_ref,
    V_Pass_Avrg_8_Add_C_ref,
    V_Pass_Avrg_Up_8_Add_C_ref
};
/* Plain C impl. declaration (faster than ref one)
****************************************************************************/
/* Table of the optimized C passes (the non-Add `*_C` names instantiated
 * earlier in this file with REFERENCE_CODE undefined). Entries are grouped
 * H/16, V/16, H/8, V/8, and each group lists the plain, Avrg and Avrg_Up
 * function in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Funcs_C = {
    /* H, 16 */
    H_Pass_16_C,
    H_Pass_Avrg_16_C,
    H_Pass_Avrg_Up_16_C,
    /* V, 16 */
    V_Pass_16_C,
    V_Pass_Avrg_16_C,
    V_Pass_Avrg_Up_16_C,
    /* H, 8 */
    H_Pass_8_C,
    H_Pass_Avrg_8_C,
    H_Pass_Avrg_Up_8_C,
    /* V, 8 */
    V_Pass_8_C,
    V_Pass_Avrg_8_C,
    V_Pass_Avrg_Up_8_C
};
/* Table of the optimized C "Add" passes (the `*_Add_C` names instantiated
 * earlier in this file with REFERENCE_CODE undefined). Entries are grouped
 * H/16, V/16, H/8, V/8, and each group lists the plain, Avrg and Avrg_Up
 * function in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_C = {
    /* H, 16 */
    H_Pass_16_Add_C,
    H_Pass_Avrg_16_Add_C,
    H_Pass_Avrg_Up_16_Add_C,
    /* V, 16 */
    V_Pass_16_Add_C,
    V_Pass_Avrg_16_Add_C,
    V_Pass_Avrg_Up_16_Add_C,
    /* H, 8 */
    H_Pass_8_Add_C,
    H_Pass_Avrg_8_Add_C,
    H_Pass_Avrg_Up_8_Add_C,
    /* V, 8 */
    V_Pass_8_Add_C,
    V_Pass_Avrg_8_Add_C,
    V_Pass_Avrg_Up_8_Add_C
};
/* mmx impl. declaration (see qpel_mmx.asm)
****************************************************************************/
#ifdef ARCH_IS_IA32
/* Declarations of the 24 mmx pass routines referenced by the two function
 * tables that follow; none of them is defined in this file. Each goes
 * through XVID_QP_PASS_SIGNATURE, which is defined outside this file.
 * Naming is not uniform: the 16-wide Add routines are spelled
 * `*_Add_16_mmx` whereas the 8-wide ones are spelled `*_8_Add_mmx`. These
 * are external symbol names, so they must stay as they are. */
/* plain passes, 16-wide */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_mmx);
/* plain passes, 8-wide */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_mmx);
/* Add passes, 16-wide */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_mmx);
/* Add passes, 8-wide */
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_Add_mmx);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_Add_mmx);
/* Table of the mmx passes (non-Add routines declared extern above), only
 * built when ARCH_IS_IA32 is defined. Entries are grouped H/16, V/16, H/8,
 * V/8, and each group lists the plain, Avrg and Avrg_Up routine in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Funcs_mmx = {
    /* H, 16 */
    xvid_H_Pass_16_mmx,
    xvid_H_Pass_Avrg_16_mmx,
    xvid_H_Pass_Avrg_Up_16_mmx,
    /* V, 16 */
    xvid_V_Pass_16_mmx,
    xvid_V_Pass_Avrg_16_mmx,
    xvid_V_Pass_Avrg_Up_16_mmx,
    /* H, 8 */
    xvid_H_Pass_8_mmx,
    xvid_H_Pass_Avrg_8_mmx,
    xvid_H_Pass_Avrg_Up_8_mmx,
    /* V, 8 */
    xvid_V_Pass_8_mmx,
    xvid_V_Pass_Avrg_8_mmx,
    xvid_V_Pass_Avrg_Up_8_mmx
};
/* Table of the mmx "Add" passes (declared extern above), only built when
 * ARCH_IS_IA32 is defined. Entries are grouped H/16, V/16, H/8, V/8, and
 * each group lists the plain, Avrg and Avrg_Up routine in that order. Note
 * that the 16-wide symbols are spelled `*_Add_16_mmx` while the 8-wide ones
 * are spelled `*_8_Add_mmx`.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_mmx = {
    /* H, 16 */
    xvid_H_Pass_Add_16_mmx,
    xvid_H_Pass_Avrg_Add_16_mmx,
    xvid_H_Pass_Avrg_Up_Add_16_mmx,
    /* V, 16 */
    xvid_V_Pass_Add_16_mmx,
    xvid_V_Pass_Avrg_Add_16_mmx,
    xvid_V_Pass_Avrg_Up_Add_16_mmx,
    /* H, 8 */
    xvid_H_Pass_8_Add_mmx,
    xvid_H_Pass_Avrg_8_Add_mmx,
    xvid_H_Pass_Avrg_Up_8_Add_mmx,
    /* V, 8 */
    xvid_V_Pass_8_Add_mmx,
    xvid_V_Pass_Avrg_8_Add_mmx,
    xvid_V_Pass_Avrg_Up_8_Add_mmx
};
#endif /* ARCH_IS_IA32 */
/* altivec impl. declaration (see qpel_altivec.c)
****************************************************************************/
#ifdef ARCH_IS_PPC
/* Declarations of the 24 Altivec pass routines referenced by the two
 * function tables that follow; none of them is defined in this file. Each
 * goes through XVID_QP_PASS_SIGNATURE, which is defined outside this file. */
/* plain passes, 16-wide */
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Altivec_C);
/* plain passes, 8-wide */
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Altivec_C);
/* Add passes, 16-wide */
extern XVID_QP_PASS_SIGNATURE(H_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_16_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_16_Add_Altivec_C);
/* Add passes, 8-wide */
extern XVID_QP_PASS_SIGNATURE(H_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(H_Pass_Avrg_Up_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_8_Add_Altivec_C);
extern XVID_QP_PASS_SIGNATURE(V_Pass_Avrg_Up_8_Add_Altivec_C);
/* Table of the Altivec passes (non-Add routines declared extern above), only
 * built when ARCH_IS_PPC is defined. Entries are grouped H/16, V/16, H/8,
 * V/8, and each group lists the plain, Avrg and Avrg_Up routine in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Funcs_Altivec_C = {
    /* H, 16 */
    H_Pass_16_Altivec_C,
    H_Pass_Avrg_16_Altivec_C,
    H_Pass_Avrg_Up_16_Altivec_C,
    /* V, 16 */
    V_Pass_16_Altivec_C,
    V_Pass_Avrg_16_Altivec_C,
    V_Pass_Avrg_Up_16_Altivec_C,
    /* H, 8 */
    H_Pass_8_Altivec_C,
    H_Pass_Avrg_8_Altivec_C,
    H_Pass_Avrg_Up_8_Altivec_C,
    /* V, 8 */
    V_Pass_8_Altivec_C,
    V_Pass_Avrg_8_Altivec_C,
    V_Pass_Avrg_Up_8_Altivec_C
};
/* Table of the Altivec "Add" passes (declared extern above), only built when
 * ARCH_IS_PPC is defined. Entries are grouped H/16, V/16, H/8, V/8, and each
 * group lists the plain, Avrg and Avrg_Up routine in that order.
 * NOTE(review): positional initializer -- the order must match the member
 * order of XVID_QP_FUNCS, which is declared outside this file. */
XVID_QP_FUNCS xvid_QP_Add_Funcs_Altivec_C = {
    /* H, 16 */
    H_Pass_16_Add_Altivec_C,
    H_Pass_Avrg_16_Add_Altivec_C,
    H_Pass_Avrg_Up_16_Add_Altivec_C,
    /* V, 16 */
    V_Pass_16_Add_Altivec_C,
    V_Pass_Avrg_16_Add_Altivec_C,
    V_Pass_Avrg_Up_16_Add_Altivec_C,
    /* H, 8 */
    H_Pass_8_Add_Altivec_C,
    H_Pass_Avrg_8_Add_Altivec_C,
    H_Pass_Avrg_Up_8_Add_Altivec_C,
    /* V, 8 */
    V_Pass_8_Add_Altivec_C,
    V_Pass_Avrg_8_Add_Altivec_C,
    V_Pass_Avrg_Up_8_Add_Altivec_C
};
#endif /* ARCH_IS_PPC */
/* mmx impl. (for 64bit bus) declaration (see qpel_mmx.asm)
****************************************************************************/
#ifdef ARCH_IS_X86_64
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_8_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_Up_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_V_Pass_Avrg_Up_Add_16_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_8_Add_x86_64);
extern XVID_QP_PASS_SIGNATURE(xvid_H_Pass_Avrg_8_Add_x86_64);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -