📄 motion_comp_mmx.c
字号:
} while (0)
/*
 * pavg_m2r (src, dest): CPU-dispatched wrapper around pavgb_m2r.
 *
 * The macro reads a variable named `cpu` that must be in scope at the
 * expansion site.  Only the CPU_MMXEXT branch is active; the alternative
 * pavgusb_m2r branch is commented out, so for any other `cpu` value the
 * macro expands to an empty statement and `dest` is left untouched.
 *
 * NOTE(review): pavgb_m2r is defined outside this view; presumably it is
 * the packed-byte average of memory operand `src` into `dest` -- confirm.
 */
#define pavg_m2r(src,dest) \
do { \
if (cpu == CPU_MMXEXT) \
pavgb_m2r (src, dest); \
/* else \
pavgusb_m2r (src, dest);*/ \
} while (0)
/* CPU_MMXEXT code */
/*
 * MC_put1_8: transfers one 64-bit __m64 per row from ref to dest, for
 * `height` rows, stepping both pointers by `stride` bytes after each row.
 *
 * height must be at least 1: the row body runs before the counter is
 * tested.  movq_m2r / movq_r2m are defined outside this view and are
 * assumed to be a plain 64-bit load and store.
 */
static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride)
{
    __m64 mm0;

    for (;;) {
        movq_m2r (ref[0], mm0);
        movq_r2m (mm0, dest[0]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_put1_16: transfers two adjacent 64-bit __m64 values (byte offsets 0
 * and 8) per row from ref to dest, for `height` rows, stepping both
 * pointers by `stride` bytes after each row.
 *
 * height must be at least 1: the row body runs before the counter is
 * tested.  movq_m2r / movq_r2m are defined outside this view and are
 * assumed to be a plain 64-bit load and store.
 */
static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride)
{
    __m64 mm0, mm1;

    for (;;) {
        /* Both halves are loaded before either is stored. */
        movq_m2r (ref[0], mm0);
        movq_m2r (ref[8], mm1);
        movq_r2m (mm0, dest[0]);
        movq_r2m (mm1, dest[8]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_avg1_8: for each of `height` rows, loads one 64-bit __m64 from ref,
 * combines it with the value already at dest through pavg_m2r, and writes
 * the result back to dest.  Both pointers step by `stride` bytes per row.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int cpu)
{
    __m64 mm0;

    for (;;) {
        movq_m2r (ref[0], mm0);
        pavg_m2r (dest[0], mm0);
        movq_r2m (mm0, dest[0]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_avg1_16: 16-byte-wide counterpart of the single-source averaging
 * kernel.  For each of `height` rows it loads two __m64 values from ref
 * (byte offsets 0 and 8), combines each with the matching dest value via
 * pavg_m2r, and stores both results back to dest.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int cpu)
{
    __m64 mm0, mm1;

    for (;;) {
        movq_m2r (ref[0], mm0);
        movq_m2r (ref[8], mm1);

        pavg_m2r (dest[0], mm0);
        pavg_m2r (dest[8], mm1);

        movq_r2m (mm0, dest[0]);
        movq_r2m (mm1, dest[8]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_put2_8: for each of `height` rows, loads one __m64 from ref, combines
 * it via pavg_m2r with the __m64 located `offset` bytes further on in ref,
 * and stores the result to dest.  Both pointers step by `stride` per row.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int offset,
                              const int cpu)
{
    __m64 mm0;

    for (;;) {
        movq_m2r (ref[0], mm0);
        pavg_m2r (ref[offset], mm0);
        movq_r2m (mm0, dest[0]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_put2_16: 16-byte-wide two-source kernel.  For each of `height` rows it
 * loads the __m64 values at ref+0 and ref+8, combines each via pavg_m2r
 * with the value `offset` bytes further on (ref+offset, ref+offset+8), and
 * stores the two results to dest+0 and dest+8.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int offset,
                               const int cpu)
{
    __m64 mm0, mm1;

    for (;;) {
        movq_m2r (ref[0], mm0);
        movq_m2r (ref[8], mm1);

        pavg_m2r (ref[offset], mm0);
        pavg_m2r (ref[offset + 8], mm1);

        movq_r2m (mm0, dest[0]);
        movq_r2m (mm1, dest[8]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_avg2_8: for each of `height` rows, loads one __m64 from ref, combines
 * it via pavg_m2r first with the value `offset` bytes further on in ref and
 * then with the value already at dest, and writes the result back to dest.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
                              const int stride, const int offset,
                              const int cpu)
{
    __m64 mm0;

    for (;;) {
        movq_m2r (ref[0], mm0);
        pavg_m2r (ref[offset], mm0);   /* second source sample */
        pavg_m2r (dest[0], mm0);       /* blend with existing dest */
        movq_r2m (mm0, dest[0]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_avg2_16: 16-byte-wide variant of the two-source averaging kernel.  For
 * each of `height` rows the __m64 values at ref+0 and ref+8 are combined
 * via pavg_m2r with their counterparts `offset` bytes further on, then with
 * the values already at dest+0 and dest+8, and the results are stored back
 * to dest.
 *
 * `cpu` is consumed implicitly by the pavg_m2r macro, which only acts when
 * cpu == CPU_MMXEXT.  height must be at least 1: the row body runs before
 * the counter is tested.
 */
static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
                               const int stride, const int offset,
                               const int cpu)
{
    __m64 mm0, mm1;

    for (;;) {
        movq_m2r (ref[0], mm0);
        movq_m2r (ref[8], mm1);

        /* second source sample */
        pavg_m2r (ref[offset], mm0);
        pavg_m2r (ref[offset + 8], mm1);

        /* blend with existing dest */
        pavg_m2r (dest[0], mm0);
        pavg_m2r (dest[8], mm1);

        movq_r2m (mm0, dest[0]);
        movq_r2m (mm1, dest[8]);

        ref += stride;
        dest += stride;

        if (--height == 0)
            break;
    }
}
/*
 * MC_put4_8: four-sample kernel, one __m64 wide.  Each output row combines
 * a source row and the row one `stride` below it, each taken at byte
 * offsets 0 and +1, and is stored to dest.
 *
 * The loop is software-pipelined: the horizontal combination of a source
 * row (mm0) and the XOR of its two samples (mm7) are computed once and
 * carried into the next iteration, so each source row is loaded a single
 * time.  In total height+1 rows of ref are read.  height must be at least
 * 1: the loop body runs before the counter is tested.
 *
 * NOTE(review): the *_r2r / *_m2r / *_r2m macros are defined outside this
 * view; the comments below assume the (source, destination) operand order
 * and the semantics of the same-named MMX instructions.  Presumably the
 * mm7 correction turns the twice-rounded average into (a+b+c+d+2)>>2 --
 * confirm against the macro definitions.
 */
static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
const int stride, const int cpu)
{
__m64 mm0,mm1,mm2,mm3,mm5,mm6,mm7;
/* Every byte lane holds the value 1; used to isolate bit 0 of each lane. */
__m64 mask_one = _mm_set1_pi8(1);
/* Prologue: first source row -> mm0 = pavg of the pair, mm7 = XOR of the pair. */
movq_m2r (*ref, mm0);
movq_m2r (*(ref+1), mm1);
movq_r2r (mm0, mm7);
pxor_r2r (mm1, mm7);
pavg_r2r (mm1, mm0);
ref += stride;
do {
/* Next source row: mm2 = pavg of its pair, mm6 = XOR of its pair. */
movq_m2r (*ref, mm2);
movq_r2r (mm0, mm5);
movq_m2r (*(ref+1), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm3, mm2);
/* mm7 = (xor_prev | xor_cur) & (avg_prev ^ avg_cur) & 1 : per-byte correction. */
por_r2r (mm6, mm7);
pxor_r2r (mm2, mm5);
pand_r2r (mm5, mm7);
pavg_r2r (mm2, mm0);
pand_m2r (mask_one, mm7);
/* Apply the correction to the combined value of the two rows. */
psubusb_r2r (mm7, mm0);
ref += stride;
movq_r2m (mm0, *dest);
dest += stride;
/* Carry the current row's XOR and average into the next iteration. */
movq_r2r (mm6, mm7); /* unroll ! */
movq_r2r (mm2, mm0); /* unroll ! */
} while (--height);
}
/*
 * MC_put4_16: four-sample kernel, two __m64 wide (byte offsets 0 and 8).
 * For every output row it reads the source row at ref and the row at
 * ref+stride, each at byte offsets 0 and +1, combines the four samples and
 * stores the result to dest.  The same sequence is then repeated for the
 * second 8-byte half (offsets 8/9) and stored to dest+8.
 *
 * Unlike a pipelined variant, nothing is carried between iterations: both
 * rows are reloaded on every pass.  The samples are paired diagonally
 * (ref with ref+stride+1, ref+1 with ref+stride).  height must be at least
 * 1: the loop body runs before the counter is tested.
 *
 * NOTE(review): the *_r2r / *_m2r / *_r2m macros are defined outside this
 * view; the comments below assume the (source, destination) operand order
 * and the semantics of the same-named MMX instructions.  Presumably the
 * mm7 correction turns the twice-rounded average into (a+b+c+d+2)>>2 --
 * confirm against the macro definitions.
 */
static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
const int stride, const int cpu)
{
/* Every byte lane holds the value 1; used to isolate bit 0 of each lane. */
__m64 mask_one = _mm_set1_pi8(1);
do {
__m64 mm0,mm1,mm2,mm3,mm6,mm7;
/* ---- first 8-byte half ---- */
/* mm0/mm1: one diagonal pair, mm7 = their XOR. */
movq_m2r (*ref, mm0);
movq_m2r (*(ref+stride+1), mm1);
movq_r2r (mm0, mm7);
movq_m2r (*(ref+1), mm2);
pxor_r2r (mm1, mm7);
/* mm2/mm3: the other diagonal pair, mm6 = their XOR. */
movq_m2r (*(ref+stride), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm1, mm0);
pavg_r2r (mm3, mm2);
/* mm7 = (xor_a | xor_b) & (avg_a ^ avg_b) & 1 : per-byte correction. */
por_r2r (mm6, mm7);
movq_r2r (mm0, mm6);
pxor_r2r (mm2, mm6);
pand_r2r (mm6, mm7);
pand_m2r (mask_one, mm7);
pavg_r2r (mm2, mm0);
psubusb_r2r (mm7, mm0);
movq_r2m (mm0, *dest);
/* ---- second 8-byte half: same sequence at byte offset 8 ---- */
movq_m2r (*(ref+8), mm0);
movq_m2r (*(ref+stride+9), mm1);
movq_r2r (mm0, mm7);
movq_m2r (*(ref+9), mm2);
pxor_r2r (mm1, mm7);
movq_m2r (*(ref+stride+8), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm1, mm0);
pavg_r2r (mm3, mm2);
por_r2r (mm6, mm7);
movq_r2r (mm0, mm6);
pxor_r2r (mm2, mm6);
pand_r2r (mm6, mm7);
pand_m2r (mask_one, mm7);
pavg_r2r (mm2, mm0);
psubusb_r2r (mm7, mm0);
ref += stride;
movq_r2m (mm0, *(dest+8));
dest += stride;
} while (--height);
}
/*
 * MC_avg4_8: four-sample kernel, one __m64 wide, blended with dest.  For
 * every output row it reads the source row at ref and the row at
 * ref+stride, each at byte offsets 0 and +1, combines the four samples,
 * then combines that result with the value already at dest via pavg_r2r
 * and writes it back to dest.
 *
 * Nothing is carried between iterations: both rows are reloaded on every
 * pass.  The samples are paired diagonally (ref with ref+stride+1, ref+1
 * with ref+stride).  height must be at least 1: the loop body runs before
 * the counter is tested.
 *
 * NOTE(review): the *_r2r / *_m2r / *_r2m macros are defined outside this
 * view; the comments below assume the (source, destination) operand order
 * and the semantics of the same-named MMX instructions.  Presumably the
 * mm7 correction turns the twice-rounded average into (a+b+c+d+2)>>2 --
 * confirm against the macro definitions.
 */
static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
const int stride, const int cpu)
{
/* Every byte lane holds the value 1; used to isolate bit 0 of each lane. */
__m64 mask_one = _mm_set1_pi8(1);
do {
__m64 mm0,mm1,mm2,mm3,mm6,mm7;
/* mm0/mm1: one diagonal pair, mm7 = their XOR. */
movq_m2r (*ref, mm0);
movq_m2r (*(ref+stride+1), mm1);
movq_r2r (mm0, mm7);
movq_m2r (*(ref+1), mm2);
pxor_r2r (mm1, mm7);
/* mm2/mm3: the other diagonal pair, mm6 = their XOR. */
movq_m2r (*(ref+stride), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm1, mm0);
pavg_r2r (mm3, mm2);
/* mm7 = (xor_a | xor_b) & (avg_a ^ avg_b) & 1 : per-byte correction. */
por_r2r (mm6, mm7);
movq_r2r (mm0, mm6);
pxor_r2r (mm2, mm6);
pand_r2r (mm6, mm7);
pand_m2r (mask_one, mm7);
pavg_r2r (mm2, mm0);
psubusb_r2r (mm7, mm0);
/* Blend the four-sample result with what is already stored at dest. */
movq_m2r (*dest, mm1);
pavg_r2r (mm1, mm0);
ref += stride;
movq_r2m (mm0, *dest);
dest += stride;
} while (--height);
}
/*
 * MC_avg4_16: four-sample kernel, two __m64 wide (byte offsets 0 and 8),
 * blended with dest.  For every output row it reads the source row at ref
 * and the row at ref+stride, each at byte offsets 0 and +1, combines the
 * four samples, then combines that result with the value already at dest
 * via pavg_r2r and writes it back.  The same sequence is repeated for the
 * second 8-byte half (offsets 8/9, dest+8).
 *
 * Nothing is carried between iterations: both rows are reloaded on every
 * pass.  The samples are paired diagonally (ref with ref+stride+1, ref+1
 * with ref+stride).  height must be at least 1: the loop body runs before
 * the counter is tested.
 *
 * NOTE(review): the *_r2r / *_m2r / *_r2m macros are defined outside this
 * view; the comments below assume the (source, destination) operand order
 * and the semantics of the same-named MMX instructions.  Presumably the
 * mm7 correction turns the twice-rounded average into (a+b+c+d+2)>>2 --
 * confirm against the macro definitions.
 */
static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
const int stride, const int cpu)
{
/* Every byte lane holds the value 1; used to isolate bit 0 of each lane. */
__m64 mask_one = _mm_set1_pi8(1);
do {
__m64 mm0,mm1,mm2,mm3,mm6,mm7;
/* ---- first 8-byte half ---- */
/* mm0/mm1: one diagonal pair, mm7 = their XOR. */
movq_m2r (*ref, mm0);
movq_m2r (*(ref+stride+1), mm1);
movq_r2r (mm0, mm7);
movq_m2r (*(ref+1), mm2);
pxor_r2r (mm1, mm7);
/* mm2/mm3: the other diagonal pair, mm6 = their XOR. */
movq_m2r (*(ref+stride), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm1, mm0);
pavg_r2r (mm3, mm2);
/* mm7 = (xor_a | xor_b) & (avg_a ^ avg_b) & 1 : per-byte correction. */
por_r2r (mm6, mm7);
movq_r2r (mm0, mm6);
pxor_r2r (mm2, mm6);
pand_r2r (mm6, mm7);
pand_m2r (mask_one, mm7);
pavg_r2r (mm2, mm0);
psubusb_r2r (mm7, mm0);
/* Blend the four-sample result with what is already stored at dest. */
movq_m2r (*dest, mm1);
pavg_r2r (mm1, mm0);
movq_r2m (mm0, *dest);
/* ---- second 8-byte half: same sequence at byte offset 8 ---- */
movq_m2r (*(ref+8), mm0);
movq_m2r (*(ref+stride+9), mm1);
movq_r2r (mm0, mm7);
movq_m2r (*(ref+9), mm2);
pxor_r2r (mm1, mm7);
movq_m2r (*(ref+stride+8), mm3);
movq_r2r (mm2, mm6);
pxor_r2r (mm3, mm6);
pavg_r2r (mm1, mm0);
pavg_r2r (mm3, mm2);
por_r2r (mm6, mm7);
movq_r2r (mm0, mm6);
pxor_r2r (mm2, mm6);
pand_r2r (mm6, mm7);
pand_m2r (mask_one, mm7);
pavg_r2r (mm2, mm0);
psubusb_r2r (mm7, mm0);
/* Blend the second half with dest+8. */
movq_m2r (*(dest+8), mm1);
pavg_r2r (mm1, mm0);
ref += stride;
movq_r2m (mm0, *(dest+8));
dest += stride;
} while (--height);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg1_16 (single source sample blended with dest, two
 * __m64 per row), selecting the CPU_MMXEXT code path.
 */
static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg1_8 (single source sample blended with dest, one
 * __m64 per row), selecting the CPU_MMXEXT code path.
 */
static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put1_16 (straight transfer, two __m64 per row); that
 * helper takes no cpu argument.
 */
static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put1_16 (height, dest, ref, stride);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put1_8 (straight transfer, one __m64 per row); that
 * helper takes no cpu argument.
 */
static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put1_8 (height, dest, ref, stride);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg2_16 with offset 1, so each source sample is paired
 * with the one a single byte further on before being blended with dest.
 */
static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg2_8 with offset 1, so each source sample is paired
 * with the one a single byte further on before being blended with dest.
 */
static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put2_16 with offset 1, so each source sample is paired
 * with the one a single byte further on.
 */
static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put2_8 with offset 1, so each source sample is paired
 * with the one a single byte further on.
 */
static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg2_16 with offset == stride, so each source sample is
 * paired with the one a full row further on before being blended with dest.
 */
static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg2_8 with offset == stride, so each source sample is
 * paired with the one a full row further on before being blended with dest.
 */
static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put2_16 with offset == stride, so each source sample is
 * paired with the one a full row further on.
 */
static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put2_8 with offset == stride, so each source sample is
 * paired with the one a full row further on.
 */
static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg4_16 (four source samples combined, then blended with
 * dest, two __m64 per row), selecting the CPU_MMXEXT code path.
 */
static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_avg4_8 (four source samples combined, then blended with
 * dest, one __m64 per row), selecting the CPU_MMXEXT code path.
 */
static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put4_16 (four source samples combined, two __m64 per
 * row), selecting the CPU_MMXEXT code path.
 */
static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * MMXEXT entry point with the (dest, ref, stride, height) signature.
 * Forwards to MC_put4_8 (four source samples combined, one __m64 per
 * row), selecting the CPU_MMXEXT code path.
 */
static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
}
/*
 * NOTE(review): MPEG2_MC_EXTERN is defined outside this view; presumably it
 * expands to the exported function table that collects the sixteen
 * MC_{put,avg}_{o,x,y,xy}_{16,8}_mmxext routines above -- confirm against
 * the macro definition.
 */
MPEG2_MC_EXTERN (mmxext)
/*
static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
}
static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
}
static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put1_16 (height, dest, ref, stride);
}
static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put1_8 (height, dest, ref, stride);
}
static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}
static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}
static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
}
static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
}
static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}
static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}
static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
}
static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
}
static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
}
static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
}
static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
}
static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
int stride, int height)
{
MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
}
MPEG2_MC_EXTERN (3dnow)
*/
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -