📄 mmx_mlib.c
字号:
/*
 * AveX 8x8 wrapper: forwards to mlib_VideoInterpAveX_U8_U8_MxN (defined
 * outside this section) with m = 8, n = 8.
 *
 * NOTE(review): the return type of this wrapper was missing from the visible
 * text (it appears to have been cut off before this section starts); `void`
 * is restored here to match every sibling wrapper below -- confirm against
 * the complete file.
 */
void
mlib_VideoInterpAveX_U8_U8_8x8(uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
  mlib_VideoInterpAveX_U8_U8_MxN(8, 8, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/* AveX 8x4 wrapper: forwards to mlib_VideoInterpAveX_U8_U8_MxN with m = 8, n = 4. */
void
mlib_VideoInterpAveX_U8_U8_8x4(uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
  mlib_VideoInterpAveX_U8_U8_MxN(8, 4, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/*
 * Half-pixel interpolation in the x direction over an m x n block.
 *
 * Portable C branch: curr[x] = (ref[x] + ref[x + 1] + 1) / 2 for every byte
 * of every row, so m + 1 bytes are read from each reference row.
 * MMX branch: calls mmx_average_2_U8(curr, ref, ref + 1) once per 8-byte
 * group, m / 8 groups per row (the helper macro is defined elsewhere).
 *
 *   m, n          block width (bytes) and height (rows)
 *   curr_block    destination block; written in place
 *   ref_block     reference block
 *   frame_stride  distance in bytes between the starts of consecutive rows
 *   field_stride  not used by this kernel
 */
static inline void
mlib_VideoInterpX_U8_U8_MxN(const uint8_t m,
                            const uint8_t n,
                            uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
/* Defining this symbol selects the MMX branch whenever HAVE_MMX is set. */
#define MMX_mmx_VideoInterpX_U8_U8_MxN
#if !defined(HAVE_MMX) || !defined(MMX_mmx_VideoInterpX_U8_U8_MxN)
  int x, y;

  (void)field_stride;

  for (y = 0; y < n; y++) {
    /*
     * Indexed accesses instead of `*ref_block++ + *(ref_block + 1)`: the
     * latter modifies and reads ref_block without sequencing (undefined
     * behaviour) and could pick up ref[x + 2] instead of ref[x + 1].
     */
    for (x = 0; x < m; x++)
      curr_block[x] = (ref_block[x] + ref_block[x + 1] + 1) / 2;
    ref_block += frame_stride;
    curr_block += frame_stride;
  }
#else
  int x, y;
  const int step = 8;
  const int jump = frame_stride - m;

  (void)field_stride;

  pxor_r2r(mm0, mm0);  // load 0 into mm0

  for (y = 0; y < n; y++) {
    for (x = 0; x < m / 8; x++) {
      mmx_average_2_U8(curr_block, ref_block, ref_block + 1);
      curr_block += step;
      ref_block += step;
    }
    /* Skip the remainder of the row to reach the start of the next one. */
    curr_block += jump;
    ref_block += jump;
  }
#endif
}

/* X interpolation, 16x16 block. */
void
mlib_VideoInterpX_U8_U8_16x16(uint8_t *curr_block,
                              const uint8_t *ref_block,
                              int32_t frame_stride,
                              int32_t field_stride)
{
  mlib_VideoInterpX_U8_U8_MxN(16, 16, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* X interpolation, 16x8 block. */
void
mlib_VideoInterpX_U8_U8_16x8(uint8_t *curr_block,
                             const uint8_t *ref_block,
                             int32_t frame_stride,
                             int32_t field_stride)
{
  mlib_VideoInterpX_U8_U8_MxN(16, 8, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* X interpolation, 8x8 block. */
void
mlib_VideoInterpX_U8_U8_8x8(uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
  mlib_VideoInterpX_U8_U8_MxN(8, 8, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* X interpolation, 8x4 block. */
void
mlib_VideoInterpX_U8_U8_8x4(uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
  mlib_VideoInterpX_U8_U8_MxN(8, 4, curr_block, ref_block,
                              frame_stride, field_stride);
}

// VideoInterp*XY - half pixel interpolation in both x and y directions
// --------------------------------------------------------------------

/*
 * Half-pixel interpolation in x and y, averaged into the existing contents
 * of curr_block, over an m x n block.
 *
 * Portable C branch, with next = ref + field_stride:
 *   p       = (ref[x] + ref[x + 1] + next[x] + next[x + 1] + 2) / 4
 *   curr[x] = (curr[x] + p + 1) / 2
 * MMX branch: calls mmx_interp_average_4_U8 on the same four source
 * positions once per 8-byte group (the helper macro is defined elsewhere).
 *
 *   m, n          block width (bytes) and height (rows)
 *   curr_block    destination block; read and written in place
 *   ref_block     reference block
 *   frame_stride  distance in bytes between the starts of consecutive rows
 *   field_stride  offset in bytes from a reference row to the row it is
 *                 combined with
 */
static inline void
mlib_VideoInterpAveXY_U8_U8_MxN(const uint8_t m,
                                const uint8_t n,
                                uint8_t *curr_block,
                                const uint8_t *ref_block,
                                int32_t frame_stride,
                                int32_t field_stride)
{
/* Defining this symbol selects the MMX branch whenever HAVE_MMX is set. */
#define MMX_mmx_VideoInterpAveXY_U8_U8_MxN
#if !defined(HAVE_MMX) || !defined(MMX_mmx_VideoInterpAveXY_U8_U8_MxN)
  int x, y;
  const uint8_t *ref_block_next = ref_block + field_stride;

  for (y = 0; y < n; y++) {
    /*
     * Indexed accesses avoid the unsequenced `p++` / `*(p + 1)` and
     * `*curr_block++ = (*curr_block + ...)` combinations, which are
     * undefined behaviour.
     */
    for (x = 0; x < m; x++) {
      const int interp = (ref_block[x] + ref_block[x + 1] +
                          ref_block_next[x] + ref_block_next[x + 1] + 2) / 4;
      curr_block[x] = (curr_block[x] + interp + 1) / 2;
    }
    curr_block += frame_stride;
    ref_block += frame_stride;
    ref_block_next += frame_stride;
  }
#else
  int x, y;
  const int step = 8;
  const int jump = frame_stride - m;
  const uint8_t *ref_block_next = ref_block + field_stride;

  pxor_r2r(mm0, mm0);  // load 0 into mm0

  for (y = 0; y < n; y++) {
    for (x = 0; x < m / 8; x++) {
      mmx_interp_average_4_U8(curr_block, ref_block, ref_block + 1,
                              ref_block_next, ref_block_next + 1);
      curr_block += step;
      ref_block += step;
      ref_block_next += step;
    }
    /* Skip the remainder of the row to reach the start of the next one. */
    curr_block += jump;
    ref_block += jump;
    ref_block_next += jump;
  }
#endif
}

/* Averaging XY interpolation, 16x16 block. */
void
mlib_VideoInterpAveXY_U8_U8_16x16(uint8_t *curr_block,
                                  const uint8_t *ref_block,
                                  int32_t frame_stride,
                                  int32_t field_stride)
{
  mlib_VideoInterpAveXY_U8_U8_MxN(16, 16, curr_block, ref_block,
                                  frame_stride, field_stride);
}

/* Averaging XY interpolation, 16x8 block. */
void
mlib_VideoInterpAveXY_U8_U8_16x8(uint8_t *curr_block,
                                 const uint8_t *ref_block,
                                 int32_t frame_stride,
                                 int32_t field_stride)
{
  mlib_VideoInterpAveXY_U8_U8_MxN(16, 8, curr_block, ref_block,
                                  frame_stride, field_stride);
}

/* Averaging XY interpolation, 8x8 block. */
void
mlib_VideoInterpAveXY_U8_U8_8x8(uint8_t *curr_block,
                                const uint8_t *ref_block,
                                int32_t frame_stride,
                                int32_t field_stride)
{
  mlib_VideoInterpAveXY_U8_U8_MxN(8, 8, curr_block, ref_block,
                                  frame_stride, field_stride);
}

/* Averaging XY interpolation, 8x4 block. */
void
mlib_VideoInterpAveXY_U8_U8_8x4(uint8_t *curr_block,
                                const uint8_t *ref_block,
                                int32_t frame_stride,
                                int32_t field_stride)
{
  mlib_VideoInterpAveXY_U8_U8_MxN(8, 4, curr_block, ref_block,
                                  frame_stride, field_stride);
}

/*
 * Half-pixel interpolation in x and y over an m x n block.
 *
 * Portable C branch, with next = ref + field_stride:
 *   curr[x] = (ref[x] + ref[x + 1] + next[x] + next[x + 1] + 2) / 4
 * MMX branch: calls mmx_average_4_U8 on the same four source positions once
 * per 8-byte group (the helper macro is defined elsewhere).
 *
 *   m, n          block width (bytes) and height (rows)
 *   curr_block    destination block; written in place
 *   ref_block     reference block
 *   frame_stride  distance in bytes between the starts of consecutive rows
 *   field_stride  offset in bytes from a reference row to the row it is
 *                 combined with
 */
static inline void
mlib_VideoInterpXY_U8_U8_MxN(const uint8_t m,
                             const uint8_t n,
                             uint8_t *curr_block,
                             const uint8_t *ref_block,
                             int32_t frame_stride,
                             int32_t field_stride)
{
/* Defining this symbol selects the MMX branch whenever HAVE_MMX is set. */
#define MMX_mmx_VideoInterpXY_U8_U8_MxN
#if !defined(HAVE_MMX) || !defined(MMX_mmx_VideoInterpXY_U8_U8_MxN)
  int x, y;
  const uint8_t *ref_block_next = ref_block + field_stride;

  for (y = 0; y < n; y++) {
    /* Indexed accesses avoid the unsequenced `p++` / `*(p + 1)` pairs. */
    for (x = 0; x < m; x++)
      curr_block[x] = (ref_block[x] + ref_block[x + 1] +
                       ref_block_next[x] + ref_block_next[x + 1] + 2) / 4;
    curr_block += frame_stride;
    ref_block += frame_stride;
    ref_block_next += frame_stride;
  }
#else
  int x, y;
  const int step = 8;
  const int jump = frame_stride - m;
  const uint8_t *ref_block_next = ref_block + field_stride;

  pxor_r2r(mm0, mm0);  // load 0 into mm0

  for (y = 0; y < n; y++) {
    for (x = 0; x < m / 8; x++) {
      mmx_average_4_U8(curr_block, ref_block, ref_block + 1,
                       ref_block_next, ref_block_next + 1);
      curr_block += step;
      ref_block += step;
      ref_block_next += step;
    }
    /* Skip the remainder of the row to reach the start of the next one. */
    curr_block += jump;
    ref_block += jump;
    ref_block_next += jump;
  }
#endif
}

/* XY interpolation, 16x16 block. */
void
mlib_VideoInterpXY_U8_U8_16x16(uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
  mlib_VideoInterpXY_U8_U8_MxN(16, 16, curr_block, ref_block,
                               frame_stride, field_stride);
}

/* XY interpolation, 16x8 block. */
void
mlib_VideoInterpXY_U8_U8_16x8(uint8_t *curr_block,
                              const uint8_t *ref_block,
                              int32_t frame_stride,
                              int32_t field_stride)
{
  mlib_VideoInterpXY_U8_U8_MxN(16, 8, curr_block, ref_block,
                               frame_stride, field_stride);
}

/* XY interpolation, 8x8 block. */
void
mlib_VideoInterpXY_U8_U8_8x8(uint8_t *curr_block,
                             const uint8_t *ref_block,
                             int32_t frame_stride,
                             int32_t field_stride)
{
  mlib_VideoInterpXY_U8_U8_MxN(8, 8, curr_block, ref_block,
                               frame_stride, field_stride);
}

/* XY interpolation, 8x4 block. */
void
mlib_VideoInterpXY_U8_U8_8x4(uint8_t *curr_block,
                             const uint8_t *ref_block,
                             int32_t frame_stride,
                             int32_t field_stride)
{
  mlib_VideoInterpXY_U8_U8_MxN(8, 4, curr_block, ref_block,
                               frame_stride, field_stride);
}

// VideoInterp*Y - half pixel interpolation in the y direction
// -----------------------------------------------------------

/*
 * Half-pixel interpolation in the y direction, averaged into the existing
 * contents of curr_block, over an m x n block.
 *
 * Portable C branch, with next = ref + field_stride:
 *   curr[x] = (curr[x] + (ref[x] + next[x] + 1) / 2 + 1) / 2
 * MMX branch: calls mmx_interp_average_2_U8(curr, ref, next) once per 8-byte
 * group (the helper macro is defined elsewhere).
 *
 *   m, n          block width (bytes) and height (rows)
 *   curr_block    destination block; read and written in place
 *   ref_block     reference block
 *   frame_stride  distance in bytes between the starts of consecutive rows
 *   field_stride  offset in bytes from a reference row to the row it is
 *                 combined with
 */
static inline void
mlib_VideoInterpAveY_U8_U8_MxN(const uint8_t m,
                               const uint8_t n,
                               uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
/* Defining this symbol selects the MMX branch whenever HAVE_MMX is set. */
#define MMX_mmx_VideoInterpAveY_U8_U8_MxN
#if !defined(HAVE_MMX) || !defined(MMX_mmx_VideoInterpAveY_U8_U8_MxN)
  int x, y;
  const uint8_t *ref_block_next = ref_block + field_stride;

  for (y = 0; y < n; y++) {
    /*
     * Indexed accesses avoid `*curr_block++ = (*curr_block + ...)`, which
     * modifies and reads curr_block without sequencing (undefined behaviour).
     */
    for (x = 0; x < m; x++) {
      const int interp = (ref_block[x] + ref_block_next[x] + 1) / 2;
      curr_block[x] = (curr_block[x] + interp + 1) / 2;
    }
    curr_block += frame_stride;
    ref_block += frame_stride;
    ref_block_next += frame_stride;
  }
#else
  int x, y;
  const int step = 8;
  const int jump = frame_stride - m;
  const uint8_t *ref_block_next = ref_block + field_stride;

  pxor_r2r(mm0, mm0);  // load 0 into mm0

  for (y = 0; y < n; y++) {
    for (x = 0; x < m / 8; x++) {
      mmx_interp_average_2_U8(curr_block, ref_block, ref_block_next);
      curr_block += step;
      ref_block += step;
      ref_block_next += step;
    }
    /* Skip the remainder of the row to reach the start of the next one. */
    curr_block += jump;
    ref_block += jump;
    ref_block_next += jump;
  }
#endif
}

/* Averaging Y interpolation, 16x16 block. */
void
mlib_VideoInterpAveY_U8_U8_16x16(uint8_t *curr_block,
                                 const uint8_t *ref_block,
                                 int32_t frame_stride,
                                 int32_t field_stride)
{
  mlib_VideoInterpAveY_U8_U8_MxN(16, 16, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/* Averaging Y interpolation, 16x8 block. */
void
mlib_VideoInterpAveY_U8_U8_16x8(uint8_t *curr_block,
                                const uint8_t *ref_block,
                                int32_t frame_stride,
                                int32_t field_stride)
{
  mlib_VideoInterpAveY_U8_U8_MxN(16, 8, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/* Averaging Y interpolation, 8x8 block. */
void
mlib_VideoInterpAveY_U8_U8_8x8(uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
  mlib_VideoInterpAveY_U8_U8_MxN(8, 8, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/* Averaging Y interpolation, 8x4 block. */
void
mlib_VideoInterpAveY_U8_U8_8x4(uint8_t *curr_block,
                               const uint8_t *ref_block,
                               int32_t frame_stride,
                               int32_t field_stride)
{
  mlib_VideoInterpAveY_U8_U8_MxN(8, 4, curr_block, ref_block,
                                 frame_stride, field_stride);
}

/*
 * Half-pixel interpolation in the y direction over an m x n block.
 *
 * Portable C branch, with next = ref + field_stride:
 *   curr[x] = (ref[x] + next[x] + 1) / 2
 * MMX branch: calls mmx_average_2_U8(curr, ref, next) once per 8-byte group
 * (the helper macro is defined elsewhere).
 *
 *   m, n          block width (bytes) and height (rows)
 *   curr_block    destination block; written in place
 *   ref_block     reference block
 *   frame_stride  distance in bytes between the starts of consecutive rows
 *   field_stride  offset in bytes from a reference row to the row it is
 *                 combined with
 */
static inline void
mlib_VideoInterpY_U8_U8_MxN(const uint8_t m,
                            const uint8_t n,
                            uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
/* Defining this symbol selects the MMX branch whenever HAVE_MMX is set. */
#define MMX_mmx_VideoInterpY_U8_U8_MxN
#if !defined(HAVE_MMX) || !defined(MMX_mmx_VideoInterpY_U8_U8_MxN)
  int x, y;
  const uint8_t *ref_block_next = ref_block + field_stride;

  for (y = 0; y < n; y++) {
    for (x = 0; x < m; x++)
      curr_block[x] = (ref_block[x] + ref_block_next[x] + 1) / 2;
    curr_block += frame_stride;
    ref_block += frame_stride;
    ref_block_next += frame_stride;
  }
#else
  int x, y;
  const int step = 8;
  const int jump = frame_stride - m;
  const uint8_t *ref_block_next = ref_block + field_stride;

  pxor_r2r(mm0, mm0);  // load 0 into mm0

  for (y = 0; y < n; y++) {
    for (x = 0; x < m / 8; x++) {
      mmx_average_2_U8(curr_block, ref_block, ref_block_next);
      curr_block += step;
      ref_block += step;
      ref_block_next += step;
    }
    /* Skip the remainder of the row to reach the start of the next one. */
    curr_block += jump;
    ref_block += jump;
    ref_block_next += jump;
  }
#endif
}

/* Y interpolation, 16x16 block. */
void
mlib_VideoInterpY_U8_U8_16x16(uint8_t *curr_block,
                              const uint8_t *ref_block,
                              int32_t frame_stride,
                              int32_t field_stride)
{
  mlib_VideoInterpY_U8_U8_MxN(16, 16, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* Y interpolation, 16x8 block. */
void
mlib_VideoInterpY_U8_U8_16x8(uint8_t *curr_block,
                             const uint8_t *ref_block,
                             int32_t frame_stride,
                             int32_t field_stride)
{
  mlib_VideoInterpY_U8_U8_MxN(16, 8, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* Y interpolation, 8x8 block. */
void
mlib_VideoInterpY_U8_U8_8x8(uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
  mlib_VideoInterpY_U8_U8_MxN(8, 8, curr_block, ref_block,
                              frame_stride, field_stride);
}

/* Y interpolation, 8x4 block. */
void
mlib_VideoInterpY_U8_U8_8x4(uint8_t *curr_block,
                            const uint8_t *ref_block,
                            int32_t frame_stride,
                            int32_t field_stride)
{
  mlib_VideoInterpY_U8_U8_MxN(8, 4, curr_block, ref_block,
                              frame_stride, field_stride);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -