altivec_mlib.c
const uint8_t *ref_block, int32_t stride)
{
    ASSERT(((int)curr_block & 15) == 0);

    if (((int)ref_block & 15) != 0) {
        /* ref_block is not 16-byte aligned: lvsl builds a permute control
         * vector, and each 16-byte row is extracted from two aligned loads
         * with vperm before being stored to the aligned curr_block. */
        int i0 = 0, i1 = 16;

        asm ("lvsl 2,%0,%1\n"
             : : "b" (ref_block), "b" (i0));
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
    } else {
        /* Both blocks are 16-byte aligned: one vector load/store per row. */
        int i0 = 0;

        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
        i0 += stride;
        asm ("lvx 0,%1,%2\n" "stvx 0,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0));
    }
}

void
mlib_VideoCopyRef_U8_U8_8x8_multiple(uint8_t *curr_block, uint8_t *ref_block,
                                     int32_t stride, int32_t count)
{
    ASSERT(((int)curr_block & 7) == 0);

    while (count--) {
        /* Copy one 8x8 block: lvx fetches the reference row and two stvewx
         * stores write the 8 bytes back as two words. */
        int i0 = 0, i1 = 4;

        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        curr_block += 8, ref_block += 8;
    }
}

void
mlib_VideoCopyRef_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block,
                            int32_t stride)
{
    ASSERT(((int)curr_block & 7) == 0);

    if ((((int)ref_block ^ (int)curr_block) & 15) != 0) {
        /* Source and destination sit at different offsets within a quadword:
         * combine lvsl on the source with lvsr on the destination into one
         * permute vector per row parity (v2 for even rows, v3 for odd rows). */
        const int i0 = 0, i1 = 16, i2 = 4;

        asm ("lvsl 2,%1,%2\n" "lvsl 3,%1,%3\n" "lvsr 4,%0,%2\n" "lvsr 5,%0,%3\n"
             "vperm 2,2,2,4\n" "vperm 3,3,3,5\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + stride));
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
    } else {
        int i0 = 0, i1 = 4;

        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
    }
}

void
mlib_VideoCopyRef_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block,
                            int32_t stride)
{
    ASSERT(((int)curr_block & 7) == 0);

    if ((((int)ref_block ^ (int)curr_block) & 15) != 0) {
        const int i0 = 0, i1 = 16, i2 = 4;

        asm ("lvsl 2,%1,%2\n" "lvsl 3,%1,%3\n" "lvsr 4,%0,%2\n" "lvsr 5,%0,%3\n"
             "vperm 2,2,2,4\n" "vperm 3,3,3,5\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + stride));
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += stride, ref_block += stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,3\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
    } else {
        int i0 = 0, i1 = 4;

        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += stride, i1 += stride;
        asm ("lvx 0,%1,%2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
    }
}

void
mlib_VideoInterpAveX_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block,
                                 int32_t frame_stride, int32_t field_stride)
{
    int i;
    int i0 = 0, i1 = 16;

    ASSERT(((int)curr_block & 15) == 0);

    /* v2 selects ref[x], v3 (= v2 + 1) selects ref[x + 1]; each row averages
     * the two with vavgub and then averages the result with curr_block. */
    asm ("vspltisb 0,1\n" "lvsl 2,%0,%1\n" "vaddubs 3,2,0\n"
         : : "b" (ref_block), "b" (i0));
    for (i = 0; i < 16; i++) {
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 4,0,1,2\n" "vperm 5,0,1,3\n"
             "lvx 6,%0,%2\n" "vavgub 4,4,5\n" "vavgub 4,4,6\n" "stvx 4,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += frame_stride, i1 += frame_stride;
    }
}

void
mlib_VideoInterpAveX_U8_U8_16x8(uint8_t *curr_block, const uint8_t *ref_block,
                                int32_t frame_stride, int32_t field_stride)
{
    int i;
    int i0 = 0, i1 = 16;

    ASSERT(((int)curr_block & 15) == 0);

    asm ("vspltisb 0,1\n" "lvsl 2,%0,%1\n" "vaddubs 3,2,0\n"
         : : "b" (ref_block), "b" (i0), "b" (i1));
    for (i = 0; i < 8; i++) {
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 4,0,1,2\n" "vperm 5,0,1,3\n"
             "lvx 6,%0,%2\n" "vavgub 4,4,5\n" "vavgub 4,4,6\n" "stvx 4,%0,%2\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1));
        i0 += frame_stride, i1 += frame_stride;
    }
}

void
mlib_VideoInterpAveX_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block,
                               int32_t frame_stride, int32_t field_stride)
{
    int i;
    const int i0 = 0, i1 = 16, i2 = 4;

    ASSERT(((int)curr_block & 7) == 0);

    /* v2/v3 are the ref[x]/ref[x + 1] permute vectors for even rows and
     * v4/v5 the corresponding pair for odd rows. */
    asm ("vspltisb 0,1\n"
         "lvsl 2,%1,%2\n" "lvsr 3,%0,%2\n" "lvsl 4,%1,%3\n" "lvsr 5,%0,%3\n"
         "vperm 2,2,2,3\n" "vperm 4,4,4,5\n" "vaddubs 3,2,0\n" "vaddubs 5,4,0\n"
         : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + frame_stride));
    for (i = 0; i < 4; i++) {
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,2\n" "vperm 7,0,1,3\n"
             "lvx 8,%0,%2\n" "vavgub 6,6,7\n" "vavgub 6,6,8\n"
             "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += frame_stride, ref_block += frame_stride;
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,4\n" "vperm 7,0,1,5\n"
             "lvx 8,%0,%2\n" "vavgub 6,6,7\n" "vavgub 6,6,8\n"
             "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n"
             : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2));
        curr_block += frame_stride, ref_block += frame_stride;
    }
}

void
mlib_VideoInterpAveX_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block,
                               int32_t frame_stride, int32_t field_stride)
{
    int i;
    const int i0 = 0, i1 = 16, i2 = 4;

    ASSERT(((int)curr_block & 7) == 0);

    asm ("vspltisb 0,1\n"
         "lvsl 2,%1,%2\n" "lvsr 3,%0,%2\n" "lvsl 4,%1,%3\n" "lvsr 5,%0,%3\n"
         "vperm 2,2,2,3\n" "vperm 4,4,4,5\n" "vaddubs 3,2,0\n" "vaddubs 5,4,0\n"
         : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + frame_stride));
    for (i = 0; i < 2; i++) {
        asm ("lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,2\n" "vperm 7,0,1,3\n"
             "lvx 8,%0,%2\n" "vavgub 6,6,7\n" "vavgub 6,6,8\n"
             "stvewx 6,%0,%2\n"