📄 altivec_mlib.c
字号:
"stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,4\n" "vperm 7,0,1,5\n" "lvx 8,%0,%2\n" "vavgub 6,6,7\n" "vavgub 6,6,8\n" "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; }}voidmlib_VideoInterpAveY_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) { int i0 = 0, i1 = 16; asm("" "lvsl 4,%0,%1\n" "" : : "b" (ref_block), "b" (i0)); for (i = 0; i < 16; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%1,%4\n" "lvx 3,%2,%4\n" "vperm 0,0,2,4\n" "vperm 1,1,3,4\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; } } else { int i0 = 0; for (i = 0; i < 16; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0)); i0 += frame_stride; } }}void mlib_VideoInterpAveY_U8_U8_16x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) { int i0 = 0, i1 = 16; asm("" "lvsl 4,%0,%1\n" "" : : "b" (ref_block), "b" (i0)); for (i = 0; i < 8; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%1,%4\n" "lvx 3,%2,%4\n" "vperm 0,0,2,4\n" "vperm 1,1,3,4\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; } } else { int i0 = 0; for (i = 0; i < 8; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0)); i0 += frame_stride; } }}void mlib_VideoInterpAveY_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; ASSERT(((int)curr_block & 7) == 0); if (((((int)ref_block ^ (int)curr_block) | field_stride) & 15) != 0) { const int i0 = 0, i1 = 16, i2 = 4; asm("" "lvsl 4,%1,%3\n" "lvsl 5,%1,%4\n" "lvsl 6,%2,%3\n" "lvsl 7,%2,%4\n" "lvsr 8,%0,%3\n" "lvsr 9,%0,%4\n" "vperm 4,4,4,8\n" "vperm 5,5,5,9\n" "vperm 6,6,6,8\n" "vperm 7,7,7,9\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i0 + frame_stride)); for (i = 0; i < 4; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%1,%4\n" "lvx 2,%2,%3\n" "lvx 3,%2,%4\n" "vperm 8,0,1,4\n" "vperm 9,2,3,6\n" "lvx 10,%0,%3\n" "vavgub 8,8,9\n" "vavgub 8,8,10\n" "stvewx 8,%0,%3\n" "stvewx 8,%0,%5\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; asm("" "lvx 0,%1,%3\n" "lvx 1,%1,%4\n" "lvx 2,%2,%3\n" "lvx 3,%2,%4\n" "vperm 8,0,1,5\n" "vperm 9,2,3,7\n" "lvx 10,%0,%3\n" "vavgub 8,8,9\n" "vavgub 8,8,10\n" "stvewx 8,%0,%3\n" "stvewx 8,%0,%5\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; } } else { int i0 = 0, i1 = 4; for (i = 0; i < 8; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvewx 0,%0,%3\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; } }}void mlib_VideoInterpAveY_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; ASSERT(((int)curr_block & 7) == 0); if (((((int)ref_block ^ (int)curr_block) | field_stride) & 15) != 0) { const int i0 = 0, i1 = 16, i2 = 4; asm("" "lvsl 4,%1,%3\n" "lvsl 5,%1,%4\n" "lvsl 6,%2,%3\n" "lvsl 7,%2,%4\n" "lvsr 8,%0,%3\n" "lvsr 9,%0,%4\n" "vperm 4,4,4,8\n" "vperm 5,5,5,9\n" "vperm 6,6,6,8\n" "vperm 7,7,7,9\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i0 + frame_stride)); for (i = 0; i < 2; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%1,%4\n" "lvx 2,%2,%3\n" "lvx 3,%2,%4\n" "vperm 8,0,1,4\n" "vperm 9,2,3,6\n" "lvx 10,%0,%3\n" "vavgub 8,8,9\n" "vavgub 8,8,10\n" "stvewx 8,%0,%3\n" "stvewx 8,%0,%5\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; asm("" "lvx 0,%1,%3\n" "lvx 1,%1,%4\n" "lvx 2,%2,%3\n" "lvx 3,%2,%4\n" "vperm 8,0,1,5\n" "vperm 9,2,3,7\n" "lvx 10,%0,%3\n" "vavgub 8,8,9\n" "vavgub 8,8,10\n" "stvewx 8,%0,%3\n" "stvewx 8,%0,%5\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; } } else { int i0 = 0, i1 = 4; for (i = 0; i < 4; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%0,%3\n" "vavgub 0,0,1\n" "vavgub 0,0,2\n" "stvewx 0,%0,%3\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; } }}voidmlib_VideoInterpAveXY_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; int i0 = 0, i1 = 16; ASSERT(((int)curr_block & 15) == 0); asm("" "vspltisb 0,1\n" "lvsl 4,%0,%1\n" "vaddubs 5,4,0\n" "" : : "b" (ref_block), "b" (i0)); for (i = 0; i < 16; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%1,%4\n" "lvx 3,%2,%4\n" "vperm 6,0,2,4\n" "vperm 7,0,2,5\n" "vperm 8,1,3,4\n" "vperm 9,1,3,5\n" "vavgub 6,6,7\n" "vavgub 8,8,9\n" "lvx 10,%0,%3\n" "vavgub 6,6,8\n" "vavgub 6,6,10\n" "stvx 6,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; }}void mlib_VideoInterpAveXY_U8_U8_16x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; int i0 = 0, i1 = 16; ASSERT(((int)curr_block & 15) == 0); asm("" "vspltisb 0,1\n" "lvsl 4,%0,%1\n" "vaddubs 5,4,0\n" "" : : "b" (ref_block), "b" (i0)); for (i = 0; i < 8; i++) { asm("" "lvx 0,%1,%3\n" "lvx 1,%2,%3\n" "lvx 2,%1,%4\n" "lvx 3,%2,%4\n" "vperm 6,0,2,4\n" "vperm 7,0,2,5\n" "vperm 8,1,3,4\n" "vperm 9,1,3,5\n" "vavgub 6,6,7\n" "vavgub 8,8,9\n" "lvx 10,%0,%3\n" "vavgub 6,6,8\n" "vavgub 6,6,10\n" "stvx 6,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (ref_block + field_stride), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; }}void mlib_VideoInterpAveXY_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ mlib_VideoInterpAveXY_U8_U8 (curr_block, ref_block, 8, 8, frame_stride, field_stride);}void mlib_VideoInterpAveXY_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ mlib_VideoInterpAveXY_U8_U8 (curr_block, ref_block, 8, 4, frame_stride, field_stride);}voidmlib_VideoInterpX_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; int i0 = 0, i1 = 16; ASSERT(((int)curr_block & 15) == 0); asm("" "vspltisb 0,1\n" "lvsl 2,%0,%1\n" "vaddubs 3,2,0\n" "" : : "b" (ref_block), "b" (i0), "b" (i1)); for (i = 0; i < 16; i++) { asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 4,0,1,2\n" "vperm 5,0,1,3\n" "vavgub 4,4,5\n" "stvx 4,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; }}void mlib_VideoInterpX_U8_U8_16x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; int i0 = 0, i1 = 16; ASSERT(((int)curr_block & 15) == 0); asm("" "vspltisb 0,1\n" "lvsl 2,%0,%1\n" "vaddubs 3,2,0\n" "" : : "b" (ref_block), "b" (i0), "b" (i1)); for (i = 0; i < 8; i++) { asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 4,0,1,2\n" "vperm 5,0,1,3\n" "vavgub 4,4,5\n" "stvx 4,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += frame_stride, i1 += frame_stride; }}void mlib_VideoInterpX_U8_U8_8x8(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; const int i0 = 0, i1 = 16, i2 = 4; ASSERT(((int)curr_block & 7) == 0); asm("" "vspltisb 0,1\n" "lvsl 2,%1,%2\n" "lvsr 3,%0,%2\n" "lvsl 4,%1,%3\n" "lvsr 5,%0,%3\n" "vperm 2,2,2,3\n" "vperm 4,4,4,5\n" "vaddubs 3,2,0\n" "vaddubs 5,4,0\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + frame_stride)); for (i = 0; i < 4; i++) { asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,2\n" "vperm 7,0,1,3\n" "vavgub 6,6,7\n" "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,4\n" "vperm 7,0,1,5\n" "vavgub 6,6,7\n" "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; }}void mlib_VideoInterpX_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; const int i0 = 0, i1 = 16, i2 = 4; ASSERT(((int)curr_block & 7) == 0); asm("" "vspltisb 0,1\n" "lvsl 2,%1,%2\n" "lvsr 3,%0,%2\n" "lvsl 4,%1,%3\n" "lvsr 5,%0,%3\n" "vperm 2,2,2,3\n" "vperm 4,4,4,5\n" "vaddubs 3,2,0\n" "vaddubs 5,4,0\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + frame_stride)); for (i = 0; i < 2; i++) { asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,2\n" "vperm 7,0,1,3\n" "vavgub 6,6,7\n" "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 6,0,1,4\n" "vperm 7,0,1,5\n" "vavgub 6,6,7\n" "stvewx 6,%0,%2\n" "stvewx 6,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += frame_stride, ref_block += frame_stride; }}voidmlib_VideoInterpY_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t frame_stride, int32_t field_stride){ int i; ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -