📄 altivec_mlib.c
字号:
"lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); } else { int i0 = 0, i1 = 4; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); }}void mlib_VideoCopyRefAve_U8_U8_8x4(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride){ ASSERT(((int)curr_block & 7) == 0); if ((((int)ref_block ^ (int)curr_block) & 15) != 0) { const int i0 = 0, i1 = 16, i2 = 4; asm("" "lvsl 3,%1,%2\n" "lvsl 4,%1,%3\n" "lvsr 5,%0,%2\n" "lvsr 6,%0,%3\n" "vperm 3,3,3,5\n" "vperm 4,4,4,6\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i0 + stride)); asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,3\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); curr_block += stride, ref_block += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "lvx 2,%0,%2\n" "vperm 0,0,1,4\n" "vavgub 0,0,2\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%4\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1), "b" (i2)); } else { int i0 = 0, i1 = 4; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%0,%2\n" "lvx 1,%1,%2\n" "vavgub 0,0,1\n" "stvewx 0,%0,%2\n" "stvewx 0,%0,%3\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); }}voidmlib_VideoCopyRef_U8_U8_16x16_multiple(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride, int32_t count){ ASSERT(((int)curr_block & 15) == 0); ASSERT(((int)ref_block & 15) == 0); while (count--) { int i0 = 0; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); curr_block += 16, ref_block += 16; }}voidmlib_VideoCopyRef_U8_U8_16x16(uint8_t *curr_block, const uint8_t *ref_block, int32_t stride){ ASSERT(((int)curr_block & 15) == 0); if (((int)ref_block & 15) != 0) { int i0 = 0, i1 = 16; asm("" "lvsl 2,%0,%1\n" "" : : "b" (ref_block), "b" (i0)); asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); i0 += stride, i1 += stride; asm("" "lvx 0,%1,%2\n" "lvx 1,%1,%3\n" "vperm 0,0,1,2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0), "b" (i1)); } else { int i0 = 0; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); i0 += stride; asm("" "lvx 0,%1,%2\n" "stvx 0,%0,%2\n" "" : : "b" (curr_block), "b" (ref_block), "b" (i0)); }}void mlib_VideoCopyRef_U8_U8_16x8(uint8_t *curr_block,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -