📄 postprocess_altivec_template.c
字号:
#define OP(i) v_tempBluredAss##i = v_srcAss##i; OP(0); OP(1); OP(2); OP(3); OP(4); OP(5); OP(6); OP(7);#undef OP } } else { if (d < maxNoise[0]) { const vector signed short vsint16_7 = vec_splat_s16(7); const vector signed short vsint16_4 = vec_splat_s16(4); const vector unsigned short vuint16_3 = vec_splat_u16(3);#define OP(i) \ const vector signed short v_temp##i = \ vec_mladd(v_tempBluredAss##i, \ vsint16_7, v_srcAss##i); \ const vector signed short v_temp2##i = \ vec_add(v_temp##i, vsint16_4); \ v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) OP(0); OP(1); OP(2); OP(3); OP(4); OP(5); OP(6); OP(7);#undef OP } else { const vector signed short vsint16_3 = vec_splat_s16(3); const vector signed short vsint16_2 = vec_splat_s16(2);#define OP(i) \ const vector signed short v_temp##i = \ vec_mladd(v_tempBluredAss##i, \ vsint16_3, v_srcAss##i); \ const vector signed short v_temp2##i = \ vec_add(v_temp##i, vsint16_2); \ v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) OP(0); OP(1); OP(2); OP(3); OP(4); OP(5); OP(6); OP(7);#undef OP } } const vector signed char neg1 = vec_splat_s8(-1); const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);#define PACK_AND_STORE(src, i) \ const vector unsigned char perms##src##i = \ vec_lvsr(i * stride, src); \ const vector unsigned char vf##src##i = \ vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ const vector unsigned char vg##src##i = \ vec_perm(vf##src##i, v_##src##A##i, permHH); \ const vector unsigned char mask##src##i = \ vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ const vector unsigned char vg2##src##i = \ vec_perm(vg##src##i, vg##src##i, perms##src##i); \ const vector unsigned char svA##src##i = \ vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ const vector unsigned char svB##src##i = \ vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ vec_st(svA##src##i, i * stride, src); \ vec_st(svB##src##i, i * stride + 16, src) PACK_AND_STORE(src, 0); PACK_AND_STORE(src, 1); PACK_AND_STORE(src, 2); PACK_AND_STORE(src, 3); PACK_AND_STORE(src, 4); PACK_AND_STORE(src, 5); PACK_AND_STORE(src, 6); PACK_AND_STORE(src, 7); PACK_AND_STORE(tempBlured, 0); PACK_AND_STORE(tempBlured, 1); PACK_AND_STORE(tempBlured, 2); PACK_AND_STORE(tempBlured, 3); PACK_AND_STORE(tempBlured, 4); PACK_AND_STORE(tempBlured, 5); PACK_AND_STORE(tempBlured, 6); PACK_AND_STORE(tempBlured, 7);#undef PACK_AND_STORE}static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { const vector unsigned char zero = vec_splat_u8(0);#define LOAD_DOUBLE_LINE(i, j) \ vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ vector unsigned char srcA##i = vec_ld(i * stride, src); \ vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \ vector unsigned char srcC##i = vec_ld(j * stride, src); \ vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \ vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i) LOAD_DOUBLE_LINE(0, 1); LOAD_DOUBLE_LINE(2, 3); LOAD_DOUBLE_LINE(4, 5); LOAD_DOUBLE_LINE(6, 7);#undef LOAD_DOUBLE_LINE vector unsigned char tempA = vec_mergeh(src0, zero); vector unsigned char tempB = vec_mergel(src0, zero); vector unsigned char tempC = vec_mergeh(src1, zero); vector unsigned char tempD = vec_mergel(src1, zero); vector unsigned char tempE = vec_mergeh(src2, zero); vector unsigned char tempF = vec_mergel(src2, zero); vector unsigned char tempG = vec_mergeh(src3, zero); vector unsigned char tempH = vec_mergel(src3, zero); vector unsigned char tempI = vec_mergeh(src4, zero); vector unsigned char tempJ = vec_mergel(src4, zero); vector unsigned char tempK = vec_mergeh(src5, zero); vector unsigned char tempL = vec_mergel(src5, zero); vector unsigned char tempM = vec_mergeh(src6, zero); vector unsigned char tempN = vec_mergel(src6, zero); vector unsigned char tempO = vec_mergeh(src7, zero); vector unsigned char tempP = vec_mergel(src7, zero); vector unsigned char temp0 = vec_mergeh(tempA, tempI); vector unsigned char temp1 = vec_mergel(tempA, tempI); vector unsigned char temp2 = vec_mergeh(tempB, tempJ); vector unsigned char temp3 = vec_mergel(tempB, tempJ); vector unsigned char temp4 = vec_mergeh(tempC, tempK); vector unsigned char temp5 = vec_mergel(tempC, tempK); vector unsigned char temp6 = vec_mergeh(tempD, tempL); vector unsigned char temp7 = vec_mergel(tempD, tempL); vector unsigned char temp8 = vec_mergeh(tempE, tempM); vector unsigned char temp9 = vec_mergel(tempE, tempM); vector unsigned char temp10 = vec_mergeh(tempF, tempN); vector unsigned char temp11 = vec_mergel(tempF, tempN); vector unsigned char temp12 = vec_mergeh(tempG, tempO); vector unsigned char temp13 = vec_mergel(tempG, tempO); vector unsigned char temp14 = vec_mergeh(tempH, tempP); vector unsigned char temp15 = vec_mergel(tempH, tempP); tempA = vec_mergeh(temp0, temp8); tempB = vec_mergel(temp0, temp8); tempC = vec_mergeh(temp1, temp9); tempD = vec_mergel(temp1, temp9); tempE = vec_mergeh(temp2, temp10); tempF = vec_mergel(temp2, temp10); tempG = vec_mergeh(temp3, temp11); tempH = vec_mergel(temp3, temp11); tempI = vec_mergeh(temp4, temp12); tempJ = vec_mergel(temp4, temp12); tempK = vec_mergeh(temp5, temp13); tempL = vec_mergel(temp5, temp13); tempM = vec_mergeh(temp6, temp14); tempN = vec_mergel(temp6, temp14); tempO = vec_mergeh(temp7, temp15); tempP = vec_mergel(temp7, temp15); temp0 = vec_mergeh(tempA, tempI); temp1 = vec_mergel(tempA, tempI); temp2 = vec_mergeh(tempB, tempJ); temp3 = vec_mergel(tempB, tempJ); temp4 = vec_mergeh(tempC, tempK); temp5 = vec_mergel(tempC, tempK); temp6 = vec_mergeh(tempD, tempL); temp7 = vec_mergel(tempD, tempL); temp8 = vec_mergeh(tempE, tempM); temp9 = vec_mergel(tempE, tempM); temp10 = vec_mergeh(tempF, tempN); temp11 = vec_mergel(tempF, tempN); temp12 = vec_mergeh(tempG, tempO); temp13 = vec_mergel(tempG, tempO); temp14 = vec_mergeh(tempH, tempP); temp15 = vec_mergel(tempH, tempP); vec_st(temp0, 0, dst); vec_st(temp1, 16, dst); vec_st(temp2, 32, dst); vec_st(temp3, 48, dst); vec_st(temp4, 64, dst); vec_st(temp5, 80, dst); vec_st(temp6, 96, dst); vec_st(temp7, 112, dst); vec_st(temp8, 128, dst); vec_st(temp9, 144, dst); vec_st(temp10, 160, dst); vec_st(temp11, 176, dst); vec_st(temp12, 192, dst); vec_st(temp13, 208, dst); vec_st(temp14, 224, dst); vec_st(temp15, 240, dst);}static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { const vector unsigned char zero = vec_splat_u8(0); const vector unsigned char magic_perm = (const vector unsigned char) AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);#define LOAD_DOUBLE_LINE(i, j) \ vector unsigned char src##i = vec_ld(i * 16, src); \ vector unsigned char src##j = vec_ld(j * 16, src) LOAD_DOUBLE_LINE(0, 1); LOAD_DOUBLE_LINE(2, 3); LOAD_DOUBLE_LINE(4, 5); LOAD_DOUBLE_LINE(6, 7); LOAD_DOUBLE_LINE(8, 9); LOAD_DOUBLE_LINE(10, 11); LOAD_DOUBLE_LINE(12, 13); LOAD_DOUBLE_LINE(14, 15);#undef LOAD_DOUBLE_LINE vector unsigned char tempA = vec_mergeh(src0, src8); vector unsigned char tempB; vector unsigned char tempC = vec_mergeh(src1, src9); vector unsigned char tempD; vector unsigned char tempE = vec_mergeh(src2, src10); vector unsigned char tempG = vec_mergeh(src3, src11); vector unsigned char tempI = vec_mergeh(src4, src12); vector unsigned char tempJ; vector unsigned char tempK = vec_mergeh(src5, src13); vector unsigned char tempL; vector unsigned char tempM = vec_mergeh(src6, src14); vector unsigned char tempO = vec_mergeh(src7, src15); vector unsigned char temp0 = vec_mergeh(tempA, tempI); vector unsigned char temp1 = vec_mergel(tempA, tempI); vector unsigned char temp2; vector unsigned char temp3; vector unsigned char temp4 = vec_mergeh(tempC, tempK); vector unsigned char temp5 = vec_mergel(tempC, tempK); vector unsigned char temp6; vector unsigned char temp7; vector unsigned char temp8 = vec_mergeh(tempE, tempM); vector unsigned char temp9 = vec_mergel(tempE, tempM); vector unsigned char temp12 = vec_mergeh(tempG, tempO); vector unsigned char temp13 = vec_mergel(tempG, tempO); tempA = vec_mergeh(temp0, temp8); tempB = vec_mergel(temp0, temp8); tempC = vec_mergeh(temp1, temp9); tempD = vec_mergel(temp1, temp9); tempI = vec_mergeh(temp4, temp12); tempJ = vec_mergel(temp4, temp12); tempK = vec_mergeh(temp5, temp13); tempL = vec_mergel(temp5, temp13); temp0 = vec_mergeh(tempA, tempI); temp1 = vec_mergel(tempA, tempI); temp2 = vec_mergeh(tempB, tempJ); temp3 = vec_mergel(tempB, tempJ); temp4 = vec_mergeh(tempC, tempK); temp5 = vec_mergel(tempC, tempK); temp6 = vec_mergeh(tempD, tempL); temp7 = vec_mergel(tempD, tempL); const vector signed char neg1 = vec_splat_s8(-1);#define STORE_DOUBLE_LINE(i, j) \ vector unsigned char dstA##i = vec_ld(i * stride, dst); \ vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ vector unsigned char dstA##j = vec_ld(j * stride, dst); \ vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ vector unsigned char align##i = vec_lvsr(i * stride, dst); \ vector unsigned char align##j = vec_lvsr(j * stride, dst); \ vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ vec_st(dstAF##i, i * stride, dst); \ vec_st(dstBF##i, i * stride + 16, dst); \ vec_st(dstAF##j, j * stride, dst); \ vec_st(dstBF##j, j * stride + 16, dst) STORE_DOUBLE_LINE(0,1); STORE_DOUBLE_LINE(2,3); STORE_DOUBLE_LINE(4,5); STORE_DOUBLE_LINE(6,7);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -