⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 postprocess_altivec_template.c

📁 这是著名的TCPMP播放器在WINDOWS和WINCE下编译通过的源程序。笔者对其中的LIBMAD库做了针对ARM MPU的优化，并增加了字幕功能。
💻 C
📖 第 1 页 / 共 4 页
字号:
#define OP(i) v_tempBluredAss##i = v_srcAss##i;

      OP(0);
      OP(1);
      OP(2);
      OP(3);
      OP(4);
      OP(5);
      OP(6);
      OP(7);
#undef OP
    }
  } else {
    if (d < maxNoise[0]) {
      const vector signed short vsint16_7 = vec_splat_s16(7);
      const vector signed short vsint16_4 = vec_splat_s16(4);
      const vector unsigned short vuint16_3 = vec_splat_u16(3);
      
#define OP(i)								\
      const vector signed short v_temp##i =				\
	vec_mladd(v_tempBluredAss##i,					\
		  vsint16_7, v_srcAss##i);				\
      const vector signed short v_temp2##i =				\
	vec_add(v_temp##i, vsint16_4);					\
      v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)

      OP(0);
      OP(1);
      OP(2);
      OP(3);
      OP(4);
      OP(5);
      OP(6);
      OP(7);
#undef OP
    } else {
      const vector signed short vsint16_3 = vec_splat_s16(3);
      const vector signed short vsint16_2 = vec_splat_s16(2);
      
#define OP(i)								\
      const vector signed short v_temp##i =				\
	vec_mladd(v_tempBluredAss##i,					\
		  vsint16_3, v_srcAss##i);				\
      const vector signed short v_temp2##i =				\
	vec_add(v_temp##i, vsint16_2);					\
      v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)

      OP(0);
      OP(1);
      OP(2);
      OP(3);
      OP(4);
      OP(5);
      OP(6);
      OP(7);
#undef OP
    }
  }

  const vector signed char neg1 = vec_splat_s8(-1);
  const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
								      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);

#define PACK_AND_STORE(src, i)						\
  const vector unsigned char perms##src##i =				\
    vec_lvsr(i * stride, src);						\
  const vector unsigned char vf##src##i =				\
    vec_packsu(v_tempBluredAss##i, (vector signed short)zero);		\
  const vector unsigned char vg##src##i =				\
    vec_perm(vf##src##i, v_##src##A##i, permHH);			\
  const vector unsigned char mask##src##i =				\
    vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
  const vector unsigned char vg2##src##i =				\
    vec_perm(vg##src##i, vg##src##i, perms##src##i);			\
  const vector unsigned char svA##src##i =				\
    vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i);			\
  const vector unsigned char svB##src##i =				\
    vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i);			\
  vec_st(svA##src##i, i * stride, src);					\
  vec_st(svB##src##i, i * stride + 16, src)

  PACK_AND_STORE(src, 0);
  PACK_AND_STORE(src, 1);
  PACK_AND_STORE(src, 2);
  PACK_AND_STORE(src, 3);
  PACK_AND_STORE(src, 4);
  PACK_AND_STORE(src, 5);
  PACK_AND_STORE(src, 6);
  PACK_AND_STORE(src, 7);
  PACK_AND_STORE(tempBlured, 0);
  PACK_AND_STORE(tempBlured, 1);
  PACK_AND_STORE(tempBlured, 2);
  PACK_AND_STORE(tempBlured, 3);
  PACK_AND_STORE(tempBlured, 4);
  PACK_AND_STORE(tempBlured, 5);
  PACK_AND_STORE(tempBlured, 6);
  PACK_AND_STORE(tempBlured, 7);
#undef PACK_AND_STORE
}

/*
 * Transpose an 8-row by 16-column block of bytes into a packed buffer.
 *
 * Eight rows of 16 bytes are read from src, row r starting at
 * src + r * stride. Each row is fetched with two vec_ld loads combined
 * through a vec_lvsl permute, so src rows need not be 16-byte aligned.
 *
 * Sixteen 16-byte lines are then written to dst with vec_st at byte offsets
 * 0, 16, ..., 240. Output line c holds column c of the input (rows 0..7, in
 * order) in its first 8 bytes and zeros in its last 8 bytes.
 *
 * dst:    destination buffer, 256 bytes written with vec_st
 *         (presumably 16-byte aligned -- confirm against callers)
 * src:    first byte of the source block
 * stride: distance in bytes between consecutive source rows
 */
static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
  const vector unsigned char zero = vec_splat_u8(0);

  /* Unaligned load of source row r into row<r>. */
#define LOAD_ROW(r)                                                     \
  const vector unsigned char row##r =                                   \
    vec_perm(vec_ld(r * stride, src),                                   \
             vec_ld(r * stride + 16, src),                              \
             vec_lvsl(r * stride, src))

  LOAD_ROW(0);
  LOAD_ROW(1);
  LOAD_ROW(2);
  LOAD_ROW(3);
  LOAD_ROW(4);
  LOAD_ROW(5);
  LOAD_ROW(6);
  LOAD_ROW(7);
#undef LOAD_ROW

  /* Declare name<h> / name<l> as the high / low byte interleave of x and y. */
#define INTERLEAVE(name, h, l, x, y)                                    \
  const vector unsigned char name##h = vec_mergeh(x, y);                \
  const vector unsigned char name##l = vec_mergel(x, y)

  /* One shuffle pass over 16 vectors: vector k is interleaved with vector
     k + 8, producing outputs 2k (high half) and 2k + 1 (low half). */
#define SHUFFLE_PASS(out, in)                                           \
  INTERLEAVE(out, 0, 1, in##0, in##8);                                  \
  INTERLEAVE(out, 2, 3, in##1, in##9);                                  \
  INTERLEAVE(out, 4, 5, in##2, in##10);                                 \
  INTERLEAVE(out, 6, 7, in##3, in##11);                                 \
  INTERLEAVE(out, 8, 9, in##4, in##12);                                 \
  INTERLEAVE(out, 10, 11, in##5, in##13);                               \
  INTERLEAVE(out, 12, 13, in##6, in##14);                               \
  INTERLEAVE(out, 14, 15, in##7, in##15)

  /* Stage 1: spread every row over two vectors, each byte followed by 0. */
  INTERLEAVE(wide, 0, 1, row0, zero);
  INTERLEAVE(wide, 2, 3, row1, zero);
  INTERLEAVE(wide, 4, 5, row2, zero);
  INTERLEAVE(wide, 6, 7, row3, zero);
  INTERLEAVE(wide, 8, 9, row4, zero);
  INTERLEAVE(wide, 10, 11, row5, zero);
  INTERLEAVE(wide, 12, 13, row6, zero);
  INTERLEAVE(wide, 14, 15, row7, zero);

  /* Stages 2-4: three identical shuffle passes finish the transpose. */
  SHUFFLE_PASS(mixA, wide);
  SHUFFLE_PASS(mixB, mixA);
  SHUFFLE_PASS(col, mixB);
#undef SHUFFLE_PASS
#undef INTERLEAVE

  /* Write the 16 result lines back to back, 16 bytes apart. */
#define STORE_LINE(c) vec_st(col##c, c * 16, dst)
  STORE_LINE(0);
  STORE_LINE(1);
  STORE_LINE(2);
  STORE_LINE(3);
  STORE_LINE(4);
  STORE_LINE(5);
  STORE_LINE(6);
  STORE_LINE(7);
  STORE_LINE(8);
  STORE_LINE(9);
  STORE_LINE(10);
  STORE_LINE(11);
  STORE_LINE(12);
  STORE_LINE(13);
  STORE_LINE(14);
  STORE_LINE(15);
#undef STORE_LINE
}

static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
  const vector unsigned char zero = vec_splat_u8(0);
  const vector unsigned char magic_perm = (const vector unsigned char)
    AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
  
#define LOAD_DOUBLE_LINE(i, j)			    		\
  vector unsigned char src##i = vec_ld(i * 16, src);		\
  vector unsigned char src##j = vec_ld(j * 16, src)

  LOAD_DOUBLE_LINE(0, 1);
  LOAD_DOUBLE_LINE(2, 3);
  LOAD_DOUBLE_LINE(4, 5);
  LOAD_DOUBLE_LINE(6, 7);
  LOAD_DOUBLE_LINE(8, 9);
  LOAD_DOUBLE_LINE(10, 11);
  LOAD_DOUBLE_LINE(12, 13);
  LOAD_DOUBLE_LINE(14, 15);
#undef LOAD_DOUBLE_LINE

  vector unsigned char tempA = vec_mergeh(src0, src8);
  vector unsigned char tempB;
  vector unsigned char tempC = vec_mergeh(src1, src9);
  vector unsigned char tempD;
  vector unsigned char tempE = vec_mergeh(src2, src10);
  vector unsigned char tempG = vec_mergeh(src3, src11);
  vector unsigned char tempI = vec_mergeh(src4, src12);
  vector unsigned char tempJ;
  vector unsigned char tempK = vec_mergeh(src5, src13);
  vector unsigned char tempL;
  vector unsigned char tempM = vec_mergeh(src6, src14);
  vector unsigned char tempO = vec_mergeh(src7, src15);

  vector unsigned char temp0 = vec_mergeh(tempA, tempI);
  vector unsigned char temp1 = vec_mergel(tempA, tempI);
  vector unsigned char temp2;
  vector unsigned char temp3;
  vector unsigned char temp4 = vec_mergeh(tempC, tempK);
  vector unsigned char temp5 = vec_mergel(tempC, tempK);
  vector unsigned char temp6;
  vector unsigned char temp7;
  vector unsigned char temp8 = vec_mergeh(tempE, tempM);
  vector unsigned char temp9 = vec_mergel(tempE, tempM);
  vector unsigned char temp12 = vec_mergeh(tempG, tempO);
  vector unsigned char temp13 = vec_mergel(tempG, tempO);

  tempA = vec_mergeh(temp0, temp8);
  tempB = vec_mergel(temp0, temp8);
  tempC = vec_mergeh(temp1, temp9);
  tempD = vec_mergel(temp1, temp9);
  tempI = vec_mergeh(temp4, temp12);
  tempJ = vec_mergel(temp4, temp12);
  tempK = vec_mergeh(temp5, temp13);
  tempL = vec_mergel(temp5, temp13);

  temp0 = vec_mergeh(tempA, tempI);
  temp1 = vec_mergel(tempA, tempI);
  temp2 = vec_mergeh(tempB, tempJ);
  temp3 = vec_mergel(tempB, tempJ);
  temp4 = vec_mergeh(tempC, tempK);
  temp5 = vec_mergel(tempC, tempK);
  temp6 = vec_mergeh(tempD, tempL);
  temp7 = vec_mergel(tempD, tempL);


  const vector signed char neg1 = vec_splat_s8(-1);
#define STORE_DOUBLE_LINE(i, j)						\
  vector unsigned char dstA##i = vec_ld(i * stride, dst);		\
  vector unsigned char dstB##i = vec_ld(i * stride + 16, dst);		\
  vector unsigned char dstA##j = vec_ld(j * stride, dst);		\
  vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst);		\
  vector unsigned char align##i = vec_lvsr(i * stride, dst);		\
  vector unsigned char align##j = vec_lvsr(j * stride, dst);		\
  vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i);	\
  vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j);	\
  vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);	\
  vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);	\
  vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i);	\
  vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i);	\
  vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j);	\
  vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j);	\
  vec_st(dstAF##i, i * stride, dst);					\
  vec_st(dstBF##i, i * stride + 16, dst);				\
  vec_st(dstAF##j, j * stride, dst);					\
  vec_st(dstBF##j, j * stride + 16, dst)

  STORE_DOUBLE_LINE(0,1);
  STORE_DOUBLE_LINE(2,3);
  STORE_DOUBLE_LINE(4,5);
  STORE_DOUBLE_LINE(6,7);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -