⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 postprocess_altivec_template.c

📁 这是著名的TCPMP播放器在WINDOWS和WINCE下编译通过的源程序。笔者对其中的LIBMAD库做了针对ARM MPU的优化，并增加了字幕功能。
💻 C
📖 第 1 页 / 共 4 页
字号:
	(vector unsigned short)vec_mergeh(cmp##i, cmp##i);		\
      const vector unsigned short cmpLi##i =				\
	(vector unsigned short)vec_mergel(cmp##i, cmp##i);		\
      const vector signed short cmpHf##i =				\
	(vector signed short)vec_and(cmpHi##i, mask1);			\
      const vector signed short cmpLf##i =				\
	(vector signed short)vec_and(cmpLi##i, mask2);			\
      const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);	\
      const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i);	\
      sum##i  = vec_sums(sumq##i, zero); } while (0)
	  
    COMPARE(0);
    COMPARE(1);
    COMPARE(2);
    COMPARE(3);
    COMPARE(4);
    COMPARE(5);
    COMPARE(6);
    COMPARE(7);
    COMPARE(8);
    COMPARE(9);
#undef COMPARE
	  
    vector signed int sumA2;
    vector signed int sumB2;
    {
      const vector signed int sump02 = vec_mergel(sum0, sum2);
      const vector signed int sump13 = vec_mergel(sum1, sum3);
      const vector signed int sumA = vec_mergel(sump02, sump13);
	      
      const vector signed int sump46 = vec_mergel(sum4, sum6);
      const vector signed int sump57 = vec_mergel(sum5, sum7);
      const vector signed int sumB = vec_mergel(sump46, sump57);
	      
      const vector signed int sump8A = vec_mergel(sum8, zero);
      const vector signed int sump9B = vec_mergel(sum9, zero);
      const vector signed int sumC = vec_mergel(sump8A, sump9B);
	      
      const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);
      const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);
      const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);
      const vector signed int t2A = vec_or(sumA, tA);
      const vector signed int t2B = vec_or(sumB, tB);
      const vector signed int t2C = vec_or(sumC, tC);
      const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
					    vec_sl(t2A, vuint32_1));
      const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
					    vec_sl(t2B, vuint32_1));
      const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
					    vec_sl(t2C, vuint32_1));
      const vector signed int yA = vec_and(t2A, t3A);
      const vector signed int yB = vec_and(t2B, t3B);
      const vector signed int yC = vec_and(t2C, t3C);
	      
      const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);
      const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);
      const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);
      const vector signed int sumAd8 = vec_perm(yA, yB, strangeperm2);
      const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
      const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
      const vector signed int sumAp = vec_and(yA,
					      vec_and(sumAd4,sumAd8));
      const vector signed int sumBp = vec_and(yB,
					      vec_and(sumBd4,sumBd8));
      sumA2 = vec_or(sumAp,
		     vec_sra(sumAp,
			     vuint32_16));
      sumB2  = vec_or(sumBp,
		      vec_sra(sumBp,
			      vuint32_16));
    }	
    vec_st(sumA2, 0, S);
    vec_st(sumB2, 16, S);
  }

  /* I'm not sure the following is actually faster
     than straight, unvectorized C code :-( */
	
  int __attribute__((aligned(16))) tQP2[4];
  tQP2[0]= c->QP/2 + 1;
  vector signed int vQP2 = vec_ld(0, tQP2);
  vQP2 = vec_splat(vQP2, 0);
  const vector unsigned char vuint8_2 = vec_splat_u8(2);
  const vector signed int vsint32_8 = vec_splat_s32(8);
  const vector unsigned int vuint32_4 = vec_splat_u32(4);

  const vector unsigned char permA1 = (vector unsigned char)
    AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
	0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
  const vector unsigned char permA2 = (vector unsigned char)
    AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
	0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
  const vector unsigned char permA1inc = (vector unsigned char)
    AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
  const vector unsigned char permA2inc = (vector unsigned char)
    AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
  const vector unsigned char magic = (vector unsigned char)
    AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
  const vector unsigned char extractPerm = (vector unsigned char)
    AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
	0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
  const vector unsigned char extractPermInc = (vector unsigned char)
    AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
	0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
  const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
  const vector unsigned char tenRight = (vector unsigned char)
    AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
  const vector unsigned char eightLeft = (vector unsigned char)
    AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);


#define F_INIT(i)					\
  vector unsigned char tenRightM##i = tenRight;		\
  vector unsigned char permA1M##i = permA1;		\
  vector unsigned char permA2M##i = permA2;		\
  vector unsigned char extractPermM##i = extractPerm

#define F2(i, j, k, l)							\
  if (S[i] & (1 << (l+1))) {						\
    const vector unsigned char a_##j##_A##l =				\
      vec_perm(src##i, src##j, permA1M##i);				\
    const vector unsigned char a_##j##_B##l =				\
      vec_perm(a_##j##_A##l, src##k, permA2M##i);			\
    const vector signed int a_##j##_sump##l =				\
      (vector signed int)vec_msum(a_##j##_B##l, magic,			\
				  (vector unsigned int)zero);		\
    vector signed int F_##j##_##l =					\
      vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);		\
    F_##j##_##l = vec_splat(F_##j##_##l, 3);				\
    const vector signed int p_##j##_##l =				\
      (vector signed int)vec_perm(src##j,				\
				  (vector unsigned char)zero,		\
				  extractPermM##i);			\
    const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \
    const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
    vector signed int newpm_##j##_##l;					\
    if (vec_all_lt(sum_##j##_##l, F_##j##_##l))				\
      newpm_##j##_##l = sum_##j##_##l;					\
    else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))			\
      newpm_##j##_##l = diff_##j##_##l;					\
    else newpm_##j##_##l = F_##j##_##l;					\
    const vector unsigned char newpm2_##j##_##l =			\
      vec_splat((vector unsigned char)newpm_##j##_##l, 15);		\
    const vector unsigned char mask##j##l = vec_add(identity,		\
						    tenRightM##i);	\
    src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);		\
  }									\
  permA1M##i = vec_add(permA1M##i, permA1inc);				\
  permA2M##i = vec_add(permA2M##i, permA2inc);				\
  tenRightM##i = vec_sro(tenRightM##i, eightLeft);			\
  extractPermM##i = vec_add(extractPermM##i, extractPermInc)

#define ITER(i, j, k)				\
  F_INIT(i);					\
  F2(i, j, k, 0);				\
  F2(i, j, k, 1);				\
  F2(i, j, k, 2);				\
  F2(i, j, k, 3);				\
  F2(i, j, k, 4);				\
  F2(i, j, k, 5);				\
  F2(i, j, k, 6);				\
  F2(i, j, k, 7)

  ITER(0, 1, 2);
  ITER(1, 2, 3);
  ITER(2, 3, 4);
  ITER(3, 4, 5);
  ITER(4, 5, 6);
  ITER(5, 6, 7);
  ITER(6, 7, 8);
  ITER(7, 8, 9);

  const vector signed char neg1 = vec_splat_s8(-1);
	
#define STORE_LINE(i)					\
  const vector unsigned char permST##i =		\
    vec_lvsr(i * stride, srcCopy);			\
  const vector unsigned char maskST##i =		\
    vec_perm((vector unsigned char)zero,		\
	     (vector unsigned char)neg1, permST##i);	\
  src##i = vec_perm(src##i ,src##i, permST##i);		\
  sA##i= vec_sel(sA##i, src##i, maskST##i);		\
  sB##i= vec_sel(src##i, sB##i, maskST##i);		\
  vec_st(sA##i, i * stride, srcCopy);			\
  vec_st(sB##i, i * stride + 16, srcCopy)
	
  STORE_LINE(1);
  STORE_LINE(2);
  STORE_LINE(3);
  STORE_LINE(4);
  STORE_LINE(5);
  STORE_LINE(6);
  STORE_LINE(7);
  STORE_LINE(8);

#undef STORE_LINE
#undef ITER
#undef F2
}

/*
 * Map the _altivec names of these three filters onto their plain C
 * counterparts: every argument is forwarded unchanged to
 * doHorizLowPass_C, doHorizDefFilter_C and do_a_deblock_C respectively.
 *
 * The forwarding uses the standard C99 variadic macro form
 * (... / __VA_ARGS__) instead of the GNU-specific named form "a...",
 * so it does not depend on a compiler extension.
 */
#define doHorizLowPass_altivec(...) doHorizLowPass_C(__VA_ARGS__)
#define doHorizDefFilter_altivec(...) doHorizDefFilter_C(__VA_ARGS__)
#define do_a_deblock_altivec(...) do_a_deblock_C(__VA_ARGS__)

static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
				    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
{
  const vector signed int zero = vec_splat_s32(0);
  const vector signed short vsint16_1 = vec_splat_s16(1);
  vector signed int v_dp = zero;
  vector signed int v_sysdp = zero;
  int d, sysd, i;
  
  tempBluredPast[127]= maxNoise[0];
  tempBluredPast[128]= maxNoise[1];
  tempBluredPast[129]= maxNoise[2];

#define LOAD_LINE(src, i)						\
  register int j##src##i = i * stride;					\
  vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);		\
  const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src);	\
  const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
  const vector unsigned char v_##src##A##i =				\
    vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);		\
  vector signed short v_##src##Ass##i =					\
    (vector signed short)vec_mergeh((vector signed char)zero,		\
				    (vector signed char)v_##src##A##i)
  
  LOAD_LINE(src, 0);
  LOAD_LINE(src, 1);
  LOAD_LINE(src, 2);
  LOAD_LINE(src, 3);
  LOAD_LINE(src, 4);
  LOAD_LINE(src, 5);
  LOAD_LINE(src, 6);
  LOAD_LINE(src, 7);

  LOAD_LINE(tempBlured, 0);
  LOAD_LINE(tempBlured, 1);
  LOAD_LINE(tempBlured, 2);
  LOAD_LINE(tempBlured, 3);
  LOAD_LINE(tempBlured, 4);
  LOAD_LINE(tempBlured, 5);
  LOAD_LINE(tempBlured, 6);
  LOAD_LINE(tempBlured, 7);
#undef LOAD_LINE

#define ACCUMULATE_DIFFS(i)					\
  vector signed short v_d##i = vec_sub(v_tempBluredAss##i,	\
				       v_srcAss##i);		\
  v_dp = vec_msums(v_d##i, v_d##i, v_dp);			\
  v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)

  ACCUMULATE_DIFFS(0);
  ACCUMULATE_DIFFS(1);
  ACCUMULATE_DIFFS(2);
  ACCUMULATE_DIFFS(3);
  ACCUMULATE_DIFFS(4);
  ACCUMULATE_DIFFS(5);
  ACCUMULATE_DIFFS(6);
  ACCUMULATE_DIFFS(7);
#undef ACCUMULATE_DIFFS

  v_dp = vec_sums(v_dp, zero);
  v_sysdp = vec_sums(v_sysdp, zero);

  v_dp = vec_splat(v_dp, 3);
  v_sysdp = vec_splat(v_sysdp, 3);
  
  vec_ste(v_dp, 0, &d);
  vec_ste(v_sysdp, 0, &sysd);

  i = d;
  d = (4*d
       +(*(tempBluredPast-256))
       +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
       +(*(tempBluredPast+256))
       +4)>>3;

  *tempBluredPast=i;

  if (d > maxNoise[1]) {
    if (d < maxNoise[2]) {
#define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i);

      OP(0);
      OP(1);
      OP(2);
      OP(3);
      OP(4);
      OP(5);
      OP(6);
      OP(7);
#undef OP
    } else {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -