📄 postprocess_altivec_template.c

📁 ffmpeg源码分析
💻 C
📖 第 1 页 / 共 4 页
字号:
        (vector unsigned short)vec_mergeh(cmp##i, cmp##i);              \      const vector unsigned short cmpLi##i =                            \        (vector unsigned short)vec_mergel(cmp##i, cmp##i);              \      const vector signed short cmpHf##i =                              \        (vector signed short)vec_and(cmpHi##i, mask1);                  \      const vector signed short cmpLf##i =                              \        (vector signed short)vec_and(cmpLi##i, mask2);                  \      const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);      \      const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i);   \      sum##i  = vec_sums(sumq##i, zero); } while (0)    COMPARE(0);    COMPARE(1);    COMPARE(2);    COMPARE(3);    COMPARE(4);    COMPARE(5);    COMPARE(6);    COMPARE(7);    COMPARE(8);    COMPARE(9);#undef COMPARE    vector signed int sumA2;    vector signed int sumB2;    {      const vector signed int sump02 = vec_mergel(sum0, sum2);      const vector signed int sump13 = vec_mergel(sum1, sum3);      const vector signed int sumA = vec_mergel(sump02, sump13);      const vector signed int sump46 = vec_mergel(sum4, sum6);      const vector signed int sump57 = vec_mergel(sum5, sum7);      const vector signed int sumB = vec_mergel(sump46, sump57);      const vector signed int sump8A = vec_mergel(sum8, zero);      const vector signed int sump9B = vec_mergel(sum9, zero);      const vector signed int sumC = vec_mergel(sump8A, sump9B);      const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);      const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);      const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);      const vector signed int t2A = vec_or(sumA, tA);      const vector signed int t2B = vec_or(sumB, tB);      const vector signed int t2C = vec_or(sumC, tC);      const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),                                            vec_sl(t2A, vuint32_1));      const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),                                            vec_sl(t2B, vuint32_1));      const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),                                            vec_sl(t2C, vuint32_1));      const vector signed int yA = vec_and(t2A, t3A);      const vector signed int yB = vec_and(t2B, t3B);      const vector signed int yC = vec_and(t2C, t3C);      const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);      const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);      const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);      const vector signed int sumAd8 = vec_perm(yA, yB, strangeperm2);      const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);      const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);      const vector signed int sumAp = vec_and(yA,                                              vec_and(sumAd4,sumAd8));      const vector signed int sumBp = vec_and(yB,                                              vec_and(sumBd4,sumBd8));      sumA2 = vec_or(sumAp,                     vec_sra(sumAp,                             vuint32_16));      sumB2  = vec_or(sumBp,                      vec_sra(sumBp,                              vuint32_16));    }    vec_st(sumA2, 0, S);    vec_st(sumB2, 16, S);  }  /* I'm not sure the following is actually faster     than straight, unvectorized C code :-( */  int __attribute__((aligned(16))) tQP2[4];  tQP2[0]= c->QP/2 + 1;  vector signed int vQP2 = vec_ld(0, tQP2);  vQP2 = vec_splat(vQP2, 0);  const vector unsigned char vuint8_2 = vec_splat_u8(2);  const vector signed int vsint32_8 = vec_splat_s32(8);  const vector unsigned int vuint32_4 = vec_splat_u32(4);  const vector unsigned char permA1 = (vector unsigned char)    AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,        0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);  const vector unsigned char permA2 = (vector unsigned char)    AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,        0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);  const vector unsigned char permA1inc = (vector unsigned char)    AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);  const vector unsigned char permA2inc = (vector unsigned char)    AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);  const vector unsigned char magic = (vector unsigned char)    AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);  const vector unsigned char extractPerm = (vector unsigned char)    AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,        0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);  const vector unsigned char extractPermInc = (vector unsigned char)    AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,        0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);  const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);  const vector unsigned char tenRight = (vector unsigned char)    AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);  const vector unsigned char eightLeft = (vector unsigned char)    AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);#define F_INIT(i)                                       \  vector unsigned char tenRightM##i = tenRight;         \  vector unsigned char permA1M##i = permA1;             \  vector unsigned char permA2M##i = permA2;             \  vector unsigned char extractPermM##i = extractPerm#define F2(i, j, k, l)                                                  \  if (S[i] & (1 << (l+1))) {                                            \    const vector unsigned char a_##j##_A##l =                           \      vec_perm(src##i, src##j, permA1M##i);                             \    const vector unsigned char a_##j##_B##l =                           \      vec_perm(a_##j##_A##l, src##k, permA2M##i);                       \    const vector signed int a_##j##_sump##l =                           \      (vector signed int)vec_msum(a_##j##_B##l, magic,                  \                                  (vector unsigned int)zero);           \    vector signed int F_##j##_##l =                                     \      vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);          \    F_##j##_##l = vec_splat(F_##j##_##l, 3);                            \    const vector signed int p_##j##_##l =                               \      (vector signed int)vec_perm(src##j,                               \                                  (vector unsigned char)zero,           \                                  extractPermM##i);                     \    const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\    const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\    vector signed int newpm_##j##_##l;                                  \    if (vec_all_lt(sum_##j##_##l, F_##j##_##l))                         \      newpm_##j##_##l = sum_##j##_##l;                                  \    else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))                   \      newpm_##j##_##l = diff_##j##_##l;                                 \    else newpm_##j##_##l = F_##j##_##l;                                 \    const vector unsigned char newpm2_##j##_##l =                       \      vec_splat((vector unsigned char)newpm_##j##_##l, 15);             \    const vector unsigned char mask##j##l = vec_add(identity,           \                                                    tenRightM##i);      \    src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);            \  }                                                                     \  permA1M##i = vec_add(permA1M##i, permA1inc);                          \  permA2M##i = vec_add(permA2M##i, permA2inc);                          \  tenRightM##i = vec_sro(tenRightM##i, eightLeft);                      \  extractPermM##i = vec_add(extractPermM##i, extractPermInc)#define ITER(i, j, k)                           \  F_INIT(i);                                    \  F2(i, j, k, 0);                               \  F2(i, j, k, 1);                               \  F2(i, j, k, 2);                               \  F2(i, j, k, 3);                               \  F2(i, j, k, 4);                               \  F2(i, j, k, 5);                               \  F2(i, j, k, 6);                               \  F2(i, j, k, 7)  ITER(0, 1, 2);  ITER(1, 2, 3);  ITER(2, 3, 4);  ITER(3, 4, 5);  ITER(4, 5, 6);  ITER(5, 6, 7);  ITER(6, 7, 8);  ITER(7, 8, 9);  const vector signed char neg1 = vec_splat_s8(-1);#define STORE_LINE(i)                                   \  const vector unsigned char permST##i =                \    vec_lvsr(i * stride, srcCopy);                      \  const vector unsigned char maskST##i =                \    vec_perm((vector unsigned char)zero,                \             (vector unsigned char)neg1, permST##i);    \  src##i = vec_perm(src##i ,src##i, permST##i);         \  sA##i= vec_sel(sA##i, src##i, maskST##i);             \  sB##i= vec_sel(src##i, sB##i, maskST##i);             \  vec_st(sA##i, i * stride, srcCopy);                   \  vec_st(sB##i, i * stride + 16, srcCopy)  STORE_LINE(1);  STORE_LINE(2);  STORE_LINE(3);  STORE_LINE(4);  STORE_LINE(5);  STORE_LINE(6);  STORE_LINE(7);  STORE_LINE(8);#undef STORE_LINE#undef ITER#undef F2}#define doHorizLowPass_altivec(a...) doHorizLowPass_C(a)#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)#define do_a_deblock_altivec(a...) do_a_deblock_C(a)static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,                                    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise){  const vector signed int zero = vec_splat_s32(0);  const vector signed short vsint16_1 = vec_splat_s16(1);  vector signed int v_dp = zero;  vector signed int v_sysdp = zero;  int d, sysd, i;  tempBluredPast[127]= maxNoise[0];  tempBluredPast[128]= maxNoise[1];  tempBluredPast[129]= maxNoise[2];#define LOAD_LINE(src, i)                                               \  register int j##src##i = i * stride;                                  \  vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);         \  const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src);   \  const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \  const vector unsigned char v_##src##A##i =                            \    vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);             \  vector signed short v_##src##Ass##i =                                 \    (vector signed short)vec_mergeh((vector signed char)zero,           \                                    (vector signed char)v_##src##A##i)  LOAD_LINE(src, 0);  LOAD_LINE(src, 1);  LOAD_LINE(src, 2);  LOAD_LINE(src, 3);  LOAD_LINE(src, 4);  LOAD_LINE(src, 5);  LOAD_LINE(src, 6);  LOAD_LINE(src, 7);  LOAD_LINE(tempBlured, 0);  LOAD_LINE(tempBlured, 1);  LOAD_LINE(tempBlured, 2);  LOAD_LINE(tempBlured, 3);  LOAD_LINE(tempBlured, 4);  LOAD_LINE(tempBlured, 5);  LOAD_LINE(tempBlured, 6);  LOAD_LINE(tempBlured, 7);#undef LOAD_LINE#define ACCUMULATE_DIFFS(i)                                     \  vector signed short v_d##i = vec_sub(v_tempBluredAss##i,      \                                       v_srcAss##i);            \  v_dp = vec_msums(v_d##i, v_d##i, v_dp);                       \  v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)  ACCUMULATE_DIFFS(0);  ACCUMULATE_DIFFS(1);  ACCUMULATE_DIFFS(2);  ACCUMULATE_DIFFS(3);  ACCUMULATE_DIFFS(4);  ACCUMULATE_DIFFS(5);  ACCUMULATE_DIFFS(6);  ACCUMULATE_DIFFS(7);#undef ACCUMULATE_DIFFS  v_dp = vec_sums(v_dp, zero);  v_sysdp = vec_sums(v_sysdp, zero);  v_dp = vec_splat(v_dp, 3);  v_sysdp = vec_splat(v_sysdp, 3);  vec_ste(v_dp, 0, &d);  vec_ste(v_sysdp, 0, &sysd);  i = d;  d = (4*d       +(*(tempBluredPast-256))       +(*(tempBluredPast-1))+ (*(tempBluredPast+1))       +(*(tempBluredPast+256))       +4)>>3;  *tempBluredPast=i;  if (d > maxNoise[1]) {    if (d < maxNoise[2]) {#define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i);      OP(0);      OP(1);      OP(2);      OP(3);      OP(4);      OP(5);      OP(6);      OP(7);#undef OP    } else {
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -