postprocess_altivec_template.c
    const vector signed short temp71 = vec_sub(v_sumsB6, vb3);
    const vector signed short v_sumsB7 = vec_add(temp71, v_last);
    const vector signed short temp81 = vec_sub(v_sumsB7, vb4);
    const vector signed short v_sumsB8 = vec_add(temp81, v_last);
    const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
    const vector signed short v_sumsB9 = vec_add(temp91, v_last);

#define COMPUTE_VR(i, j, k)                                             \
    const vector signed short temps1##i =                               \
        vec_add(v_sumsB##i, v_sumsB##k);                                \
    const vector signed short temps2##i =                               \
        vec_mladd(vb##j, (vector signed short)v_2, temps1##i);          \
    const vector signed short vr##j = vec_sra(temps2##i, v_4)

    COMPUTE_VR(0, 1, 2);
    COMPUTE_VR(1, 2, 3);
    COMPUTE_VR(2, 3, 4);
    COMPUTE_VR(3, 4, 5);
    COMPUTE_VR(4, 5, 6);
    COMPUTE_VR(5, 6, 7);
    COMPUTE_VR(6, 7, 8);
    COMPUTE_VR(7, 8, 9);

    const vector signed char neg1 = vec_splat_s8(-1);
    const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                                                        0x04, 0x05, 0x06, 0x07,
                                                                        0x18, 0x19, 0x1A, 0x1B,
                                                                        0x1C, 0x1D, 0x1E, 0x1F);

#define PACK_AND_STORE(i)                                               \
    const vector unsigned char perms##i =                               \
        vec_lvsr(i * stride, src2);                                     \
    const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                   \
    const vector unsigned char vg##i =                                  \
        vec_perm(vf##i, vbT##i, permHH);                                \
    const vector unsigned char mask##i =                                \
        vec_perm((vector unsigned char)zero,                            \
                 (vector unsigned char)neg1, perms##i);                 \
    const vector unsigned char vg2##i =                                 \
        vec_perm(vg##i, vg##i, perms##i);                               \
    const vector unsigned char svA##i =                                 \
        vec_sel(vbA##i, vg2##i, mask##i);                               \
    const vector unsigned char svB##i =                                 \
        vec_sel(vg2##i, vbB##i, mask##i);                               \
    vec_st(svA##i, i * stride, src2);                                   \
    vec_st(svB##i, i * stride + 16, src2)

#define PACK_AND_STORE_ALIGNED(i)                                      \
    const vector unsigned char vf##i =                                  \
        vec_packsu(vr##i, (vector signed short)zero);                   \
    const vector unsigned char vg##i =                                  \
        vec_perm(vf##i, vbT##i, permHH);                                \
    vec_st(vg##i, i * stride, src2)

    // Special-casing the aligned case is worthwhile, as all calls from
    // the (transposed) horizontal deblocks will be aligned, in addition
    // to the naturally aligned vertical deblocks.
    if (properStride && srcAlign) {
        PACK_AND_STORE_ALIGNED(1);
        PACK_AND_STORE_ALIGNED(2);
        PACK_AND_STORE_ALIGNED(3);
        PACK_AND_STORE_ALIGNED(4);
        PACK_AND_STORE_ALIGNED(5);
        PACK_AND_STORE_ALIGNED(6);
        PACK_AND_STORE_ALIGNED(7);
        PACK_AND_STORE_ALIGNED(8);
    } else {
        PACK_AND_STORE(1);
        PACK_AND_STORE(2);
        PACK_AND_STORE(3);
        PACK_AND_STORE(4);
        PACK_AND_STORE(5);
        PACK_AND_STORE(6);
        PACK_AND_STORE(7);
        PACK_AND_STORE(8);
    }
#undef PACK_AND_STORE
#undef PACK_AND_STORE_ALIGNED
}

static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0; unfortunately
      this is not always true. Quite a lot of loads/stores
      could be removed by assuming proper alignment of
      src & stride :-(
    */
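    /* The filter computes a middle energy mE = 2*(l3-l6) + 5*(l5-l4)
       across the block boundary, plus analogous left and right
       energies, and where |mE| is below 8*QP it moves lines 4 and 5
       toward each other by a clamped correction d, derived below. */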
    uint8_t *src2 = src;
    const vector signed int zero = vec_splat_s32(0);
    short __attribute__ ((aligned(16))) qp[8];
    qp[0] = 8*c->QP;
    vector signed short vqp = vec_ld(0, qp);
    vqp = vec_splat(vqp, 0);

#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, src2);                                     \
    const vector unsigned char vbA##i =                                 \
        vec_ld(i * stride, src2);                                       \
    const vector unsigned char vbB##i =                                 \
        vec_ld(i * stride + 16, src2);                                  \
    const vector unsigned char vbT##i =                                 \
        vec_perm(vbA##i, vbB##i, perm##i);                              \
    const vector signed short vb##i =                                   \
        (vector signed short)vec_mergeh((vector unsigned char)zero,     \
                                        (vector unsigned char)vbT##i)

    src2 += stride*3;

    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
#undef LOAD_LINE

    const vector signed short v_1 = vec_splat_s16(1);
    const vector signed short v_2 = vec_splat_s16(2);
    const vector signed short v_5 = vec_splat_s16(5);
    const vector signed short v_32 = vec_sl(v_1,
                                            (vector unsigned short)v_5);
    /* middle energy */
    const vector signed short l3minusl6 = vec_sub(vb3, vb6);
    const vector signed short l5minusl4 = vec_sub(vb5, vb4);
    const vector signed short twotimes_l3minusl6 = vec_mladd(v_2, l3minusl6, (vector signed short)zero);
    const vector signed short mE = vec_mladd(v_5, l5minusl4, twotimes_l3minusl6);
    const vector signed short absmE = vec_abs(mE);
    /* left & right energy */
    const vector signed short l1minusl4 = vec_sub(vb1, vb4);
    const vector signed short l3minusl2 = vec_sub(vb3, vb2);
    const vector signed short l5minusl8 = vec_sub(vb5, vb8);
    const vector signed short l7minusl6 = vec_sub(vb7, vb6);
    const vector signed short twotimes_l1minusl4 = vec_mladd(v_2, l1minusl4, (vector signed short)zero);
    const vector signed short twotimes_l5minusl8 = vec_mladd(v_2, l5minusl8, (vector signed short)zero);
    const vector signed short lE = vec_mladd(v_5, l3minusl2, twotimes_l1minusl4);
    const vector signed short rE = vec_mladd(v_5, l7minusl6, twotimes_l5minusl8);
    /* d */
    const vector signed short ddiff = vec_sub(absmE,
                                              vec_min(vec_abs(lE),
                                                      vec_abs(rE)));
    const vector signed short ddiffclamp = vec_max(ddiff, (vector signed short)zero);
    const vector signed short dtimes64 = vec_mladd(v_5, ddiffclamp, v_32);
    const vector signed short d = vec_sra(dtimes64, vec_splat_u16(6));
    const vector signed short minusd = vec_sub((vector signed short)zero, d);
    const vector signed short finald = vec_sel(minusd,
                                               d,
                                               vec_cmpgt(vec_sub((vector signed short)zero, mE),
                                                         (vector signed short)zero));
    /* q */
    const vector signed short qtimes2 = vec_sub(vb4, vb5);
    /* for a shift right to behave like /2, we need to add one
       to all negative integers */
    const vector signed short rounddown = vec_sel((vector signed short)zero,
                                                  v_1,
                                                  vec_cmplt(qtimes2, (vector signed short)zero));
    const vector signed short q = vec_sra(vec_add(qtimes2, rounddown), vec_splat_u16(1));
    /* clamp */
    const vector signed short dclamp_P1 = vec_max((vector signed short)zero, finald);
    const vector signed short dclamp_P = vec_min(dclamp_P1, q);
    const vector signed short dclamp_N1 = vec_min((vector signed short)zero, finald);
    const vector signed short dclamp_N = vec_max(dclamp_N1, q);

    const vector signed short dclampedfinal = vec_sel(dclamp_N,
                                                      dclamp_P,
                                                      vec_cmpgt(q, (vector signed short)zero));
    const vector signed short dornotd = vec_sel((vector signed short)zero,
                                                dclampedfinal,
                                                vec_cmplt(absmE, vqp));
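    /* dornotd is the correction actually applied: d, clamped so its
       magnitude never exceeds |q| = |l4 - l5| / 2, and forced to zero
       wherever |mE| >= 8*QP, i.e. where the discontinuity looks like
       a real edge rather than a blocking artifact. */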
    /* add/subtract to l4 and l5 */
    const vector signed short vb4minusd = vec_sub(vb4, dornotd);
    const vector signed short vb5plusd  = vec_add(vb5, dornotd);

    /* finally, stores */
    const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero);
    const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero);

    const vector signed char neg1 = vec_splat_s8(-1);
    const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03,
                                                                        0x04, 0x05, 0x06, 0x07,
                                                                        0x18, 0x19, 0x1A, 0x1B,
                                                                        0x1C, 0x1D, 0x1E, 0x1F);

#define STORE(i)                                                        \
    const vector unsigned char perms##i =                               \
        vec_lvsr(i * stride, src2);                                     \
    const vector unsigned char vg##i =                                  \
        vec_perm(st##i, vbT##i, permHH);                                \
    const vector unsigned char mask##i =                                \
        vec_perm((vector unsigned char)zero,                            \
                 (vector unsigned char)neg1, perms##i);                 \
    const vector unsigned char vg2##i =                                 \
        vec_perm(vg##i, vg##i, perms##i);                               \
    const vector unsigned char svA##i =                                 \
        vec_sel(vbA##i, vg2##i, mask##i);                               \
    const vector unsigned char svB##i =                                 \
        vec_sel(vg2##i, vbB##i, mask##i);                               \
    vec_st(svA##i, i * stride, src2);                                   \
    vec_st(svB##i, i * stride + 16, src2)

    STORE(4);
    STORE(5);
}

static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
    /*
      This code makes no assumption on src or stride.
      One could remove the recomputation of the perm
      vector by assuming (stride % 16) == 0; unfortunately
      this is not always true. Quite a lot of loads/stores
      could be removed by assuming proper alignment of
      src & stride :-(
    */
    uint8_t *srcCopy = src;
    uint8_t __attribute__((aligned(16))) dt[16];
    const vector unsigned char vuint8_1 = vec_splat_u8(1);
    const vector signed int zero = vec_splat_s32(0);
    vector unsigned char v_dt;
    dt[0] = deringThreshold;
    v_dt = vec_splat(vec_ld(0, dt), 0);

#define LOAD_LINE(i)                                                    \
    const vector unsigned char perm##i =                                \
        vec_lvsl(i * stride, srcCopy);                                  \
    vector unsigned char sA##i = vec_ld(i * stride, srcCopy);           \
    vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy);      \
    vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)

    LOAD_LINE(0);
    LOAD_LINE(1);
    LOAD_LINE(2);
    LOAD_LINE(3);
    LOAD_LINE(4);
    LOAD_LINE(5);
    LOAD_LINE(6);
    LOAD_LINE(7);
    LOAD_LINE(8);
    LOAD_LINE(9);
#undef LOAD_LINE

    vector unsigned char v_avg;
    {
        const vector unsigned char trunc_perm = (vector unsigned char)
            AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
                0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
        const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
        const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
        const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
        const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
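        /* EXTRACT(op) is a horizontal reduction, with op either min or
           max: each mergeh/mergel pair followed by vec_##op halves the
           number of distinct byte values, so after the final step every
           lane of v_##op holds the global minimum/maximum of the
           truncated 8x8 block. */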
#define EXTRACT(op) do {                                                \
        const vector unsigned char s##op##_1   = vec_##op(trunc_src12, trunc_src34); \
        const vector unsigned char s##op##_2   = vec_##op(trunc_src56, trunc_src78); \
        const vector unsigned char s##op##_6   = vec_##op(s##op##_1, s##op##_2); \
        const vector unsigned char s##op##_8h  = vec_mergeh(s##op##_6, s##op##_6); \
        const vector unsigned char s##op##_8l  = vec_mergel(s##op##_6, s##op##_6); \
        const vector unsigned char s##op##_9   = vec_##op(s##op##_8h, s##op##_8l); \
        const vector unsigned char s##op##_9h  = vec_mergeh(s##op##_9, s##op##_9); \
        const vector unsigned char s##op##_9l  = vec_mergel(s##op##_9, s##op##_9); \
        const vector unsigned char s##op##_10  = vec_##op(s##op##_9h, s##op##_9l); \
        const vector unsigned char s##op##_10h = vec_mergeh(s##op##_10, s##op##_10); \
        const vector unsigned char s##op##_10l = vec_mergel(s##op##_10, s##op##_10); \
        const vector unsigned char s##op##_11  = vec_##op(s##op##_10h, s##op##_10l); \
        const vector unsigned char s##op##_11h = vec_mergeh(s##op##_11, s##op##_11); \
        const vector unsigned char s##op##_11l = vec_mergel(s##op##_11, s##op##_11); \
        v_##op = vec_##op(s##op##_11h, s##op##_11l); } while (0)

        vector unsigned char v_min;
        vector unsigned char v_max;
        EXTRACT(min);
        EXTRACT(max);
#undef EXTRACT

        if (vec_all_lt(vec_sub(v_max, v_min), v_dt))
            return;

        v_avg = vec_avg(v_min, v_max);
    }

    signed int __attribute__((aligned(16))) S[8];
    {
        const vector unsigned short mask1 = (vector unsigned short)
            AVV(0x0001, 0x0002, 0x0004, 0x0008,
                0x0010, 0x0020, 0x0040, 0x0080);
        const vector unsigned short mask2 = (vector unsigned short)
            AVV(0x0100, 0x0200, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000);

        const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
        const vector unsigned int vuint32_1 = vec_splat_u32(1);

#define COMPARE(i)                                                      \
        vector signed int sum##i;                                       \
        do {                                                            \
            const vector unsigned char cmp##i =                         \
                (vector unsigned char)vec_cmpgt(src##i, v_avg);         \
            const vector unsigned short cmpHi##i =                      \