📄 postprocess_altivec_template.c

📁 FFmpeg 移植到 Symbian 的全部源代码
💻 C
📖 第 1 页 / 共 4 页
字号:
            0x0010, 0x0020, 0x0040, 0x0080);    const vector unsigned short mask2 = (vector unsigned short)        AVV(0x0100, 0x0200, 0x0000, 0x0000,            0x0000, 0x0000, 0x0000, 0x0000);    const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));    const vector unsigned int vuint32_1 = vec_splat_u32(1);#define COMPARE(i)                                                      \    vector signed int sum##i;                                           \    do {                                                                \        const vector unsigned char cmp##i =                             \            (vector unsigned char)vec_cmpgt(src##i, v_avg);             \        const vector unsigned short cmpHi##i =                          \            (vector unsigned short)vec_mergeh(cmp##i, cmp##i);          \        const vector unsigned short cmpLi##i =                          \            (vector unsigned short)vec_mergel(cmp##i, cmp##i);          \        const vector signed short cmpHf##i =                            \            (vector signed short)vec_and(cmpHi##i, mask1);              \        const vector signed short cmpLf##i =                            \            (vector signed short)vec_and(cmpLi##i, mask2);              \        const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);    \        const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \        sum##i  = vec_sums(sumq##i, zero); } while (0)    COMPARE(0);    COMPARE(1);    COMPARE(2);    COMPARE(3);    COMPARE(4);    COMPARE(5);    COMPARE(6);    COMPARE(7);    COMPARE(8);    COMPARE(9);#undef COMPARE    vector signed int sumA2;    vector signed int sumB2;    {    const vector signed int sump02 = vec_mergel(sum0, sum2);    const vector signed int sump13 = vec_mergel(sum1, sum3);    const vector signed int sumA = vec_mergel(sump02, sump13);    const vector signed int sump46 = vec_mergel(sum4, sum6);    const vector signed int sump57 = 
vec_mergel(sum5, sum7);    const vector signed int sumB = vec_mergel(sump46, sump57);    const vector signed int sump8A = vec_mergel(sum8, zero);    const vector signed int sump9B = vec_mergel(sum9, zero);    const vector signed int sumC = vec_mergel(sump8A, sump9B);    const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);    const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);    const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);    const vector signed int t2A = vec_or(sumA, tA);    const vector signed int t2B = vec_or(sumB, tB);    const vector signed int t2C = vec_or(sumC, tC);    const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),                                          vec_sl(t2A, vuint32_1));    const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),                                          vec_sl(t2B, vuint32_1));    const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),                                          vec_sl(t2C, vuint32_1));    const vector signed int yA = vec_and(t2A, t3A);    const vector signed int yB = vec_and(t2B, t3B);    const vector signed int yC = vec_and(t2C, t3C);    const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);    const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);    const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);    const vector signed int sumAd8 = vec_perm(yA, yB, strangeperm2);    const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);    const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);    const vector signed int sumAp = vec_and(yA,                                            vec_and(sumAd4,sumAd8));    const vector signed int sumBp = vec_and(yB,                                            vec_and(sumBd4,sumBd8));    sumA2 = vec_or(sumAp,                   vec_sra(sumAp,                           vuint32_16));    sumB2  = vec_or(sumBp,                    
vec_sra(sumBp,                            vuint32_16));    }    vec_st(sumA2, 0, S);    vec_st(sumB2, 16, S);    }    /* I'm not sure the following is actually faster       than straight, unvectorized C code :-( */    DECLARE_ALIGNED(16, int, tQP2[4]);    tQP2[0]= c->QP/2 + 1;    vector signed int vQP2 = vec_ld(0, tQP2);    vQP2 = vec_splat(vQP2, 0);    const vector signed int vsint32_8 = vec_splat_s32(8);    const vector unsigned int vuint32_4 = vec_splat_u32(4);    const vector unsigned char permA1 = (vector unsigned char)        AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,            0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);    const vector unsigned char permA2 = (vector unsigned char)        AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,            0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);    const vector unsigned char permA1inc = (vector unsigned char)        AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);    const vector unsigned char permA2inc = (vector unsigned char)        AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);    const vector unsigned char magic = (vector unsigned char)        AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,            0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);    const vector unsigned char extractPerm = (vector unsigned char)        AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,            0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);    const vector unsigned char extractPermInc = (vector unsigned char)        AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);    const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);    const vector unsigned char tenRight = (vector unsigned char)        AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00);    const vector unsigned char eightLeft = (vector unsigned char)        AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);#define F_INIT(i)                                       \    vector unsigned char tenRightM##i = tenRight;       \    vector unsigned char permA1M##i = permA1;           \    vector unsigned char permA2M##i = permA2;           \    vector unsigned char extractPermM##i = extractPerm#define F2(i, j, k, l)                                                  \    if (S[i] & (1 << (l+1))) {                                          \        const vector unsigned char a_##j##_A##l =                       \            vec_perm(src##i, src##j, permA1M##i);                       \        const vector unsigned char a_##j##_B##l =                       \            vec_perm(a_##j##_A##l, src##k, permA2M##i);                 \        const vector signed int a_##j##_sump##l =                       \            (vector signed int)vec_msum(a_##j##_B##l, magic,            \                                        (vector unsigned int)zero);     \        vector signed int F_##j##_##l =                                 \            vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);    \        F_##j##_##l = vec_splat(F_##j##_##l, 3);                        \        const vector signed int p_##j##_##l =                           \            (vector signed int)vec_perm(src##j,                         \                                        (vector unsigned char)zero,     \                                        extractPermM##i);               \        const vector signed int sum_##j##_##l  = vec_add( p_##j##_##l, vQP2);\        const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\        vector signed int newpm_##j##_##l;                              \        if (vec_all_lt(sum_##j##_##l, F_##j##_##l))                     \            newpm_##j##_##l = sum_##j##_##l;        
                    \        else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))               \            newpm_##j##_##l = diff_##j##_##l;                           \        else newpm_##j##_##l = F_##j##_##l;                             \        const vector unsigned char newpm2_##j##_##l =                   \            vec_splat((vector unsigned char)newpm_##j##_##l, 15);       \        const vector unsigned char mask##j##l = vec_add(identity,       \                                                        tenRightM##i);  \        src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);        \    }                                                                   \    permA1M##i = vec_add(permA1M##i, permA1inc);                        \    permA2M##i = vec_add(permA2M##i, permA2inc);                        \    tenRightM##i = vec_sro(tenRightM##i, eightLeft);                    \    extractPermM##i = vec_add(extractPermM##i, extractPermInc)#define ITER(i, j, k)                           \    F_INIT(i);                                  \    F2(i, j, k, 0);                             \    F2(i, j, k, 1);                             \    F2(i, j, k, 2);                             \    F2(i, j, k, 3);                             \    F2(i, j, k, 4);                             \    F2(i, j, k, 5);                             \    F2(i, j, k, 6);                             \    F2(i, j, k, 7)    ITER(0, 1, 2);    ITER(1, 2, 3);    ITER(2, 3, 4);    ITER(3, 4, 5);    ITER(4, 5, 6);    ITER(5, 6, 7);    ITER(6, 7, 8);    ITER(7, 8, 9);    const vector signed char neg1 = vec_splat_s8(-1);#define STORE_LINE(i)                                   \    const vector unsigned char permST##i =              \        vec_lvsr(i * stride, srcCopy);                  \    const vector unsigned char maskST##i =              \        vec_perm((vector unsigned char)zero,            \                 (vector unsigned char)neg1, permST##i);\    src##i = vec_perm(src##i ,src##i, 
permST##i);       \    sA##i= vec_sel(sA##i, src##i, maskST##i);           \    sB##i= vec_sel(src##i, sB##i, maskST##i);           \    vec_st(sA##i, i * stride, srcCopy);                 \    vec_st(sB##i, i * stride + 16, srcCopy)    STORE_LINE(1);    STORE_LINE(2);    STORE_LINE(3);    STORE_LINE(4);    STORE_LINE(5);    STORE_LINE(6);    STORE_LINE(7);    STORE_LINE(8);#undef STORE_LINE#undef ITER#undef F2}#define doHorizLowPass_altivec(a...) doHorizLowPass_C(a)#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)#define do_a_deblock_altivec(a...) do_a_deblock_C(a)static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,                                            uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise){    const vector signed int zero = vec_splat_s32(0);    const vector signed short vsint16_1 = vec_splat_s16(1);    vector signed int v_dp = zero;    vector signed int v_sysdp = zero;    int d, sysd, i;    tempBlurredPast[127]= maxNoise[0];    tempBlurredPast[128]= maxNoise[1];    tempBlurredPast[129]= maxNoise[2];#define LOAD_LINE(src, i)                                               \    register int j##src##i = i * stride;                                \    vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);       \    const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \    const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \    const vector unsigned char v_##src##A##i =                          \        vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);         \    vector signed short v_##src##Ass##i =                               \        (vector signed short)vec_mergeh((vector signed char)zero,       \                                        (vector signed char)v_##src##A##i)    LOAD_LINE(src, 0);    LOAD_LINE(src, 1);    LOAD_LINE(src, 2);    LOAD_LINE(src, 3);    LOAD_LINE(src, 4);    LOAD_LINE(src, 5);    LOAD_LINE(src, 6);    LOAD_LINE(src, 7);    
LOAD_LINE(tempBlurred, 0);    LOAD_LINE(tempBlurred, 1);    LOAD_LINE(tempBlurred, 2);    LOAD_LINE(tempBlurred, 3);    LOAD_LINE(tempBlurred, 4);    LOAD_LINE(tempBlurred, 5);    LOAD_LINE(tempBlurred, 6);    LOAD_LINE(tempBlurred, 7);#undef LOAD_LINE#define ACCUMULATE_DIFFS(i)                                     \    vector signed short v_d##i = vec_sub(v_tempBlurredAss##i,   \                                         v_srcAss##i);          \    v_dp = vec_msums(v_d##i, v_d##i, v_dp);                     \    v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)    ACCUMULATE_DIFFS(0);    ACCUMULATE_DIFFS(1);    ACCUMULATE_DIFFS(2);    ACCUMULATE_DIFFS(3);    ACCUMULATE_DIFFS(4);    ACCUMULATE_DIFFS(5);    ACCUMULATE_DIFFS(6);    ACCUMULATE_DIFFS(7);#undef ACCUMULATE_DIFFS    v_dp = vec_sums(v_dp, zero);    v_sysdp = vec_sums(v_sysdp, zero);    v_dp = vec_splat(v_dp, 3);    v_sysdp = vec_splat(v_sysdp, 3);    vec_ste(v_dp, 0, &d);    vec_ste(v_sysdp, 0, &sysd);    i = d;    d = (4*d         +(*(tempBlurredPast-256))         +(*(tempBlurredPast-1))+ (*(tempBlurredPast+1))         +(*(tempBlurredPast+256))         +4)>>3;    *tempBlurredPast=i;    if (d > maxNoise[1]) {        if (d < maxNoise[2]) {#define OP(i) v_tempBlurredAss##i = vec_avg(v_tempBlurredAss##i, v_srcAss##i);            OP(0);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -