⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 postprocess_template.c

📁 ffmpeg的完整源代码和作者自己写的文档。不但有在Linux的工程哦
💻 C
📖 第 1 页 / 共 5 页
字号:
                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1

                PAVGB((%%REGc, %1), %%mm5)                    // (l6-l5+256)/2
                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
                "movq "MANGLE(b80)", %%mm2              \n\t" // 128
                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128

                "movq "MANGLE(b00)", %%mm1              \n\t" // 0
                "movq "MANGLE(b00)", %%mm5              \n\t" // 0
                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
                 PMAXUB(%%mm5, %%mm3)                         // 128 + |lenergy/16|
                PMINUB(%%mm2, %%mm3, %%mm1)                   // 128 + MIN(|lenergy|,|renergy|)/16

// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128

                "movq "MANGLE(b00)", %%mm7              \n\t" // 0
                "movq %2, %%mm2                         \n\t" // QP
                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
                "psubb %%mm6, %%mm2                     \n\t"

                "movq %%mm4, %%mm1                      \n\t"
                "pcmpgtb %%mm7, %%mm1                   \n\t" // SIGN(menergy)
                "pxor %%mm1, %%mm4                      \n\t"
                "psubb %%mm1, %%mm4                     \n\t" // 128 + |menergy|/16
                "pcmpgtb %%mm4, %%mm2                   \n\t" // |menergy|/16 < QP/2
                "psubusb %%mm3, %%mm4                   \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16

                "movq %%mm4, %%mm3                      \n\t" // d
                "psubusb "MANGLE(b01)", %%mm4           \n\t"
                PAVGB(%%mm7, %%mm4)                           // d/32
                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
                "pand %%mm2, %%mm4                      \n\t"

                "movq "MANGLE(b80)", %%mm5              \n\t" // 128
                "psubb %%mm0, %%mm5                     \n\t" // q
                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
                "pxor %%mm7, %%mm5                      \n\t"

                PMINUB(%%mm5, %%mm4, %%mm3)                   // MIN(|q|, 5d/64)
                "pxor %%mm1, %%mm7                      \n\t" // SIGN(d*q)

                "pand %%mm7, %%mm4                      \n\t"
                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
                "movq (%0, %1, 4), %%mm2                \n\t"
                "pxor %%mm1, %%mm0                      \n\t"
                "pxor %%mm1, %%mm2                      \n\t"
                "paddb %%mm4, %%mm0                     \n\t"
                "psubb %%mm4, %%mm2                     \n\t"
                "pxor %%mm1, %%mm0                      \n\t"
                "pxor %%mm1, %%mm2                      \n\t"
                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
                "movq %%mm2, (%0, %1, 4)                \n\t"

                :
                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
                : "%"REG_a, "%"REG_c
        );

/*
        {
        int x;
        src-= stride;
        for(x=0; x<BLOCK_SIZE; x++)
        {
                const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
                if(FFABS(middleEnergy)< 8*QP)
                {
                        const int q=(src[l4] - src[l5])/2;
                        const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
                        const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);

                        int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                        d= FFMAX(d, 0);

                        d= (5*d + 32) >> 6;
                        d*= FFSIGN(-middleEnergy);

                        if(q>0)
                        {
                                d= d<0 ? 0 : d;
                                d= d>q ? q : d;
                        }
                        else
                        {
                                d= d>0 ? 0 : d;
                                d= d<q ? q : d;
                        }

                        src[l4]-= d;
                        src[l5]+= d;
                }
                src++;
        }
src-=8;
        for(x=0; x<8; x++)
        {
                int y;
                for(y=4; y<6; y++)
                {
                        int d= src[x+y*stride] - tmp[x+(y-4)*8];
                        int ad= FFABS(d);
                        static int max=0;
                        static int sum=0;
                        static int num=0;
                        static int bias=0;

                        if(max<ad) max=ad;
                        sum+= ad>3 ? 1 : 0;
                        if(ad>3)
                        {
                                src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
                        }
                        if(y==4) bias+=d;
                        num++;
                        if(num%1000000 == 0)
                        {
                                av_log(c, AV_LOG_INFO, " %d %d %d %d\n", num, sum, max, bias);
                        }
                }
        }
}
*/
#elif defined (HAVE_MMX)
        src+= stride*4;
        asm volatile(
                "pxor %%mm7, %%mm7                      \n\t"
                "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
                "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
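// Source-line addressing used by the loads below. REG_a is set to %0+2*%1 by the
// first lea; %0 is later advanced to REG_a+%1 (line 3) before lines 5 and 7 are read.
//      0       1       2       3           4            5        6            7
//      %0      %0+%1   REG_a   REG_a+%1    REG_a+2*%1   %0+2*%1  REG_a+4*%1   %0+4*%1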

                "movq (%0), %%mm0                       \n\t"
                "movq %%mm0, %%mm1                      \n\t"
                "punpcklbw %%mm7, %%mm0                 \n\t" // low part of line 0
                "punpckhbw %%mm7, %%mm1                 \n\t" // high part of line 0

                "movq (%0, %1), %%mm2                   \n\t"
                "lea (%0, %1, 2), %%"REG_a"             \n\t"
                "movq %%mm2, %%mm3                      \n\t"
                "punpcklbw %%mm7, %%mm2                 \n\t" // low part of line 1
                "punpckhbw %%mm7, %%mm3                 \n\t" // high part of line 1

                "movq (%%"REG_a"), %%mm4                \n\t"
                "movq %%mm4, %%mm5                      \n\t"
                "punpcklbw %%mm7, %%mm4                 \n\t" // low part of line 2
                "punpckhbw %%mm7, %%mm5                 \n\t" // high part of line 2

                "paddw %%mm0, %%mm0                     \n\t" // 2L0
                "paddw %%mm1, %%mm1                     \n\t" // 2H0
                "psubw %%mm4, %%mm2                     \n\t" // L1 - L2
                "psubw %%mm5, %%mm3                     \n\t" // H1 - H2
                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - L1 + L2
                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - H1 + H2

                "psllw $2, %%mm2                        \n\t" // 4L1 - 4L2
                "psllw $2, %%mm3                        \n\t" // 4H1 - 4H2
                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2
                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2

                "movq (%%"REG_a", %1), %%mm2            \n\t"
                "movq %%mm2, %%mm3                      \n\t"
                "punpcklbw %%mm7, %%mm2                 \n\t" // L3
                "punpckhbw %%mm7, %%mm3                 \n\t" // H3

                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - L3
                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
                "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
                "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3

                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
                "movq %%mm0, %%mm1                      \n\t"
                "punpcklbw %%mm7, %%mm0                 \n\t" // L4
                "punpckhbw %%mm7, %%mm1                 \n\t" // H4

                "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
                "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
                "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
                "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
                "paddw %%mm4, %%mm4                     \n\t" // 2L2
                "paddw %%mm5, %%mm5                     \n\t" // 2H2
                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - H3 + H4

                "lea (%%"REG_a", %1), %0                \n\t"
                "psllw $2, %%mm2                        \n\t" // 4L3 - 4L4
                "psllw $2, %%mm3                        \n\t" // 4H3 - 4H4
                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4
                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4
//50 opcodes so far
                "movq (%0, %1, 2), %%mm2                \n\t"
                "movq %%mm2, %%mm3                      \n\t"
                "punpcklbw %%mm7, %%mm2                 \n\t" // L5
                "punpckhbw %%mm7, %%mm3                 \n\t" // H5
                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - L5
                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - H5
                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - 2L5
                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - 2H5

                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
                "punpcklbw %%mm7, %%mm6                 \n\t" // L6
                "psubw %%mm6, %%mm2                     \n\t" // L5 - L6
                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
                "punpckhbw %%mm7, %%mm6                 \n\t" // H6
                "psubw %%mm6, %%mm3                     \n\t" // H5 - H6

                "paddw %%mm0, %%mm0                     \n\t" // 2L4
                "paddw %%mm1, %%mm1                     \n\t" // 2H4
                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - L5 + L6
                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - H5 + H6

                "psllw $2, %%mm2                        \n\t" // 4L5 - 4L6
                "psllw $2, %%mm3                        \n\t" // 4H5 - 4H6
                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6
                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6

                "movq (%0, %1, 4), %%mm2                \n\t"
                "movq %%mm2, %%mm3                      \n\t"
                "punpcklbw %%mm7, %%mm2                 \n\t" // L7
                "punpckhbw %%mm7, %%mm3                 \n\t" // H7

                "paddw %%mm2, %%mm2                     \n\t" // 2L7
                "paddw %%mm3, %%mm3                     \n\t" // 2H7
                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7

                "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
                "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3

#ifdef HAVE_MMX2
                "movq %%mm7, %%mm6                      \n\t" // 0
                "psubw %%mm0, %%mm6                     \n\t"
                "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
                "movq %%mm7, %%mm6                      \n\t" // 0
                "psubw %%mm1, %%mm6                     \n\t"
                "pmaxsw %%mm6, %%mm1                    \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
                "movq %%mm7, %%mm6                      \n\t" // 0
                "psubw %%mm2, %%mm6                     \n\t"
                "pmaxsw %%mm6, %%mm2                    \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
                "movq %%mm7, %%mm6                      \n\t" // 0
                "psubw %%mm3, %%mm6                     \n\t"
                "pmaxsw %%mm6, %%mm3                    \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
#else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -