⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 h264dsp_mmx.c.svn-base

📁 mediastreamer2是开源的网络传输媒体流的库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
            if(!(mask_mv & edge)) {                asm volatile("pxor %%mm0, %%mm0 \n\t":);                for( l = bidir; l >= 0; l-- ) {                    asm volatile(                        "movd %0, %%mm1 \n\t"                        "punpckldq %1, %%mm1 \n\t"                        "movq %%mm1, %%mm2 \n\t"                        "psrlw $7, %%mm2 \n\t"                        "pand %%mm6, %%mm2 \n\t"                        "por %%mm2, %%mm1 \n\t" // ref_cache with -2 mapped to -1                        "punpckldq %%mm1, %%mm2 \n\t"                        "pcmpeqb %%mm2, %%mm1 \n\t"                        "paddb %%mm6, %%mm1 \n\t"                        "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]                        "por %%mm1, %%mm0 \n\t"                        "movq %2, %%mm1 \n\t"                        "movq %3, %%mm2 \n\t"                        "psubw %4, %%mm1 \n\t"                        "psubw %5, %%mm2 \n\t"                        "packsswb %%mm2, %%mm1 \n\t"                        "paddb %%mm5, %%mm1 \n\t"                        "pminub %%mm4, %%mm1 \n\t"                        "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit                        "por %%mm1, %%mm0 \n\t"                        ::"m"(ref[l][b_idx]),                          "m"(ref[l][b_idx+d_idx]),                          "m"(mv[l][b_idx][0]),                          "m"(mv[l][b_idx+2][0]),                          "m"(mv[l][b_idx+d_idx][0]),                          "m"(mv[l][b_idx+d_idx+2][0])                    );                }            }            asm volatile(                "movd %0, %%mm1 \n\t"                "por  %1, %%mm1 \n\t"                "punpcklbw %%mm7, %%mm1 \n\t"                "pcmpgtw %%mm7, %%mm1 \n\t" // nnz[b] || nnz[bn]                ::"m"(nnz[b_idx]),                  "m"(nnz[b_idx+d_idx])            );            asm volatile(                "pcmpeqw %%mm7, %%mm0 \n\t"                "pcmpeqw %%mm7, %%mm0 \n\t"                
"psrlw $15, %%mm0 \n\t" // nonzero -> 1                "psrlw $14, %%mm1 \n\t"                "movq %%mm0, %%mm2 \n\t"                "por %%mm1, %%mm2 \n\t"                "psrlw $1, %%mm1 \n\t"                "pandn %%mm2, %%mm1 \n\t"                "movq %%mm1, %0 \n\t"                :"=m"(*bS[dir][edge])                ::"memory"            );        }        edges = 4;        step = 1;    }    asm volatile(        "movq   (%0), %%mm0 \n\t"        "movq  8(%0), %%mm1 \n\t"        "movq 16(%0), %%mm2 \n\t"        "movq 24(%0), %%mm3 \n\t"        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)        "movq %%mm0,   (%0) \n\t"        "movq %%mm3,  8(%0) \n\t"        "movq %%mm4, 16(%0) \n\t"        "movq %%mm2, 24(%0) \n\t"        ::"r"(bS[0])        :"memory"    );}/***********************************//* motion compensation *//* QPEL_H264V_MM(A..F, OP, T, Z, d, q): asm text for one output row of the vertical pass. Loads the next source row from (%0) into F with mov#d and interleaves it with Z (zero in the callers visible here, which pxor it first), then forms T = (A + F + %5 + %4 * (4*(C+D) - B - E)) >> 5, packs T with unsigned saturation and hands it to OP with destination (%1). %0 advances by %2 and %1 by %3. A and T are overwritten. OP is a macro argument whose definition is outside this chunk; %4 and %5 are bound by the caller (ff_pw_5 and ff_pw_16 in the visible v_lowpass). */#define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\        "mov"#q" "#C", "#T"         \n\t"\        "mov"#d" (%0), "#F"         \n\t"\        "paddw "#D", "#T"           \n\t"\        "psllw $2, "#T"             \n\t"\        "psubw "#B", "#T"           \n\t"\        "psubw "#E", "#T"           \n\t"\        "punpcklbw "#Z", "#F"       \n\t"\        "pmullw %4, "#T"            \n\t"\        "paddw %5, "#A"             \n\t"\        "add %2, %0                 \n\t"\        "paddw "#F", "#A"           \n\t"\        "paddw "#A", "#T"           \n\t"\        "psraw $5, "#T"             \n\t"\        "packuswb "#T", "#T"        \n\t"\        OP(T, (%1), A, d)\        "add %3, %1                 \n\t"/* QPEL_H264HV_MM(A..F, OF, T, Z, d, q): same row arithmetic without the final shift and pack. T = A + F + %4 + %3 * (4*(C+D) - B - E) is written as-is with mov#q to offset OF from (%1); only %0 is advanced (by %2). Operand numbering differs from QPEL_H264V_MM: here %3 is the multiplier and %4 the addend. A and T are overwritten. */#define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\        "mov"#q" "#C", "#T"         \n\t"\        "mov"#d" (%0), "#F"         \n\t"\        "paddw "#D", "#T"           \n\t"\        "psllw $2, "#T"             \n\t"\        "paddw %4, "#A"             \n\t"\        "psubw "#B", "#T"           \n\t"\        "psubw "#E", "#T"           \n\t"\        "punpcklbw "#Z", "#F"       \n\t"\        "pmullw %3, "#T"            \n\t"\        "paddw "#F", "#A"          
 \n\t"\        "add %2, %0                 \n\t"\        "paddw "#A", "#T"           \n\t"\        "mov"#q" "#T", "#OF"(%1)    \n\t"/* Register bindings for the two macros above: the plain forms pass %%mm6 as T and %%mm7 as Z with d=d, q=q, giving movd loads and movq copies; the _XMM forms pass %%xmm6 and %%xmm7 with d=q, q=dqa, giving movq loads and movdqa copies. */#define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q)#define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q)#define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa)#define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa)#define QPEL_H264(OPNAME, OP, MMX)\static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    int h=4;\\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %5, %%mm4             \n\t"\        "movq %6, %%mm5             \n\t"\        "1:                         \n\t"\        "movd  -1(%0), %%mm1        \n\t"\        "movd    (%0), %%mm2        \n\t"\        "movd   1(%0), %%mm3        \n\t"\        "movd   2(%0), %%mm0        \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "paddw %%mm0, %%mm1         \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "movd  -2(%0), %%mm0        \n\t"\        "movd   3(%0), %%mm3        \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "paddw %%mm3, %%mm0         \n\t"\        "psllw $2, %%mm2            \n\t"\        "psubw %%mm1, %%mm2         \n\t"\        "pmullw %%mm4, %%mm2        \n\t"\        "paddw %%mm5, %%mm0         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "psraw $5, %%mm0            \n\t"\        "packuswb %%mm0, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm6, d)\        "add %3, %0                 \n\t"\        "add %4, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(src), "+c"(dst), "+g"(h)\      
  : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\    int h=4;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %0, %%mm4             \n\t"\        "movq %1, %%mm5             \n\t"\        :: "m"(ff_pw_5), "m"(ff_pw_16)\    );\    do{\    asm volatile(\        "movd  -1(%0), %%mm1        \n\t"\        "movd    (%0), %%mm2        \n\t"\        "movd   1(%0), %%mm3        \n\t"\        "movd   2(%0), %%mm0        \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "paddw %%mm0, %%mm1         \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "movd  -2(%0), %%mm0        \n\t"\        "movd   3(%0), %%mm3        \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "paddw %%mm3, %%mm0         \n\t"\        "psllw $2, %%mm2            \n\t"\        "psubw %%mm1, %%mm2         \n\t"\        "pmullw %%mm4, %%mm2        \n\t"\        "paddw %%mm5, %%mm0         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "movd   (%2), %%mm3         \n\t"\        "psraw $5, %%mm0            \n\t"\        "packuswb %%mm0, %%mm0      \n\t"\        PAVGB" %%mm3, %%mm0         \n\t"\        OP(%%mm0, (%1),%%mm6, d)\        "add %4, %0                 \n\t"\        "add %4, %1                 \n\t"\        "add %3, %2                 \n\t"\        : "+a"(src), "+c"(dst), "+d"(src2)\        : "D"((long)src2Stride), "S"((long)dstStride)\        : "memory"\    );\    }while(--h);\}\static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    src -= 2*srcStride;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        
"movd (%0), %%mm0           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm1           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm2           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm3           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm4           \n\t"\        "add %2, %0                 \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    int h=4;\    int w=3;\    src -= 2*srcStride+2;\    while(w--){\        asm volatile(\            "pxor %%mm7, %%mm7      \n\t"\            "movd (%0), %%mm0       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm1       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm2       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm3       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm4       \n\t"\            "add %2, %0             \n\t"\            "punpcklbw %%mm7, %%mm0 \n\t"\            "punpcklbw %%mm7, %%mm1 \n\t"\            "punpcklbw %%mm7, %%mm2 \n\t"\            "punpcklbw %%mm7, %%mm3 \n\t"\            "punpcklbw %%mm7, %%mm4 \n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, 
%%mm4, %%mm5, 0*8*3)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\             \            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\            : "memory"\        );\        tmp += 4;\        src += 4 - 9*srcStride;\    }\    tmp -= 3*4;\    asm volatile(\        "1:                         \n\t"\        "movq     (%0), %%mm0       \n\t"\        "paddw  10(%0), %%mm0       \n\t"\        "movq    2(%0), %%mm1       \n\t"\        "paddw   8(%0), %%mm1       \n\t"\        "movq    4(%0), %%mm2       \n\t"\        "paddw   6(%0), %%mm2       \n\t"\        "psubw %%mm1, %%mm0         \n\t"/*a-b   (abccba)*/\        "psraw $2, %%mm0            \n\t"/*(a-b)/4 */\        "psubw %%mm1, %%mm0         \n\t"/*(a-b)/4-b */\        "paddsw %%mm2, %%mm0        \n\t"\        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 */\        "psraw $6, %%mm0            \n\t"\        "packuswb %%mm0, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm7, d)\        "add $24, %0                \n\t"\        "add %3, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(tmp), "+c"(dst), "+g"(h)\        : "S"((long)dstStride)\        : "memory"\    );\}\\static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    int h=8;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %5, %%mm6             \n\t"\        "1:                         \n\t"\        "movq    (%0), %%mm0        \n\t"\        "movq   1(%0), %%mm2        \n\t"\        "movq %%mm0, %%mm1          \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpckhbw 
%%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm3, %%mm1         \n\t"\        "psllw $2, %%mm0            \n\t"\        "psllw $2, %%mm1            \n\t"\        "movq   -1(%0), %%mm2       \n\t"\        "movq    2(%0), %%mm4       \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "movq %%mm4, %%mm5          \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        "punpckhbw %%mm7, %%mm5     \n\t"\        "paddw %%mm4, %%mm2         \n\t"\        "paddw %%mm3, %%mm5         \n\t"\        "psubw %%mm2, %%mm0         \n\t"\        "psubw %%mm5, %%mm1         \n\t"\        "pmullw %%mm6, %%mm0        \n\t"\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -