⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 h264dsp_mmx.c

📁 mpeg4 video codec mpeg4 video codec
💻 C
📖 第 1 页 / 共 3 页
字号:
        "movq    %%mm1,   (%0,%2)   \n\t"
        "movq    %%mm2,   (%1)      \n\t"
        :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
           "m"(alpha1), "m"(beta1), "m"(mm_bone)
    );
}

/**
 * Chroma intra loop-filter entry point (the "v" variant, going by its name).
 *
 * Forwards pix and stride unchanged to h264_loop_filter_chroma_intra_mmx2()
 * and passes alpha-1 and beta-1 as the two thresholds.
 *
 * NOTE(review): the reason for the -1 adjustment lives in the helper, whose
 * body is not fully visible here -- confirm before changing it.
 */
static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1);
}

/**
 * "h" variant of the chroma intra loop filter: runs the same helper as the
 * "v" variant, but on a transposed copy of the pixels.
 *
 * Two transpose4x4() calls copy the 4x4 tiles at pix-2 and pix-2+4*stride
 * into the local buffer trans (row pitch 8, second tile at byte offset 4).
 * The helper is then called with trans+2*8 as its pix and 8 as its stride,
 * and two more transpose4x4() calls write both tiles back to the picture.
 *
 * transpose4x4() is defined elsewhere; judging from the call sites its
 * arguments are (dst, src, dst_stride, src_stride) -- TODO confirm.
 */
static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
{
    //FIXME: could cut some load/stores by merging transpose with filter
    uint8_t trans[8*4];                                  /* 4 rows of 8 bytes */
    transpose4x4(trans, pix-2, 8, stride);               /* tile from rows 0..3 of the picture */
    transpose4x4(trans+4, pix-2+4*stride, 8, stride);    /* tile from rows 4..7 of the picture */
    /* pix = third row of trans, so the helper's pix-2*stride is trans itself */
    h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
    transpose4x4(pix-2, trans, stride, 8);               /* write the first tile back */
    transpose4x4(pix-2+4*stride, trans+4, stride, 8);    /* write the second tile back */
}

/***********************************/
/* motion compensation */

/*
 * QPEL_H264V: one output row of a vertical filter, as an inline-asm fragment.
 *
 * A..F are MMX register names, pasted into the instruction strings with the
 * # operator.  A..E are expected to already hold rows widened to 16-bit
 * words; the fragment loads the next row itself:
 *   - movd reads 4 bytes from (%0) into F, %0 is advanced by %2, and F is
 *     widened with punpcklbw against %%mm7 (the users visible in this file
 *     clear %%mm7 with pxor beforehand);
 *   - %%mm6 = ((C + D) << 2) - B - E, multiplied word-wise by operand %4;
 *   - A = A + %5 + F, then %%mm6 = (%%mm6 + A) >> 5 (arithmetic shift),
 *     packed to bytes with unsigned saturation;
 *   - OP(%%mm6, (%1), A, d) emits the store, with A handed over as a
 *     scratch register, and %1 is advanced by %3.
 * A and %%mm6 are overwritten.  Consecutive uses rotate the register list
 * by one so that the row just loaded becomes the new E.
 *
 * The visible users bind %4 to ff_pw_5 and %5 to ff_pw_16; assuming those
 * hold words of 5 and 16, the result is
 * (A - 5*B + 20*C + 20*D - 5*E + F + 16) >> 5 -- TODO confirm the constants.
 */
#define QPEL_H264V(A,B,C,D,E,F,OP)\
        "movd (%0), "#F"		\n\t"\
        "movq "#C", %%mm6		\n\t"\
        "paddw "#D", %%mm6		\n\t"\
        "psllw $2, %%mm6		\n\t"\
        "psubw "#B", %%mm6		\n\t"\
        "psubw "#E", %%mm6		\n\t"\
        "pmullw %4, %%mm6		\n\t"\
        "add %2, %0			\n\t"\
        "punpcklbw %%mm7, "#F"		\n\t"\
        "paddw %5, "#A"			\n\t"\
        "paddw "#F", "#A"		\n\t"\
        "paddw "#A", %%mm6		\n\t"\
        "psraw $5, %%mm6		\n\t"\
        "packuswb %%mm6, %%mm6		\n\t"\
        OP(%%mm6, (%1), A, d)\
        "add %3, %1			\n\t"

/*
 * QPEL_H264HV: vertical pass that keeps 16-bit intermediates, as an
 * inline-asm fragment.
 *
 * Same register roles and row loading as QPEL_H264V, with these differences:
 *   - the multiplier is operand %3 instead of %4;
 *   - no rounding constant is added and no shift or pack is performed;
 *   - the four 16-bit sums in %%mm6 are stored with movq at byte offset OF
 *     from %1, and %1 itself is not advanced.
 * A and %%mm6 are overwritten.
 */
#define QPEL_H264HV(A,B,C,D,E,F,OF)\
        "movd (%0), "#F"		\n\t"\
        "movq "#C", %%mm6		\n\t"\
        "paddw "#D", %%mm6		\n\t"\
        "psllw $2, %%mm6		\n\t"\
        "psubw "#B", %%mm6		\n\t"\
        "psubw "#E", %%mm6		\n\t"\
        "pmullw %3, %%mm6		\n\t"\
        "add %2, %0			\n\t"\
        "punpcklbw %%mm7, "#F"		\n\t"\
        "paddw "#F", "#A"		\n\t"\
        "paddw "#A", %%mm6		\n\t"\
        "movq %%mm6, "#OF"(%1)		\n\t"

#define QPEL_H264(OPNAME, OP, MMX)\
static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, 
int dstStride, int srcStride){\    int h=4;\\    asm volatile(\        "pxor %%mm7, %%mm7		\n\t"\        "movq %5, %%mm4			\n\t"\        "movq %6, %%mm5			\n\t"\        "1:				\n\t"\        "movd  -1(%0), %%mm1		\n\t"\        "movd    (%0), %%mm2		\n\t"\        "movd   1(%0), %%mm3		\n\t"\        "movd   2(%0), %%mm0		\n\t"\        "punpcklbw %%mm7, %%mm1		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpcklbw %%mm7, %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm0		\n\t"\        "paddw %%mm0, %%mm1		\n\t"\        "paddw %%mm3, %%mm2		\n\t"\        "movd  -2(%0), %%mm0		\n\t"\        "movd   3(%0), %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm0		\n\t"\        "punpcklbw %%mm7, %%mm3		\n\t"\        "paddw %%mm3, %%mm0		\n\t"\        "psllw $2, %%mm2		\n\t"\        "psubw %%mm1, %%mm2		\n\t"\        "pmullw %%mm4, %%mm2		\n\t"\        "paddw %%mm5, %%mm0		\n\t"\        "paddw %%mm2, %%mm0		\n\t"\        "psraw $5, %%mm0		\n\t"\        "packuswb %%mm0, %%mm0		\n\t"\        OP(%%mm0, (%1),%%mm6, d)\        "add %3, %0			\n\t"\        "add %4, %1			\n\t"\        "decl %2			\n\t"\        " jnz 1b			\n\t"\        : "+a"(src), "+c"(dst), "+m"(h)\        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    src -= 2*srcStride;\    asm volatile(\        "pxor %%mm7, %%mm7		\n\t"\        "movd (%0), %%mm0		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm1		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm2		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm3		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm4		\n\t"\        "add %2, %0			\n\t"\        "punpcklbw %%mm7, %%mm0		\n\t"\        "punpcklbw %%mm7, %%mm1		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpcklbw %%mm7, %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm4		\n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, 
%%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    int h=4;\    int w=3;\    src -= 2*srcStride+2;\    while(w--){\        asm volatile(\            "pxor %%mm7, %%mm7			\n\t"\            "movd (%0), %%mm0			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm1			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm2			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm3			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm4			\n\t"\            "add %2, %0				\n\t"\            "punpcklbw %%mm7, %%mm0		\n\t"\            "punpcklbw %%mm7, %%mm1		\n\t"\            "punpcklbw %%mm7, %%mm2		\n\t"\            "punpcklbw %%mm7, %%mm3		\n\t"\            "punpcklbw %%mm7, %%mm4		\n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\             \            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\            : "memory"\        );\        tmp += 4;\        src += 4 - 9*srcStride;\    }\    tmp -= 3*4;\    asm volatile(\        "movq %4, %%mm6			\n\t"\        "1:				\n\t"\        "movq     (%0), %%mm0		\n\t"\        "paddw  10(%0), %%mm0		\n\t"\        "movq    2(%0), %%mm1		\n\t"\        "paddw   8(%0), %%mm1		\n\t"\        "movq    4(%0), %%mm2		\n\t"\        "paddw   6(%0), %%mm2		\n\t"\  
      "psubw %%mm1, %%mm0		\n\t"/*a-b   (abccba)*/\        "psraw $2, %%mm0		\n\t"/*(a-b)/4 */\        "psubw %%mm1, %%mm0		\n\t"/*(a-b)/4-b */\        "paddsw %%mm2, %%mm0		\n\t"\        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\        "paddw %%mm6, %%mm2		\n\t"\        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 +32 */\        "psraw $6, %%mm0		\n\t"\        "packuswb %%mm0, %%mm0		\n\t"\        OP(%%mm0, (%1),%%mm7, d)\        "add $24, %0			\n\t"\        "add %3, %1			\n\t"\        "decl %2			\n\t"\        " jnz 1b			\n\t"\        : "+a"(tmp), "+c"(dst), "+m"(h)\        : "S"((long)dstStride), "m"(ff_pw_32)\        : "memory"\    );\}\\static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    int h=8;\    asm volatile(\        "pxor %%mm7, %%mm7		\n\t"\        "movq %5, %%mm6			\n\t"\        "1:				\n\t"\        "movq    (%0), %%mm0		\n\t"\        "movq   1(%0), %%mm2		\n\t"\        "movq %%mm0, %%mm1		\n\t"\        "movq %%mm2, %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm0		\n\t"\        "punpckhbw %%mm7, %%mm1		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpckhbw %%mm7, %%mm3		\n\t"\        "paddw %%mm2, %%mm0		\n\t"\        "paddw %%mm3, %%mm1		\n\t"\        "psllw $2, %%mm0		\n\t"\        "psllw $2, %%mm1		\n\t"\        "movq   -1(%0), %%mm2		\n\t"\        "movq    2(%0), %%mm4		\n\t"\        "movq %%mm2, %%mm3		\n\t"\        "movq %%mm4, %%mm5		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpckhbw %%mm7, %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm4		\n\t"\        "punpckhbw %%mm7, %%mm5		\n\t"\        "paddw %%mm4, %%mm2		\n\t"\        "paddw %%mm3, %%mm5		\n\t"\        "psubw %%mm2, %%mm0		\n\t"\        "psubw %%mm5, %%mm1		\n\t"\        "pmullw %%mm6, %%mm0		\n\t"\        "pmullw %%mm6, %%mm1		\n\t"\        "movd   -2(%0), %%mm2		\n\t"\        "movd    7(%0), %%mm5		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpcklbw %%mm7, %%mm5		\n\t"\        
"paddw %%mm3, %%mm2		\n\t"\        "paddw %%mm5, %%mm4		\n\t"\        "movq %6, %%mm5			\n\t"\        "paddw %%mm5, %%mm2		\n\t"\        "paddw %%mm5, %%mm4		\n\t"\        "paddw %%mm2, %%mm0		\n\t"\        "paddw %%mm4, %%mm1		\n\t"\        "psraw $5, %%mm0		\n\t"\        "psraw $5, %%mm1		\n\t"\        "packuswb %%mm1, %%mm0		\n\t"\        OP(%%mm0, (%1),%%mm5, q)\        "add %3, %0			\n\t"\        "add %4, %1			\n\t"\        "decl %2			\n\t"\        " jnz 1b			\n\t"\        : "+a"(src), "+c"(dst), "+m"(h)\        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\\static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    int h= 2;\    src -= 2*srcStride;\    \    while(h--){\      asm volatile(\        "pxor %%mm7, %%mm7		\n\t"\        "movd (%0), %%mm0		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm1		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm2		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm3		\n\t"\        "add %2, %0			\n\t"\        "movd (%0), %%mm4		\n\t"\        "add %2, %0			\n\t"\        "punpcklbw %%mm7, %%mm0		\n\t"\        "punpcklbw %%mm7, %%mm1		\n\t"\        "punpcklbw %%mm7, %%mm2		\n\t"\        "punpcklbw %%mm7, %%mm3		\n\t"\        "punpcklbw %%mm7, %%mm4		\n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\        QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\        QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        
: "memory"\     );\     src += 4-13*srcStride;\     dst +=  4-8*dstStride;\   }\}\static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    int h=8;\    int w=4;\    src -= 2*srcStride+2;\    while(w--){\        asm volatile(\            "pxor %%mm7, %%mm7			\n\t"\            "movd (%0), %%mm0			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm1			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm2			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm3			\n\t"\            "add %2, %0				\n\t"\            "movd (%0), %%mm4			\n\t"\            "add %2, %0				\n\t"\            "punpcklbw %%mm7, %%mm0		\n\t"\            "punpcklbw %%mm7, %%mm1		\n\t"\            "punpcklbw %%mm7, %%mm2		\n\t"\            "punpcklbw %%mm7, %%mm3		\n\t"\            "punpcklbw %%mm7, %%mm4		\n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*4)\            QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*8*4)\            QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*8*4)\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*8*4)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*8*4)\             \            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\            : "memory"\        );\        tmp += 4;\        src += 4 - 13*srcStride;\    }\    tmp -= 4*4;\    asm volatile(\        "movq %4, %%mm6			\n\t"\        "1:				\n\t"\        "movq     (%0), %%mm0		\n\t"\        "movq    8(%0), %%mm3		\n\t"\        "movq    2(%0), %%mm1		\n\t"\        "movq   10(%0), %%mm4		\n\t"\        "paddw   %%mm4, %%mm0		\n\t"\        "paddw   %%mm3, %%mm1		
\n\t"\        "paddw  18(%0), %%mm3		\n\t"\        "paddw  16(%0), %%mm4		\n\t"\        "movq    4(%0), %%mm2		\n\t"\        "movq   12(%0), %%mm5		\n\t"\        "paddw   6(%0), %%mm2		\n\t"\        "paddw  14(%0), %%mm5		\n\t"\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -