⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 h264dsp_mmx.c.svn-base

📁 mediastreamer2 是一个开源的网络媒体流传输库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
        "pmullw %%mm6, %%mm1        \n\t"\        "movd   -2(%0), %%mm2       \n\t"\        "movd    7(%0), %%mm5       \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm5     \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "movq %6, %%mm5             \n\t"\        "paddw %%mm5, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm4, %%mm1         \n\t"\        "psraw $5, %%mm0            \n\t"\        "psraw $5, %%mm1            \n\t"\        "packuswb %%mm1, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm5, q)\        "add %3, %0                 \n\t"\        "add %4, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(src), "+c"(dst), "+g"(h)\        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\\static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\    int h=8;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %0, %%mm6             \n\t"\        :: "m"(ff_pw_5)\    );\    do{\    asm volatile(\        "movq    (%0), %%mm0        \n\t"\        "movq   1(%0), %%mm2        \n\t"\        "movq %%mm0, %%mm1          \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpckhbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm3, %%mm1         \n\t"\        "psllw $2, %%mm0            \n\t"\        "psllw $2, %%mm1            \n\t"\        "movq   -1(%0), %%mm2       \n\t"\        "movq    2(%0), %%mm4       \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "movq %%mm4, %%mm5          \n\t"\        "punpcklbw %%mm7, %%mm2  
   \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        "punpckhbw %%mm7, %%mm5     \n\t"\        "paddw %%mm4, %%mm2         \n\t"\        "paddw %%mm3, %%mm5         \n\t"\        "psubw %%mm2, %%mm0         \n\t"\        "psubw %%mm5, %%mm1         \n\t"\        "pmullw %%mm6, %%mm0        \n\t"\        "pmullw %%mm6, %%mm1        \n\t"\        "movd   -2(%0), %%mm2       \n\t"\        "movd    7(%0), %%mm5       \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm5     \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "movq %5, %%mm5             \n\t"\        "paddw %%mm5, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm4, %%mm1         \n\t"\        "psraw $5, %%mm0            \n\t"\        "psraw $5, %%mm1            \n\t"\        "movq (%2), %%mm4           \n\t"\        "packuswb %%mm1, %%mm0      \n\t"\        PAVGB" %%mm4, %%mm0         \n\t"\        OP(%%mm0, (%1),%%mm5, q)\        "add %4, %0                 \n\t"\        "add %4, %1                 \n\t"\        "add %3, %2                 \n\t"\        : "+a"(src), "+c"(dst), "+d"(src2)\        : "D"((long)src2Stride), "S"((long)dstStride),\          "m"(ff_pw_16)\        : "memory"\    );\    }while(--h);\}\\static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\    int w= 2;\    src -= 2*srcStride;\    \    while(w--){\      asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movd (%0), %%mm0           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm1           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm2           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm3           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), 
%%mm4           \n\t"\        "add %2, %0                 \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\        QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\        QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\     );\     if(h==16){\        asm volatile(\            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\            QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\            QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\            QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\            QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\            \           : "+a"(src), "+c"(dst)\           : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\           : "memory"\        );\     }\     src += 4-(h+5)*srcStride;\     dst += 4-h*dstStride;\   }\}\static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\    int w = (size+8)>>2;\    src -= 2*srcStride+2;\    while(w--){\        asm 
volatile(\            "pxor %%mm7, %%mm7      \n\t"\            "movd (%0), %%mm0       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm1       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm2       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm3       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm4       \n\t"\            "add %2, %0             \n\t"\            "punpcklbw %%mm7, %%mm0 \n\t"\            "punpcklbw %%mm7, %%mm1 \n\t"\            "punpcklbw %%mm7, %%mm2 \n\t"\            "punpcklbw %%mm7, %%mm3 \n\t"\            "punpcklbw %%mm7, %%mm4 \n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\            QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\            QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\            : "memory"\        );\        if(size==16){\            asm volatile(\                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1,  8*48)\                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2,  9*48)\                QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\                QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\                QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\                QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\                
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\                : "+a"(src)\                : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\                : "memory"\            );\        }\        tmp += 4;\        src += 4 - (size+5)*srcStride;\    }\}\static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\    int w = size>>4;\    do{\    int h = size;\    asm volatile(\        "1:                         \n\t"\        "movq     (%0), %%mm0       \n\t"\        "movq    8(%0), %%mm3       \n\t"\        "movq    2(%0), %%mm1       \n\t"\        "movq   10(%0), %%mm4       \n\t"\        "paddw   %%mm4, %%mm0       \n\t"\        "paddw   %%mm3, %%mm1       \n\t"\        "paddw  18(%0), %%mm3       \n\t"\        "paddw  16(%0), %%mm4       \n\t"\        "movq    4(%0), %%mm2       \n\t"\        "movq   12(%0), %%mm5       \n\t"\        "paddw   6(%0), %%mm2       \n\t"\        "paddw  14(%0), %%mm5       \n\t"\        "psubw %%mm1, %%mm0         \n\t"\        "psubw %%mm4, %%mm3         \n\t"\        "psraw $2, %%mm0            \n\t"\        "psraw $2, %%mm3            \n\t"\        "psubw %%mm1, %%mm0         \n\t"\        "psubw %%mm4, %%mm3         \n\t"\        "paddsw %%mm2, %%mm0        \n\t"\        "paddsw %%mm5, %%mm3        \n\t"\        "psraw $2, %%mm0            \n\t"\        "psraw $2, %%mm3            \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm5, %%mm3         \n\t"\        "psraw $6, %%mm0            \n\t"\        "psraw $6, %%mm3            \n\t"\        "packuswb %%mm3, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm7, q)\        "add $48, %0                \n\t"\        "add %3, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(tmp), "+c"(dst), "+g"(h)\        : "S"((long)dstStride)\        : "memory"\    );\    tmp += 8 - size*24;\    dst 
+= 8 - size*dstStride;\    }while(w--);\}\\static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 8);\}\static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst  , src  , dstStride, srcStride, 16);\    OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\}\\static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\    src += 8*srcStride;\    dst += 8*dstStride;\    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst  , src  , dstStride, srcStride);\    OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\}\\static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\    src += 8*dstStride;\    dst += 8*dstStride;\    src2 += 8*src2Stride;\    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst  , src  , src2  , dstStride, src2Stride);\    OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\}\\static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\          put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\    OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\}\static void OPNAME ## 
h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 8);\}\\static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst  , tmp  , src  , dstStride, tmpStride, srcStride, 16);\}\\static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\{\    asm volatile(\        "movq      (%1), %%mm0          \n\t"\        "movq    24(%1), %%mm1          \n\t"\        "psraw      $5,  %%mm0          \n\t"\        "psraw      $5,  %%mm1          \n\t"\        "packuswb %%mm0, %%mm0          \n\t"\        "packuswb %%mm1, %%mm1          \n\t"\        PAVGB"     (%0), %%mm0          \n\t"\        PAVGB"  (%0,%3), %%mm1          \n\t"\        OP(%%mm0, (%2),    %%mm4, d)\        OP(%%mm1, (%2,%4), %%mm5, d)\        "lea  (%0,%3,2), %0             \n\t"\        "lea  (%2,%4,2), %2             \n\t"\        "movq    48(%1), %%mm0          \n\t"\        "movq    72(%1), %%mm1          \n\t"\        "psraw      $5,  %%mm0          \n\t"\        "psraw      $5,  %%mm1          \n\t"\        "packuswb %%mm0, %%mm0          \n\t"\        "packuswb %%mm1, %%mm1          \n\t"\        PAVGB"     (%0), %%mm0          \n\t"\        PAVGB"  (%0,%3), %%mm1          \n\t"\        OP(%%mm0, (%2),    %%mm4, d)\        OP(%%mm1, (%2,%4), %%mm5, d)\        :"+a"(src8), "+c"(src16), "+d"(dst)\        :"S"((long)src8Stride), "D"((long)dstStride)\        :"memory");\}\static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\{\    do{\    asm volatile(\        "movq      (%1), %%mm0          \n\t"\        "movq     8(%1), %%mm1          \n\t"\ 
       "movq    48(%1), %%mm2          \n\t"\        "movq  8+48(%1), %%mm3          \n\t"\        "psraw      $5,  %%mm0          \n\t"\        "psraw      $5,  %%mm1          \n\t"\        "psraw      $5,  %%mm2          \n\t"\        "psraw      $5,  %%mm3          \n\t"\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -