⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 h264dsp_mmx.c

📁 君正早期ucos系统(只有早期的才没有打包成库),MPLAYER,文件系统,图片解码,浏览,电子书,录音,想学ucos,识货的人就下吧 russblock fmradio explore set
💻 C
📖 第 1 页 / 共 4 页
字号:
        "add %4, %1                 \n\t"\        "add %3, %2                 \n\t"\        : "+a"(src), "+c"(dst), "+d"(src2)\        : "D"((long)src2Stride), "S"((long)dstStride)\        : "memory"\    );\    }while(--h);\}\static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    src -= 2*srcStride;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movd (%0), %%mm0           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm1           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm2           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm3           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm4           \n\t"\        "add %2, %0                 \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\    int h=4;\    int w=3;\    src -= 2*srcStride+2;\    while(w--){\        asm volatile(\            "pxor %%mm7, %%mm7      \n\t"\            "movd (%0), %%mm0       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm1       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm2       \n\t"\            "add %2, %0          
   \n\t"\            "movd (%0), %%mm3       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm4       \n\t"\            "add %2, %0             \n\t"\            "punpcklbw %%mm7, %%mm0 \n\t"\            "punpcklbw %%mm7, %%mm1 \n\t"\            "punpcklbw %%mm7, %%mm2 \n\t"\            "punpcklbw %%mm7, %%mm3 \n\t"\            "punpcklbw %%mm7, %%mm4 \n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\             \            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\            : "memory"\        );\        tmp += 4;\        src += 4 - 9*srcStride;\    }\    tmp -= 3*4;\    asm volatile(\        "movq %4, %%mm6             \n\t"\        "1:                         \n\t"\        "movq     (%0), %%mm0       \n\t"\        "paddw  10(%0), %%mm0       \n\t"\        "movq    2(%0), %%mm1       \n\t"\        "paddw   8(%0), %%mm1       \n\t"\        "movq    4(%0), %%mm2       \n\t"\        "paddw   6(%0), %%mm2       \n\t"\        "psubw %%mm1, %%mm0         \n\t"/*a-b   (abccba)*/\        "psraw $2, %%mm0            \n\t"/*(a-b)/4 */\        "psubw %%mm1, %%mm0         \n\t"/*(a-b)/4-b */\        "paddsw %%mm2, %%mm0        \n\t"\        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\        "paddw %%mm6, %%mm2         \n\t"\        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 +32 */\        "psraw $6, %%mm0            \n\t"\        "packuswb %%mm0, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm7, d)\        "add $24, %0                \n\t"\        "add %3, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(tmp), "+c"(dst), "+m"(h)\        : "S"((long)dstStride), "m"(ff_pw_32)\        : 
"memory"\    );\}\\static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\    int h=8;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %5, %%mm6             \n\t"\        "1:                         \n\t"\        "movq    (%0), %%mm0        \n\t"\        "movq   1(%0), %%mm2        \n\t"\        "movq %%mm0, %%mm1          \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpckhbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm3, %%mm1         \n\t"\        "psllw $2, %%mm0            \n\t"\        "psllw $2, %%mm1            \n\t"\        "movq   -1(%0), %%mm2       \n\t"\        "movq    2(%0), %%mm4       \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "movq %%mm4, %%mm5          \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        "punpckhbw %%mm7, %%mm5     \n\t"\        "paddw %%mm4, %%mm2         \n\t"\        "paddw %%mm3, %%mm5         \n\t"\        "psubw %%mm2, %%mm0         \n\t"\        "psubw %%mm5, %%mm1         \n\t"\        "pmullw %%mm6, %%mm0        \n\t"\        "pmullw %%mm6, %%mm1        \n\t"\        "movd   -2(%0), %%mm2       \n\t"\        "movd    7(%0), %%mm5       \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm5     \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "movq %6, %%mm5             \n\t"\        "paddw %%mm5, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm4, %%mm1         \n\t"\        "psraw $5, %%mm0            \n\t"\        "psraw $5, %%mm1            \n\t"\        "packuswb %%mm1, %%mm0      \n\t"\        OP(%%mm0, 
(%1),%%mm5, q)\        "add %3, %0                 \n\t"\        "add %4, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(src), "+c"(dst), "+m"(h)\        : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\    );\}\\static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\    int h=8;\    asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movq %0, %%mm6             \n\t"\        :: "m"(ff_pw_5)\    );\    do{\    asm volatile(\        "movq    (%0), %%mm0        \n\t"\        "movq   1(%0), %%mm2        \n\t"\        "movq %%mm0, %%mm1          \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpckhbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm3, %%mm1         \n\t"\        "psllw $2, %%mm0            \n\t"\        "psllw $2, %%mm1            \n\t"\        "movq   -1(%0), %%mm2       \n\t"\        "movq    2(%0), %%mm4       \n\t"\        "movq %%mm2, %%mm3          \n\t"\        "movq %%mm4, %%mm5          \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpckhbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        "punpckhbw %%mm7, %%mm5     \n\t"\        "paddw %%mm4, %%mm2         \n\t"\        "paddw %%mm3, %%mm5         \n\t"\        "psubw %%mm2, %%mm0         \n\t"\        "psubw %%mm5, %%mm1         \n\t"\        "pmullw %%mm6, %%mm0        \n\t"\        "pmullw %%mm6, %%mm1        \n\t"\        "movd   -2(%0), %%mm2       \n\t"\        "movd    7(%0), %%mm5       \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm5     \n\t"\        "paddw %%mm3, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        
"movq %5, %%mm5             \n\t"\        "paddw %%mm5, %%mm2         \n\t"\        "paddw %%mm5, %%mm4         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm4, %%mm1         \n\t"\        "psraw $5, %%mm0            \n\t"\        "psraw $5, %%mm1            \n\t"\        "movq (%2), %%mm4           \n\t"\        "packuswb %%mm1, %%mm0      \n\t"\        PAVGB" %%mm4, %%mm0         \n\t"\        OP(%%mm0, (%1),%%mm5, q)\        "add %4, %0                 \n\t"\        "add %4, %1                 \n\t"\        "add %3, %2                 \n\t"\        : "+a"(src), "+c"(dst), "+d"(src2)\        : "D"((long)src2Stride), "S"((long)dstStride),\          "m"(ff_pw_16)\        : "memory"\    );\    }while(--h);\}\\static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\    int w= 2;\    src -= 2*srcStride;\    \    while(w--){\      asm volatile(\        "pxor %%mm7, %%mm7          \n\t"\        "movd (%0), %%mm0           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm1           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm2           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm3           \n\t"\        "add %2, %0                 \n\t"\        "movd (%0), %%mm4           \n\t"\        "add %2, %0                 \n\t"\        "punpcklbw %%mm7, %%mm0     \n\t"\        "punpcklbw %%mm7, %%mm1     \n\t"\        "punpcklbw %%mm7, %%mm2     \n\t"\        "punpcklbw %%mm7, %%mm3     \n\t"\        "punpcklbw %%mm7, %%mm4     \n\t"\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\        QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\        QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\        QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\        QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, 
%%mm3, %%mm4, OP)\        QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\        QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\         \        : "+a"(src), "+c"(dst)\        : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\        : "memory"\     );\     if(h==16){\        asm volatile(\            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\            QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\            QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\            QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\            QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\            QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\            QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\            \           : "+a"(src), "+c"(dst)\           : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\           : "memory"\        );\     }\     src += 4-(h+5)*srcStride;\     dst += 4-h*dstStride;\   }\}\static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\    int h = size;\    int w = (size+8)>>2;\    src -= 2*srcStride+2;\    while(w--){\        asm volatile(\            "pxor %%mm7, %%mm7      \n\t"\            "movd (%0), %%mm0       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm1       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm2       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm3       \n\t"\            "add %2, %0             \n\t"\            "movd (%0), %%mm4       \n\t"\            "add %2, %0             \n\t"\            "punpcklbw %%mm7, %%mm0 \n\t"\            "punpcklbw %%mm7, %%mm1 \n\t"\            "punpcklbw %%mm7, %%mm2 \n\t"\            
"punpcklbw %%mm7, %%mm3 \n\t"\            "punpcklbw %%mm7, %%mm4 \n\t"\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\            QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\            QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\            QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\            : "+a"(src)\            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\            : "memory"\        );\        if(size==16){\            asm volatile(\                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1,  8*48)\                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2,  9*48)\                QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\                QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\                QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\                QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\                : "+a"(src)\                : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\                : "memory"\            );\        }\        tmp += 4;\        src += 4 - (size+5)*srcStride;\    }\    tmp -= size+8;\    w = size>>4;\    do{\    h = size;\    asm volatile(\        "movq %4, %%mm6             \n\t"\        "1:                         \n\t"\        "movq     (%0), %%mm0       \n\t"\        "movq    8(%0), %%mm3       \n\t"\        "movq    2(%0), %%mm1       \n\t"\        "movq   10(%0), %%mm4       \n\t"\        "paddw   %%mm4, %%mm0       
\n\t"\        "paddw   %%mm3, %%mm1       \n\t"\        "paddw  18(%0), %%mm3       \n\t"\        "paddw  16(%0), %%mm4       \n\t"\        "movq    4(%0), %%mm2       \n\t"\        "movq   12(%0), %%mm5       \n\t"\        "paddw   6(%0), %%mm2       \n\t"\        "paddw  14(%0), %%mm5       \n\t"\        "psubw %%mm1, %%mm0         \n\t"\        "psubw %%mm4, %%mm3         \n\t"\        "psraw $2, %%mm0            \n\t"\        "psraw $2, %%mm3            \n\t"\        "psubw %%mm1, %%mm0         \n\t"\        "psubw %%mm4, %%mm3         \n\t"\        "paddsw %%mm2, %%mm0        \n\t"\        "paddsw %%mm5, %%mm3        \n\t"\        "psraw $2, %%mm0            \n\t"\        "psraw $2, %%mm3            \n\t"\        "paddw %%mm6, %%mm2         \n\t"\        "paddw %%mm6, %%mm5         \n\t"\        "paddw %%mm2, %%mm0         \n\t"\        "paddw %%mm5, %%mm3         \n\t"\        "psraw $6, %%mm0            \n\t"\        "psraw $6, %%mm3            \n\t"\        "packuswb %%mm3, %%mm0      \n\t"\        OP(%%mm0, (%1),%%mm7, q)\        "add $48, %0                \n\t"\        "add %3, %1                 \n\t"\        "decl %2                    \n\t"\        " jnz 1b                    \n\t"\        : "+a"(tmp), "+c"(dst), "+m"(h)\        : "S"((long)dstStride), "m"(ff_pw_32)\        : "memory"\    );\    tmp += 8 - size*24;\    dst += 8 - size*dstStride;\    }while(w--);\}\\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -