⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vc1dsp_mmx.c

📁 mediastreamer2是开源的网络传输媒体流的库
💻 C
📖 第 1 页 / 共 2 页
字号:
     "pmullw    %%mm5, %%mm2    \n\t" /* *53 */                 \
     "paddw     %%mm1, %%mm3    \n\t" /* 4,53,18,-3 */          \
     "paddw     %%mm2, %%mm4    \n\t" /* 4,53,18,-3 */

/**
 * Macro to build the vertical 16bits version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (src_stride) and %4 (3*src_stride).
 *
 * Expands to a static function
 *   vc1_put_ver_16b_<NAME>_mmx(dst, src, src_stride, rnd, shift)
 * that steps src back by one row and then runs 8 loop iterations. Each
 * iteration advances src by src_stride and dst by 24 bytes (12 int16_t).
 *
 * Besides the MSPEL_FILTER13_CORE / TRANSFER_DONT_PACK step, every
 * iteration filters one more group read from byte offset 8 of each tap
 * address:
 *   (18*A2 - 3*A1 + 53*A3 - 4*A4 + mm7) >> shift
 * and stores the result at byte offset 16 of the current dst row.
 * NOTE(review): mm7 is presumably the rounder set up by LOAD_ROUNDER_MMX,
 * which is defined outside this excerpt -- confirm.
 *
 * Inline asm operands: %0 = h (row counter), %1 = src, %2 = dst,
 * %3 = src_stride, %4 = 3*src_stride, %5 = rnd (memory),
 * %6 = shift (memory).
 *
 * @param  NAME   Suffix of the generated function name; this file
 *                instantiates it with shift1 and shift3.
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)                    \
static void                                                             \
vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \
                                 long int src_stride,                   \
                                 int rnd, int64_t shift)                \
{                                                                       \
    int h = 8;                                                          \
    /* Start one row above the caller-supplied position */              \
    src -= src_stride;                                                  \
    asm volatile(                                                       \
        LOAD_ROUNDER_MMX("%5")                                          \
        "movq      "MANGLE(ff_pw_53)", %%mm5\n\t"                       \
        "movq      "MANGLE(ff_pw_18)", %%mm6\n\t"                       \
        ASMALIGN(3)                                                     \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd  1", A1, A2, A3, A4)       \
        NORMALIZE_MMX("%6")                                             \
        TRANSFER_DONT_PACK                                              \
        /* Last 3 (in fact 4) bytes on the line */                      \
        "movd      8+"A1", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "movq      %%mm1, %%mm3    \n\t"                                \
        "paddw     %%mm1, %%mm1    \n\t"                                \
        "paddw     %%mm3, %%mm1    \n\t" /* 3* */                       \
        "movd      8+"A2", %%mm3   \n\t"                                \
        DO_UNPACK("%%mm3")                                              \
        "pmullw    %%mm6, %%mm3    \n\t" /* *18 */                      \
        "psubw     %%mm1, %%mm3    \n\t" /*18,-3 */                     \
        "movd      8+"A3", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "pmullw    %%mm5, %%mm1    \n\t" /* *53 */                      \
        "paddw     %%mm1, %%mm3    \n\t" /*53,18,-3 */                  \
        "movd      8+"A4", %%mm1   \n\t"                                \
        DO_UNPACK("%%mm1")                                              \
        "psllw     $2, %%mm1       \n\t" /* 4* */                       \
        "psubw     %%mm1, %%mm3    \n\t"                                \
        "paddw     %%mm7, %%mm3    \n\t"                                \
        "psraw     %6, %%mm3       \n\t"                                \
        "movq      %%mm3, 16(%2)   \n\t"                                \
        /* Next source row; next 24-byte destination row */             \
        "add       %3, %1          \n\t"                                \
        "add       $24, %2         \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src),  "+r" (dst)                              \
        : "r"(src_stride), "r"(3*src_stride),                           \
          "m"(rnd), "m"(shift)                                          \
        : "memory"                                                      \
    );                                                                  \
}

/**
 * Macro to build the horizontal 16bits version of vc1_put_shift[13].
* Here, offset=16bits, so parameters passed A1 to A4 should be simple.
 *
 * Expands to a static function
 *   vc1_put_hor_16b_<NAME>_mmx(dst, stride, src, rnd)
 * that steps src back by one int16_t element, subtracts 64*256 from rnd
 * and then runs 8 loop iterations. Each iteration advances src by
 * 24 bytes (12 int16_t) and dst by stride. After NORMALIZE_MMX("$7"),
 * ff_pw_128 is added to mm3 and mm4 before TRANSFER_DO_PACK.
 *
 * The literal (-4+58+13-3) in the rnd adjustment evaluates to 64, the same
 * value as the -4+53+18-3 tap sum named in the filter core's comments.
 *
 * Inline asm operands: %0 = h (row counter), %1 = src, %2 = dst,
 * %3 = stride, %4 = rnd (memory).
 *
 * @param  NAME   Suffix of the generated function name; this file
 *                instantiates it with shift1 and shift3.
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4)                    \
static void                                                             \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride,         \
                                 const int16_t *src, int rnd)           \
{                                                                       \
    int h = 8;                                                          \
    src -= 1;                                                           \
    rnd -= (-4+58+13-3)*256; /* Add -256 bias */                        \
    asm volatile(                                                       \
        LOAD_ROUNDER_MMX("%4")                                          \
        "movq      "MANGLE(ff_pw_18)", %%mm6   \n\t"                    \
        "movq      "MANGLE(ff_pw_53)", %%mm5   \n\t"                    \
        ASMALIGN(3)                                                     \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4)      \
        NORMALIZE_MMX("$7")                                             \
        /* Remove bias */                                               \
        "paddw     "MANGLE(ff_pw_128)", %%mm3  \n\t"                    \
        "paddw     "MANGLE(ff_pw_128)", %%mm4  \n\t"                    \
        TRANSFER_DO_PACK                                                \
        /* Next 24-byte source row; next destination row */             \
        "add       $24, %1         \n\t"                                \
        "add       %3, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src),  "+r" (dst)                              \
        : "r"(stride), "m"(rnd)                                         \
        : "memory"                                                      \
    );                                                                  \
}

/**
 * Macro to build the 8bits, any direction, version of vc1_put_shift[13].
 * Here, offset=src_stride. Parameters passed A1 to A4 must use
 * %3 (offset) and %4 (3*offset).
 *
 * Expands to a static function
 *   vc1_put_<NAME>_mmx(dst, src, stride, rnd, offset)
 * that steps src back by offset, replaces rnd with 32-rnd and then runs
 * 8 loop iterations. Each iteration advances both src and dst by stride.
 *
 * Inline asm operands: %0 = h (row counter), %1 = src, %2 = dst,
 * %3 = offset, %4 = 3*offset, %5 = stride, %6 = rnd (memory).
 *
 * @param  NAME   Suffix of the generated function name; this file
 *                instantiates it with shift1 and shift3.
 * @see MSPEL_FILTER13_CORE for information on A1->A4
 */
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4)                         \
static void                                                             \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,               \
                        long int stride, int rnd, long int offset)      \
{                                                                       \
    int h = 8;                                                          \
    src -= offset;                                                      \
    rnd = 32-rnd;                                                       \
    asm volatile (                                                      \
        LOAD_ROUNDER_MMX("%6")                                          \
        "movq      "MANGLE(ff_pw_53)", %%mm5       \n\t"                \
        "movq      "MANGLE(ff_pw_18)", %%mm6       \n\t"                \
        ASMALIGN(3)                                                     \
        "1:                        \n\t"                                \
        MSPEL_FILTER13_CORE(DO_UNPACK, "movd   1", A1, A2, A3, A4)      \
        NORMALIZE_MMX("$6")                                             \
        TRANSFER_DO_PACK                                                \
        /* Both pointers move by stride (%5), not by offset */          \
        "add       %5, %1          \n\t"                                \
        "add       %5, %2          \n\t"                                \
        "decl      %0              \n\t"                                \
        "jnz 1b                    \n\t"                                \
        : "+r"(h), "+r" (src),  "+r" (dst)                              \
        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
        : "memory"                                                      \
    );                                                                  \
}

/**
 * 1/4 shift bicubic interpolation.
 * A1..A4 address the taps at +3, +2, +1 and +0 steps from the base pointer
 * (steps of offset/src_stride for the 8B and VER_16B variants, of one
 * int16_t for the HOR_16B variant).
 */
MSPEL_FILTER13_8B     (shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )")
MSPEL_FILTER13_VER_16B(shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )")
MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)")

/**
 * 3/4 shift bicubic interpolation.
 * Same taps as the 1/4 shift case, passed in the reverse order
 * (+0, +1, +2, +3 steps).
 */
MSPEL_FILTER13_8B     (shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )")
MSPEL_FILTER13_VER_16B(shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )")
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)")

/** Signature of the vertical kernels producing 16-bit intermediates. */
typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, long int src_stride, int rnd, int64_t shift);
/** Signature of the horizontal kernels consuming 16-bit intermediates. */
typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, long int dst_stride, const int16_t *src, int rnd);
/** Signature of the single-pass 8-bit kernels. */
typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, long int stride, int rnd, long int offset);

/**
 * Interpolates fractional pel values by applying proper vertical then
 * horizontal filter.
 *
 * @param  dst     Destination buffer for interpolated pels.
 * @param  src     Source buffer.
 * @param  stride  Stride for both src and dst buffers.
 * @param  hmode   Horizontal filter (expressed in quarter pixels shift).
 * @param  vmode   Vertical filter (expressed in quarter pixels shift).
 * @param  rnd     Rounding bias.
*/static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,                         int hmode, int vmode, int rnd){    static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =         { NULL, vc1_put_ver_16b_shift1_mmx, vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };    static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =         { NULL, vc1_put_hor_16b_shift1_mmx, vc1_put_hor_16b_shift2_mmx, vc1_put_hor_16b_shift3_mmx };    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =         { NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx };    asm volatile(        "pxor %%mm0, %%mm0         \n\t"        ::: "memory"    );    if (vmode) { /* Vertical filter to apply */        if (hmode) { /* Horizontal filter to apply, output to tmp */            static const int shift_value[] = { 0, 5, 1, 5 };            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;            int              r;            DECLARE_ALIGNED_16(int16_t, tmp[12*8]);            r = (1<<(shift-1)) + rnd-1;            vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);            vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);            return;        }        else { /* No horizontal filter, output 8 lines to dst */            vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);            return;        }    }    /* Horizontal mode with no vertical mode */    vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);}void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);/** Macro to ease bicubic filter interpolation functions declarations */#define DECLARE_FUNCTION(a, b)                                          \static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \     vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \}DECLARE_FUNCTION(0, 1)DECLARE_FUNCTION(0, 
2)DECLARE_FUNCTION(0, 3)DECLARE_FUNCTION(1, 0)DECLARE_FUNCTION(1, 1)DECLARE_FUNCTION(1, 2)DECLARE_FUNCTION(1, 3)DECLARE_FUNCTION(2, 0)DECLARE_FUNCTION(2, 1)DECLARE_FUNCTION(2, 2)DECLARE_FUNCTION(2, 3)DECLARE_FUNCTION(3, 0)DECLARE_FUNCTION(3, 1)DECLARE_FUNCTION(3, 2)DECLARE_FUNCTION(3, 3)void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -