⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsputil_mmx.c

📁 ffmpeg移植到symbian的全部源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
    MOVQ_ZERO(mm7);    i = 4;    do {        asm volatile(                "movq   (%2), %%mm0     \n\t"                "movq   8(%2), %%mm1    \n\t"                "movq   16(%2), %%mm2   \n\t"                "movq   24(%2), %%mm3   \n\t"                "movq   %0, %%mm4       \n\t"                "movq   %1, %%mm6       \n\t"                "movq   %%mm4, %%mm5    \n\t"                "punpcklbw %%mm7, %%mm4 \n\t"                "punpckhbw %%mm7, %%mm5 \n\t"                "paddsw %%mm4, %%mm0    \n\t"                "paddsw %%mm5, %%mm1    \n\t"                "movq   %%mm6, %%mm5    \n\t"                "punpcklbw %%mm7, %%mm6 \n\t"                "punpckhbw %%mm7, %%mm5 \n\t"                "paddsw %%mm6, %%mm2    \n\t"                "paddsw %%mm5, %%mm3    \n\t"                "packuswb %%mm1, %%mm0  \n\t"                "packuswb %%mm3, %%mm2  \n\t"                "movq   %%mm0, %0       \n\t"                "movq   %%mm2, %1       \n\t"                :"+m"(*pix), "+m"(*(pix+line_size))                :"r"(p)                :"memory");        pix += line_size*2;        p += 16;    } while (--i);}static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h){    asm volatile(         "lea (%3, %3), %%"REG_a"       \n\t"         ASMALIGN(3)         "1:                            \n\t"         "movd (%1), %%mm0              \n\t"         "movd (%1, %3), %%mm1          \n\t"         "movd %%mm0, (%2)              \n\t"         "movd %%mm1, (%2, %3)          \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "movd (%1), %%mm0              \n\t"         "movd (%1, %3), %%mm1          \n\t"         "movd %%mm0, (%2)              \n\t"         "movd %%mm1, (%2, %3)          \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "subl $4, %0                   \n\t"         "jnz 1b                        \n\t"         : "+g"(h), "+r" (pixels),  "+r" (block)         : "r"((x86_reg)line_size)         : "%"REG_a, "memory"        );}static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h){    asm volatile(         "lea (%3, %3), %%"REG_a"       \n\t"         ASMALIGN(3)         "1:                            \n\t"         "movq (%1), %%mm0              \n\t"         "movq (%1, %3), %%mm1          \n\t"         "movq %%mm0, (%2)              \n\t"         "movq %%mm1, (%2, %3)          \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "movq (%1), %%mm0              \n\t"         "movq (%1, %3), %%mm1          \n\t"         "movq %%mm0, (%2)              \n\t"         "movq %%mm1, (%2, %3)          \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "subl $4, %0                   \n\t"         "jnz 1b                        \n\t"         : "+g"(h), "+r" (pixels),  "+r" (block)         : "r"((x86_reg)line_size)         : "%"REG_a, "memory"        );}static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h){    asm volatile(         "lea (%3, %3), %%"REG_a"       \n\t"         ASMALIGN(3)         "1:                            \n\t"         "movq (%1), %%mm0              \n\t"         "movq 8(%1), %%mm4             \n\t"         "movq (%1, %3), %%mm1          \n\t"         "movq 8(%1, %3), %%mm5         \n\t"         "movq %%mm0, (%2)              \n\t"         "movq %%mm4, 8(%2)             \n\t"         "movq %%mm1, (%2, %3)          \n\t"         "movq %%mm5, 8(%2, %3)         \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "movq (%1), %%mm0              \n\t"         "movq 8(%1), %%mm4             \n\t"         "movq (%1, %3), %%mm1          \n\t"         "movq 8(%1, %3), %%mm5         \n\t"         "movq %%mm0, (%2)              \n\t"         "movq %%mm4, 8(%2)             \n\t"         "movq %%mm1, (%2, %3)          \n\t"         "movq %%mm5, 8(%2, %3)         \n\t"         "add %%"REG_a", %1             \n\t"         "add %%"REG_a", %2             \n\t"         "subl $4, %0                   \n\t"         "jnz 1b                        \n\t"         : "+g"(h), "+r" (pixels),  "+r" (block)         : "r"((x86_reg)line_size)         : "%"REG_a, "memory"        );}static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h){    asm volatile(         "1:                            \n\t"         "movdqu (%1), %%xmm0           \n\t"         "movdqu (%1,%3), %%xmm1        \n\t"         "movdqu (%1,%3,2), %%xmm2      \n\t"         "movdqu (%1,%4), %%xmm3        \n\t"         "movdqa %%xmm0, (%2)           \n\t"         "movdqa %%xmm1, (%2,%3)        \n\t"         "movdqa %%xmm2, (%2,%3,2)      \n\t"         "movdqa %%xmm3, (%2,%4)        \n\t"         "subl $4, %0                   \n\t"         "lea (%1,%3,4), %1             \n\t"         "lea (%2,%3,4), %2             \n\t"         "jnz 1b                        \n\t"         : "+g"(h), "+r" (pixels),  "+r" (block)         : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)         : "memory"        );}static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h){    asm volatile(         "1:                            \n\t"         "movdqu (%1), %%xmm0           \n\t"         "movdqu (%1,%3), %%xmm1        \n\t"         "movdqu (%1,%3,2), %%xmm2      \n\t"         "movdqu (%1,%4), %%xmm3        \n\t"         "pavgb  (%2), %%xmm0           \n\t"         "pavgb  (%2,%3), %%xmm1        \n\t"         "pavgb  (%2,%3,2), %%xmm2      \n\t"         "pavgb  (%2,%4), %%xmm3        \n\t"         "movdqa %%xmm0, (%2)           \n\t"         "movdqa %%xmm1, (%2,%3)        \n\t"         "movdqa %%xmm2, (%2,%3,2)      \n\t"         "movdqa %%xmm3, (%2,%4)        \n\t"         "subl $4, %0                   \n\t"         "lea (%1,%3,4), %1             \n\t"         "lea (%2,%3,4), %2             \n\t"         "jnz 1b                        \n\t"         : "+g"(h), "+r" (pixels),  "+r" (block)         : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)         : "memory"        );}static void clear_blocks_mmx(DCTELEM *blocks){    asm volatile(                "pxor %%mm7, %%mm7              \n\t"                "mov $-128*6, %%"REG_a"         \n\t"                "1:                             \n\t"                "movq %%mm7, (%0, %%"REG_a")    \n\t"                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"                "add $32, %%"REG_a"             \n\t"                " js 1b                         \n\t"                : : "r" (((uint8_t *)blocks)+128*6)                : "%"REG_a        );}static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){    x86_reg i=0;    asm volatile(        "jmp 2f                         \n\t"        "1:                             \n\t"        "movq  (%1, %0), %%mm0          \n\t"        "movq  (%2, %0), %%mm1          \n\t"        "paddb %%mm0, %%mm1             \n\t"        "movq %%mm1, (%2, %0)           \n\t"        "movq 8(%1, %0), %%mm0          \n\t"        "movq 8(%2, %0), %%mm1          \n\t"        "paddb %%mm0, %%mm1             \n\t"        "movq %%mm1, 8(%2, %0)          \n\t"        "add $16, %0                    \n\t"        "2:                             \n\t"        "cmp %3, %0                     \n\t"        " js 1b                         \n\t"        : "+r" (i)        : "r"(src), "r"(dst), "r"((x86_reg)w-15)    );    for(; i<w; i++)        dst[i+0] += src[i+0];}static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){    x86_reg i=0;    asm volatile(        "jmp 2f                         \n\t"        "1:                             \n\t"        "movq   (%2, %0), %%mm0         \n\t"        "movq  8(%2, %0), %%mm1         \n\t"        "paddb  (%3, %0), %%mm0         \n\t"        "paddb 8(%3, %0), %%mm1         \n\t"        "movq %%mm0,  (%1, %0)          \n\t"        "movq %%mm1, 8(%1, %0)          \n\t"        "add $16, %0                    \n\t"        "2:                             \n\t"        "cmp %4, %0                     \n\t"        " js 1b                         \n\t"        : "+r" (i)        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)    );    for(; i<w; i++)        dst[i] = src1[i] + src2[i];}#define H263_LOOP_FILTER \        "pxor %%mm7, %%mm7              \n\t"\        "movq  %0, %%mm0                \n\t"\        "movq  %0, %%mm1                \n\t"\        "movq  %3, %%mm2                \n\t"\        "movq  %3, %%mm3                \n\t"\        "punpcklbw %%mm7, %%mm0         \n\t"\        "punpckhbw %%mm7, %%mm1         \n\t"\        "punpcklbw %%mm7, %%mm2         \n\t"\        "punpckhbw %%mm7, %%mm3         \n\t"\        "psubw %%mm2, %%mm0             \n\t"\        "psubw %%mm3, %%mm1             \n\t"\        "movq  %1, %%mm2                \n\t"\        "movq  %1, %%mm3                \n\t"\        "movq  %2, %%mm4                \n\t"\        "movq  %2, %%mm5                \n\t"\        "punpcklbw %%mm7, %%mm2         \n\t"\        "punpckhbw %%mm7, %%mm3         \n\t"\        "punpcklbw %%mm7, %%mm4         \n\t"\        "punpckhbw %%mm7, %%mm5         \n\t"\        "psubw %%mm2, %%mm4             \n\t"\        "psubw %%mm3, %%mm5             \n\t"\        "psllw $2, %%mm4                \n\t"\        "psllw $2, %%mm5                \n\t"\        "paddw %%mm0, %%mm4             \n\t"\        "paddw %%mm1, %%mm5             \n\t"\        "pxor %%mm6, %%mm6              \n\t"\        "pcmpgtw %%mm4, %%mm6           \n\t"\        "pcmpgtw %%mm5, %%mm7           \n\t"\        "pxor %%mm6, %%mm4              \n\t"\        "pxor %%mm7, %%mm5              \n\t"\        "psubw %%mm6, %%mm4             \n\t"\        "psubw %%mm7, %%mm5             \n\t"\        "psrlw $3, %%mm4                \n\t"\        "psrlw $3, %%mm5                \n\t"\        "packuswb %%mm5, %%mm4          \n\t"\        "packsswb %%mm7, %%mm6          \n\t"\        "pxor %%mm7, %%mm7              \n\t"\        "movd %4, %%mm2                 \n\t"\        "punpcklbw %%mm2, %%mm2         \n\t"\        "punpcklbw %%mm2, %%mm2         \n\t"\        "punpcklbw %%mm2, %%mm2         \n\t"\        "psubusb %%mm4, %%mm2           \n\t"\        "movq %%mm2, %%mm3              \n\t"\        "psubusb %%mm4, %%mm3           \n\t"\        "psubb %%mm3, %%mm2             \n\t"\        "movq %1, %%mm3                 \n\t"\        "movq %2, %%mm4                 \n\t"\        "pxor %%mm6, %%mm3              \n\t"\        "pxor %%mm6, %%mm4              \n\t"\        "paddusb %%mm2, %%mm3           \n\t"\        "psubusb %%mm2, %%mm4           \n\t"\        "pxor %%mm6, %%mm3              \n\t"\        "pxor %%mm6, %%mm4              \n\t"\        "paddusb %%mm2, %%mm2           \n\t"\        "packsswb %%mm1, %%mm0          \n\t"\        "pcmpgtb %%mm0, %%mm7           \n\t"\        "pxor %%mm7, %%mm0              \n\t"\        "psubb %%mm7, %%mm0             \n\t"\        "movq %%mm0, %%mm1              \n\t"\        "psubusb %%mm2, %%mm0           \n\t"\        "psubb %%mm0, %%mm1             \n\t"\        "pand %5, %%mm1                 \n\t"\        "psrlw $2, %%mm1                \n\t"\        "pxor %%mm7, %%mm1              \n\t"\        "psubb %%mm7, %%mm1             \n\t"\        "movq %0, %%mm5                 \n\t"\        "movq %3, %%mm6                 \n\t"\        "psubb %%mm1, %%mm5             \n\t"\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -