⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 motion_est_mmx.c

📁 mplayer播放器的源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 * MMX optimized motion estimation
 * Copyright (c) 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * mostly by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dsputil.h"
#include "x86_cpu.h"

/*
 * Three 64-bit constants holding the word values 0, 1 and 2 replicated into
 * all four 16-bit lanes.
 * NOTE(review): not referenced in the code below; presumably the source of
 * the rounding term that callers place in mm5 -- confirm against the callers.
 */
static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
    0x0000000000000000ULL,
    0x0001000100010001ULL,
    0x0002000200020002ULL,
};

/* The byte value 1 replicated into all eight lanes; loaded by sad8_4_mmx2(). */
static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;

/*
 * Add to mm6 (as four 16-bit partial sums) the sum of absolute differences
 * between two 8-pixel-wide blocks, using plain MMX.
 *
 * blk1, blk2: top-left bytes of the two blocks
 * stride:     byte distance between consecutive rows
 * h:          number of rows; two rows are consumed per loop iteration
 *
 * mm7 is used as the zero source for byte->word unpacking and mm6 is only
 * accumulated into, never cleared, so both must be prepared by the caller
 * (that code is not part of this excerpt).
 */
static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    /* Negative byte offset that counts up towards zero; the pointers passed
     * to the asm are pre-biased by -len so (ptr, index) starts at row 0. */
    long len= -(stride*h);
    asm volatile(
        ASMALIGN(4)
        "1:                             \n\t"
        /* first row: |a-b| as (b -sat a) | (a -sat b) */
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
        "movq (%2, %%"REG_a"), %%mm2    \n\t"
        "movq (%2, %%"REG_a"), %%mm4    \n\t"
        "add %3, %%"REG_a"              \n\t"
        "psubusb %%mm0, %%mm2           \n\t"
        "psubusb %%mm4, %%mm0           \n\t"
        /* second row, same scheme */
        "movq (%1, %%"REG_a"), %%mm1    \n\t"
        "movq (%2, %%"REG_a"), %%mm3    \n\t"
        "movq (%2, %%"REG_a"), %%mm5    \n\t"
        "psubusb %%mm1, %%mm3           \n\t"
        "psubusb %%mm5, %%mm1           \n\t"
        "por %%mm2, %%mm0               \n\t"
        "por %%mm1, %%mm3               \n\t"
        /* widen the byte differences to words and add them up */
        "movq %%mm0, %%mm1              \n\t"
        "movq %%mm3, %%mm2              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "punpcklbw %%mm7, %%mm3         \n\t"
        "punpckhbw %%mm7, %%mm2         \n\t"
        "paddw %%mm1, %%mm0             \n\t"
        "paddw %%mm3, %%mm2             \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        /* loop while the running offset is still negative */
        "add %3, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
    );
}

/*
 * Add to mm6 the sum of absolute differences between two 8-pixel-wide
 * blocks, using the MMX2 psadbw instruction.
 *
 * blk1, blk2: top-left bytes of the two blocks
 * stride:     byte distance between consecutive rows
 * h:          number of rows; two rows are consumed per loop iteration
 *
 * mm6 is only accumulated into; clearing and reading it is left to the caller.
 */
static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    asm volatile(
        ASMALIGN(4)
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
        "movq (%1, %3), %%mm1           \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((long)stride)
    );
}

/*
 * Return the sum of absolute differences between two 16-pixel-wide blocks,
 * using SSE2.
 *
 * v:      unused by this function
 * blk2:   block used as the psadbw memory operand; a legacy-SSE memory
 *         operand must be 16-byte aligned
 * blk1:   block read with movdqu, so it may be unaligned
 * stride: byte distance between consecutive rows
 * h:      number of rows; two rows are consumed per loop iteration
 *
 * The whole computation is a single asm statement so the accumulator cannot
 * be disturbed by compiler-generated code between the loop and the final
 * reduction.  The accumulator is xmm2 rather than xmm6 because xmm6 is
 * callee-saved under the Microsoft x64 calling convention.
 * NOTE(review): as elsewhere in this file, the scratch vector registers are
 * not listed as clobbers.
 */
static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
{
    int ret;
    asm volatile(
        "pxor %%xmm2, %%xmm2            \n\t"
        ASMALIGN(4)
        "1:                             \n\t"
        "movdqu (%1), %%xmm0            \n\t"
        "movdqu (%1, %4), %%xmm1        \n\t"
        "psadbw (%2), %%xmm0            \n\t"
        "psadbw (%2, %4), %%xmm1        \n\t"
        "paddw %%xmm0, %%xmm2           \n\t"
        "paddw %%xmm1, %%xmm2           \n\t"
        "lea (%1,%4,2), %1              \n\t"
        "lea (%2,%4,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        /* psadbw leaves one partial sum per 64-bit half: fold high into low */
        "movhlps %%xmm2, %%xmm0         \n\t"
        "paddw   %%xmm0, %%xmm2         \n\t"
        "movd    %%xmm2, %3             \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2), "=r"(ret)
        : "r" ((long)stride)
    );
    return ret;
}

/*
 * Add to mm6 the SAD between blk2 and blk1 averaged with itself shifted one
 * byte to the right (pavgb of each row with the row at offset +1).
 *
 * blk1:   block that is averaged horizontally; reads extend one byte past
 *         the 8-byte row
 * blk2:   block compared against
 * stride: byte distance between consecutive rows
 * h:      number of rows; two rows are consumed per loop iteration
 */
static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    asm volatile(
        ASMALIGN(4)
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
        "movq (%1, %3), %%mm1           \n\t"
        "pavgb 1(%1), %%mm0             \n\t"
        "pavgb 1(%1, %3), %%mm1         \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((long)stride)
    );
}

/*
 * Add to mm6 the SAD between blk2 and blk1 averaged with the row below it
 * (pavgb of each row with the following row).
 *
 * blk1:   block that is averaged vertically; one row beyond h is read
 * blk2:   block compared against
 * stride: byte distance between consecutive rows
 * h:      number of rows; two rows are consumed per loop iteration
 */
static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    asm volatile(
        /* preload the first row; inside the loop mm0 is always the row above */
        "movq (%1), %%mm0               \n\t"
        "add %3, %1                     \n\t"
        ASMALIGN(4)
        "1:                             \n\t"
        "movq (%1), %%mm1               \n\t"
        "movq (%1, %3), %%mm2           \n\t"
        "pavgb %%mm1, %%mm0             \n\t"
        "pavgb %%mm2, %%mm1             \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        /* carry the last loaded row over to the next iteration */
        "movq %%mm2, %%mm0              \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((long)stride)
    );
}

/*
 * Add to mm6 the SAD between blk2 and blk1 averaged both horizontally and
 * vertically: each row is first pavgb'ed with itself at offset +1, then
 * adjacent rows are pavgb'ed together.
 *
 * blk1:   block that is averaged; reads extend one byte to the right and
 *         one row below the h rows
 * blk2:   block compared against
 * stride: byte distance between consecutive rows
 * h:      number of rows; two rows are consumed per loop iteration
 *
 * Before the vertical step, 1 is subtracted (saturating) from every byte of
 * the middle row using the `bone` constant.
 * NOTE(review): presumably this offsets the upward rounding of the chained
 * pavgb operations; the result is an approximation, not an exact 4-tap mean.
 */
static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    asm volatile(
        "movq "MANGLE(bone)", %%mm5     \n\t"
        /* preload the horizontally averaged first row */
        "movq (%1), %%mm0               \n\t"
        "pavgb 1(%1), %%mm0             \n\t"
        "add %3, %1                     \n\t"
        ASMALIGN(4)
        "1:                             \n\t"
        "movq (%1), %%mm1               \n\t"
        "movq (%1,%3), %%mm2            \n\t"
        "pavgb 1(%1), %%mm1             \n\t"
        "pavgb 1(%1,%3), %%mm2          \n\t"
        "psubusb %%mm5, %%mm1           \n\t"
        "pavgb %%mm1, %%mm0             \n\t"
        "pavgb %%mm2, %%mm1             \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2,%3), %%mm1          \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        /* carry the last averaged row over to the next iteration */
        "movq %%mm2, %%mm0              \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((long)stride)
    );
}

/*
 * Add to mm6 (as four 16-bit partial sums) the SAD between blk2 and the
 * average of blk1a and blk1b, 8 pixels wide, one row per loop iteration.
 *
 * The average is computed in 16-bit lanes as (a + b + mm5) >> 1, so mm5
 * supplies the rounding term and mm7 the zero used for unpacking; both, like
 * mm6, must be prepared by the caller (that code is not part of this excerpt).
 */
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
    /* Negative byte offset that counts up towards zero; the pointers passed
     * to the asm are pre-biased by -len so (ptr, index) starts at row 0. */
    long len= -(stride*h);
    asm volatile(
        ASMALIGN(4)
        "1:                             \n\t"
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
        "movq (%2, %%"REG_a"), %%mm1    \n\t"
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
        "movq (%2, %%"REG_a"), %%mm3    \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpcklbw %%mm7, %%mm1         \n\t"
        "punpckhbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "paddw %%mm0, %%mm1             \n\t"
        "paddw %%mm2, %%mm3             \n\t"
        "movq (%3, %%"REG_a"), %%mm4    \n\t"
        "movq (%3, %%"REG_a"), %%mm2    \n\t"
        "paddw %%mm5, %%mm1             \n\t"
        "paddw %%mm5, %%mm3             \n\t"
        "psrlw $1, %%mm1                \n\t"
        "psrlw $1, %%mm3                \n\t"
        "packuswb %%mm3, %%mm1          \n\t"
        "psubusb %%mm1, %%mm4           \n\t"
        "psubusb %%mm2, %%mm1           \n\t"
        "por %%mm4, %%mm1               \n\t"
        "movq %%mm1, %%mm0              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "paddw %%mm1, %%mm0             \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "add %4, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -