📄 swscale_template.c
字号:
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* the C code (not assembly, mmx, ...) of this file can be used
* under the LGPL license too
*/
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
#undef EMMS
#undef SFENCE
#ifdef HAVE_3DNOW
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
#ifdef HAVE_3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH " # nop"
#define PREFETCHW " # nop"
#endif
#ifdef HAVE_MMX2
#define SFENCE "sfence"
#else
#define SFENCE " # nop"
#endif
#ifdef HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#ifdef HAVE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
#ifdef HAVE_ALTIVEC
#include "swscale_altivec_template.c"
#endif
#define YSCALEYUV2YV12X(x, offset, dest, width) \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
ASMALIGN(4) /* FIXME Unroll? */\
"1: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
" jnz 1b \n\t"\
"psraw $3, %%mm3 \n\t"\
"psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"cmp %2, %%"REG_a" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" (width)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
asm volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
ASMALIGN(4) \
"1: \n\t"\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm2 \n\t"\
"pmaddwd %%mm1, %%mm0 \n\t"\
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 1b \n\t"\
"psrad $16, %%mm4 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm6 \n\t"\
"psrad $16, %%mm7 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"paddw %%mm0, %%mm4 \n\t"\
"paddw %%mm0, %%mm6 \n\t"\
"psraw $3, %%mm4 \n\t"\
"psraw $3, %%mm6 \n\t"\
"packuswb %%mm6, %%mm4 \n\t"\
MOVNTQ(%%mm4, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"cmp %2, %%"REG_a" \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" (width)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#define YSCALEYUV2YV121 \
"mov %2, %%"REG_a" \n\t"\
ASMALIGN(4) /* FIXME Unroll? */\
"1: \n\t"\
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
"movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
"psraw $7, %%mm0 \n\t"\
"psraw $7, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
MOVNTQ(%%mm0, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"jnc 1b \n\t"
/*
:: "m" (-lumFilterSize), "m" (-chrFilterSize),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
"r" (dest), "m" (dstW),
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
#define YSCALEYUV2PACKEDX \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
ASMALIGN(4)\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
ASMALIGN(4)\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
" jnz 2b \n\t"\
\
"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
"movq %%mm1, %%mm7 \n\t"\
ASMALIGN(4)\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm1 \n\t"\
"paddw %%mm5, %%mm7 \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
" jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX_END \
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW) \
: "%"REG_a, "%"REG_d, "%"REG_S \
);
#define YSCALEYUV2PACKEDX_ACCURATE \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
ASMALIGN(4)\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
ASMALIGN(4)\
"2: \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm2 \n\t"\
"pmaddwd %%mm1, %%mm0 \n\t"\
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 2b \n\t"\
"psrad $16, %%mm4 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm6 \n\t"\
"psrad $16, %%mm7 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"paddw %%mm0, %%mm4 \n\t"\
"paddw %%mm0, %%mm6 \n\t"\
"movq %%mm4, "U_TEMP"(%0) \n\t"\
"movq %%mm6, "V_TEMP"(%0) \n\t"\
\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -