📄 swscale_template.c
字号:
/*
Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
Modified to support multi-thread related features
by Haruhiko Yamagata <h.yamagata@nifty.com> in 2006.
This modification is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
*/
#include "asmalign.h"
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#undef PREFETCHW
#undef EMMS
#undef SFENCE
#ifdef HAVE_3DNOW
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
#ifdef HAVE_3DNOW
#define PREFETCH "prefetch"
#define PREFETCHW "prefetchw"
#elif defined ( HAVE_MMX2 )
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
#define PREFETCH "/nop"
#define PREFETCHW "/nop"
#endif
#ifdef HAVE_MMX2
#define SFENCE "sfence"
#else
#define SFENCE "/nop"
#endif
#ifdef HAVE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif defined (HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#ifdef HAVE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
#ifdef HAVE_ALTIVEC
#include "swscale_altivec_template.c"
#endif
#define YSCALEYUV2YV12X(x, offset, dest, width) \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
ASMALIGN16 /* FIXME Unroll? */\
"1: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
" jnz 1b \n\t"\
"psraw $3, %%mm3 \n\t"\
"psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"cmp %2, %%"REG_a" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "p" (width)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
asm volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
ASMALIGN16 \
"1: \n\t"\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm2 \n\t"\
"pmaddwd %%mm1, %%mm0 \n\t"\
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 1b \n\t"\
"psrad $16, %%mm4 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm6 \n\t"\
"psrad $16, %%mm7 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"paddw %%mm0, %%mm4 \n\t"\
"paddw %%mm0, %%mm6 \n\t"\
"psraw $3, %%mm4 \n\t"\
"psraw $3, %%mm6 \n\t"\
"packuswb %%mm6, %%mm4 \n\t"\
MOVNTQ(%%mm4, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"cmp %2, %%"REG_a" \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "p" (width)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#define YSCALEYUV2YV121 \
"mov %2, %%"REG_a" \n\t"\
ASMALIGN16 /* FIXME Unroll? */\
"1: \n\t"\
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
"movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
"psraw $7, %%mm0 \n\t"\
"psraw $7, %%mm1 \n\t"\
"packuswb %%mm1, %%mm0 \n\t"\
MOVNTQ(%%mm0, (%1, %%REGa))\
"add $8, %%"REG_a" \n\t"\
"jnc 1b \n\t"
/*
:: "m" (-lumFilterSize), "m" (-chrFilterSize),
"m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
"r" (dest), "m" (dstW),
"m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
: "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
#define YSCALEYUV2PACKEDX \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
ASMALIGN16\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
"movq %%mm3, %%mm4 \n\t"\
ASMALIGN16\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
" jnz 2b \n\t"\
\
"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
"movq %%mm1, %%mm7 \n\t"\
ASMALIGN16\
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm1 \n\t"\
"paddw %%mm5, %%mm7 \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
" jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX_END\
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#define YSCALEYUV2PACKEDX_ACCURATE \
asm volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
ASMALIGN16\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
ASMALIGN16\
"2: \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm2 \n\t"\
"pmaddwd %%mm1, %%mm0 \n\t"\
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 2b \n\t"\
"psrad $16, %%mm4 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm6 \n\t"\
"psrad $16, %%mm7 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"paddw %%mm0, %%mm4 \n\t"\
"paddw %%mm0, %%mm6 \n\t"\
"movq %%mm4, "U_TEMP"(%0) \n\t"\
"movq %%mm6, "V_TEMP"(%0) \n\t"\
\
"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pxor %%mm1, %%mm1 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
ASMALIGN16\
"2: \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm4, %%mm0 \n\t"\
"punpckhwd %%mm4, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
"pmaddwd %%mm4, %%mm0 \n\t"\
"pmaddwd %%mm4, %%mm3 \n\t"\
"paddd %%mm0, %%mm1 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
"pmaddwd %%mm4, %%mm2 \n\t"\
"pmaddwd %%mm4, %%mm0 \n\t"\
"paddd %%mm2, %%mm7 \n\t"\
"paddd %%mm0, %%mm6 \n\t"\
" jnz 2b \n\t"\
"psrad $16, %%mm1 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm7 \n\t"\
"psrad $16, %%mm6 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
"packssdw %%mm5, %%mm1 \n\t"\
"packssdw %%mm6, %%mm7 \n\t"\
"paddw %%mm0, %%mm1 \n\t"\
"paddw %%mm0, %%mm7 \n\t"\
"movq "U_TEMP"(%0), %%mm3 \n\t"\
"movq "V_TEMP"(%0), %%mm4 \n\t"\
#define YSCALEYUV2RGBX \
"psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
"psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
"pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
"pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
"psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
"psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
"pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\
"movq %%mm5, %%mm6 \n\t"\
"movq %%mm4, %%mm3 \n\t"\
"punpcklwd %%mm2, %%mm2 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm4, %%mm4 \n\t"\
"paddw %%mm1, %%mm2 \n\t"\
"paddw %%mm1, %%mm5 \n\t"\
"paddw %%mm1, %%mm4 \n\t"\
"punpckhwd %%mm0, %%mm0 \n\t"\
"punpckhwd %%mm6, %%mm6 \n\t"\
"punpckhwd %%mm3, %%mm3 \n\t"\
"paddw %%mm7, %%mm0 \n\t"\
"paddw %%mm7, %%mm6 \n\t"\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -