⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 swscale_template.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

/*
    Modified to support multi-thread related features
    by Haruhiko Yamagata <h.yamagata@nifty.com> in 2006.

    This modification is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
*/
#include "asmalign.h"

/*
 * Instruction-name macros used by the inline-assembly templates below.
 * Any earlier definitions are dropped first so that the choices made here
 * (driven by HAVE_3DNOW / HAVE_MMX2) are the ones in effect.
 */
#undef EMMS
#undef SFENCE
#undef PREFETCH
#undef PREFETCHW
#undef PAVGB
#undef MOVNTQ
#undef REAL_MOVNTQ

/* State-reset and prefetch mnemonics: 3DNow! takes priority over MMX2. */
#if defined(HAVE_3DNOW)
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS      "femms"
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#else
#define EMMS      "emms"
#if defined(HAVE_MMX2)
#define PREFETCH  "prefetchnta"
#define PREFETCHW "prefetcht0"
#else
/* No prefetch instruction available: the mnemonic is replaced by "/nop". */
#define PREFETCH  "/nop"
#define PREFETCHW "/nop"
#endif
#endif

/*
 * Store fence, byte average and quadword store: MMX2 takes priority.
 * PAVGB stays undefined when neither MMX2 nor 3DNow! is available.
 */
#if defined(HAVE_MMX2)
#define SFENCE "sfence"
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
#define SFENCE "/nop"
#if defined(HAVE_3DNOW)
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
#endif

/* Extra level of indirection: the arguments are macro-expanded before
 * REAL_MOVNTQ stringifies them. */
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)

#ifdef HAVE_ALTIVEC
#include "swscale_altivec_template.c"
#endif

#define YSCALEYUV2YV12X(x, offset, dest, width) \
		asm volatile(\
			"xor %%"REG_a", %%"REG_a"	\n\t"\
			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
			"movq %%mm3, %%mm4		\n\t"\
			"lea " offset "(%0), %%"REG_d"	\n\t"\
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
			ASMALIGN16 /* FIXME Unroll? */\
			"1:				\n\t"\
			"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
			"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
			"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
			"add $16, %%"REG_d"		\n\t"\
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
			"test %%"REG_S", %%"REG_S"	\n\t"\
			"pmulhw %%mm0, %%mm2		\n\t"\
			"pmulhw %%mm0, %%mm5		\n\t"\
			"paddw %%mm2, %%mm3		\n\t"\
			"paddw %%mm5, %%mm4		\n\t"\
			" jnz 1b			\n\t"\
			"psraw $3, %%mm3		\n\t"\
			"psraw $3, %%mm4		\n\t"\
			"packuswb %%mm4, %%mm3		\n\t"\
			MOVNTQ(%%mm3, (%1, %%REGa))\
			"add $8, %%"REG_a"		\n\t"\
			"cmp %2, %%"REG_a"		\n\t"\
			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
			"movq %%mm3, %%mm4		\n\t"\
			"lea " offset "(%0), %%"REG_d"	\n\t"\
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
			"jb 1b				\n\t"\
                        :: "r" (&c->redDither),\
                        "r" (dest), "p" (width)\
                        : "%"REG_a, "%"REG_d, "%"REG_S\
                );

#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
		asm volatile(\
			"lea " offset "(%0), %%"REG_d"	\n\t"\
			"xor %%"REG_a", %%"REG_a"	\n\t"\
                        "pxor %%mm4, %%mm4              \n\t"\
                        "pxor %%mm5, %%mm5              \n\t"\
                        "pxor %%mm6, %%mm6              \n\t"\
                        "pxor %%mm7, %%mm7              \n\t"\
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
			ASMALIGN16 \
			"1:				\n\t"\
			"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
			"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
			"mov 4(%%"REG_d"), %%"REG_S"	\n\t"\
			"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
                        "movq %%mm0, %%mm3              \n\t"\
                        "punpcklwd %%mm1, %%mm0        \n\t"\
                        "punpckhwd %%mm1, %%mm3        \n\t"\
			"movq 8(%%"REG_d"), %%mm1	\n\t" /* filterCoeff */\
                        "pmaddwd %%mm1, %%mm0           \n\t"\
                        "pmaddwd %%mm1, %%mm3           \n\t"\
                        "paddd %%mm0, %%mm4             \n\t"\
                        "paddd %%mm3, %%mm5             \n\t"\
			"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
			"mov 16(%%"REG_d"), %%"REG_S"	\n\t"\
			"add $16, %%"REG_d"		\n\t"\
                        "test %%"REG_S", %%"REG_S"      \n\t"\
                        "movq %%mm2, %%mm0              \n\t"\
                        "punpcklwd %%mm3, %%mm2        \n\t"\
                        "punpckhwd %%mm3, %%mm0        \n\t"\
                        "pmaddwd %%mm1, %%mm2           \n\t"\
                        "pmaddwd %%mm1, %%mm0           \n\t"\
                        "paddd %%mm2, %%mm6             \n\t"\
                        "paddd %%mm0, %%mm7             \n\t"\
			" jnz 1b			\n\t"\
			"psrad $16, %%mm4		\n\t"\
			"psrad $16, %%mm5		\n\t"\
			"psrad $16, %%mm6		\n\t"\
			"psrad $16, %%mm7		\n\t"\
			"movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
			"packssdw %%mm5, %%mm4		\n\t"\
			"packssdw %%mm7, %%mm6		\n\t"\
                        "paddw %%mm0, %%mm4             \n\t"\
                        "paddw %%mm0, %%mm6             \n\t"\
			"psraw $3, %%mm4		\n\t"\
			"psraw $3, %%mm6		\n\t"\
			"packuswb %%mm6, %%mm4		\n\t"\
			MOVNTQ(%%mm4, (%1, %%REGa))\
			"add $8, %%"REG_a"		\n\t"\
			"cmp %2, %%"REG_a"		\n\t"\
			"lea " offset "(%0), %%"REG_d"	\n\t"\
                        "pxor %%mm4, %%mm4              \n\t"\
                        "pxor %%mm5, %%mm5              \n\t"\
                        "pxor %%mm6, %%mm6              \n\t"\
                        "pxor %%mm7, %%mm7              \n\t"\
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
			"jb 1b				\n\t"\
                        :: "r" (&c->redDither),\
                        "r" (dest), "p" (width)\
                        : "%"REG_a, "%"REG_d, "%"REG_S\
                );

#define YSCALEYUV2YV121 \
			"mov %2, %%"REG_a"		\n\t"\
			ASMALIGN16 /* FIXME Unroll? */\
			"1:				\n\t"\
			"movq (%0, %%"REG_a", 2), %%mm0	\n\t"\
			"movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
			"psraw $7, %%mm0		\n\t"\
			"psraw $7, %%mm1		\n\t"\
			"packuswb %%mm1, %%mm0		\n\t"\
			MOVNTQ(%%mm0, (%1, %%REGa))\
			"add $8, %%"REG_a"		\n\t"\
			"jnc 1b				\n\t"

/*
			:: "m" (-lumFilterSize), "m" (-chrFilterSize),
			   "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
			   "r" (dest), "m" (dstW),
			   "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
			: "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
#define YSCALEYUV2PACKEDX \
	asm volatile(\
		"xor %%"REG_a", %%"REG_a"	\n\t"\
		ASMALIGN16\
		"nop				\n\t"\
		"1:				\n\t"\
		"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
		"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
		"movq %%mm3, %%mm4		\n\t"\
		ASMALIGN16\
		"2:				\n\t"\
		"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
		"movq (%%"REG_S", %%"REG_a"), %%mm2	\n\t" /* UsrcData */\
		"movq 4096(%%"REG_S", %%"REG_a"), %%mm5	\n\t" /* VsrcData */\
		"add $16, %%"REG_d"		\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
		"pmulhw %%mm0, %%mm2		\n\t"\
		"pmulhw %%mm0, %%mm5		\n\t"\
		"paddw %%mm2, %%mm3		\n\t"\
		"paddw %%mm5, %%mm4		\n\t"\
		"test %%"REG_S", %%"REG_S"	\n\t"\
		" jnz 2b			\n\t"\
\
		"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
		"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
		"movq %%mm1, %%mm7		\n\t"\
		ASMALIGN16\
		"2:				\n\t"\
		"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
		"movq (%%"REG_S", %%"REG_a", 2), %%mm2	\n\t" /* Y1srcData */\
		"movq 8(%%"REG_S", %%"REG_a", 2), %%mm5	\n\t" /* Y2srcData */\
		"add $16, %%"REG_d"		\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
		"pmulhw %%mm0, %%mm2		\n\t"\
		"pmulhw %%mm0, %%mm5		\n\t"\
		"paddw %%mm2, %%mm1		\n\t"\
		"paddw %%mm5, %%mm7		\n\t"\
		"test %%"REG_S", %%"REG_S"	\n\t"\
		" jnz 2b			\n\t"\

/*
 * YSCALEYUV2PACKEDX_END
 *
 * Supplies the (input-only) operand list and clobbers, and closes the
 * `asm volatile(` statement opened by YSCALEYUV2PACKEDX or
 * YSCALEYUV2PACKEDX_ACCURATE.
 *
 * Operands: %0 = &c->redDither, %1-%3 = dummy (memory), %4 = dest,
 * %5 = dstW (memory). The variables c, dummy, dest and dstW must be in scope
 * where the macro is expanded.
 * NOTE(review): the three dummy operands presumably keep dest/dstW at the
 * operand numbers expected by other asm fragments - confirm at the use site.
 *
 * Declared clobbers: REG_a, REG_d, REG_S (no MMX registers, no "memory").
 */
#define YSCALEYUV2PACKEDX_END\
        :: "r" (&c->redDither), \
            "m" (dummy), "m" (dummy), "m" (dummy),\
            "r" (dest), "m" (dstW)\
        : "%"REG_a, "%"REG_d, "%"REG_S\
        );

#define YSCALEYUV2PACKEDX_ACCURATE \
	asm volatile(\
		"xor %%"REG_a", %%"REG_a"	\n\t"\
		ASMALIGN16\
		"nop				\n\t"\
		"1:				\n\t"\
		"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
                "pxor %%mm4, %%mm4              \n\t"\
                "pxor %%mm5, %%mm5              \n\t"\
                "pxor %%mm6, %%mm6              \n\t"\
                "pxor %%mm7, %%mm7              \n\t"\
		ASMALIGN16\
		"2:				\n\t"\
		"movq (%%"REG_S", %%"REG_a"), %%mm0	\n\t" /* UsrcData */\
		"movq 4096(%%"REG_S", %%"REG_a"), %%mm2	\n\t" /* VsrcData */\
		"mov 4(%%"REG_d"), %%"REG_S"	\n\t"\
		"movq (%%"REG_S", %%"REG_a"), %%mm1	\n\t" /* UsrcData */\
                "movq %%mm0, %%mm3              \n\t"\
                "punpcklwd %%mm1, %%mm0        \n\t"\
                "punpckhwd %%mm1, %%mm3        \n\t"\
                "movq 8(%%"REG_d"), %%mm1	\n\t" /* filterCoeff */\
                "pmaddwd %%mm1, %%mm0           \n\t"\
                "pmaddwd %%mm1, %%mm3           \n\t"\
                "paddd %%mm0, %%mm4             \n\t"\
                "paddd %%mm3, %%mm5             \n\t"\
		"movq 4096(%%"REG_S", %%"REG_a"), %%mm3	\n\t" /* VsrcData */\
                "mov 16(%%"REG_d"), %%"REG_S"	\n\t"\
		"add $16, %%"REG_d"		\n\t"\
                "test %%"REG_S", %%"REG_S"      \n\t"\
                "movq %%mm2, %%mm0              \n\t"\
                "punpcklwd %%mm3, %%mm2        \n\t"\
                "punpckhwd %%mm3, %%mm0        \n\t"\
                "pmaddwd %%mm1, %%mm2           \n\t"\
                "pmaddwd %%mm1, %%mm0           \n\t"\
                "paddd %%mm2, %%mm6             \n\t"\
                "paddd %%mm0, %%mm7             \n\t"\
		" jnz 2b			\n\t"\
                "psrad $16, %%mm4		\n\t"\
                "psrad $16, %%mm5		\n\t"\
                "psrad $16, %%mm6		\n\t"\
                "psrad $16, %%mm7		\n\t"\
                "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
                "packssdw %%mm5, %%mm4		\n\t"\
                "packssdw %%mm7, %%mm6		\n\t"\
                "paddw %%mm0, %%mm4             \n\t"\
                "paddw %%mm0, %%mm6             \n\t"\
                "movq %%mm4, "U_TEMP"(%0)       \n\t"\
                "movq %%mm6, "V_TEMP"(%0)       \n\t"\
\
		"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
                "pxor %%mm1, %%mm1              \n\t"\
                "pxor %%mm5, %%mm5              \n\t"\
                "pxor %%mm7, %%mm7              \n\t"\
                "pxor %%mm6, %%mm6              \n\t"\
		ASMALIGN16\
		"2:				\n\t"\
		"movq (%%"REG_S", %%"REG_a", 2), %%mm0	\n\t" /* Y1srcData */\
		"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2	\n\t" /* Y2srcData */\
		"mov 4(%%"REG_d"), %%"REG_S"	\n\t"\
		"movq (%%"REG_S", %%"REG_a", 2), %%mm4	\n\t" /* Y1srcData */\
                "movq %%mm0, %%mm3              \n\t"\
                "punpcklwd %%mm4, %%mm0        \n\t"\
                "punpckhwd %%mm4, %%mm3        \n\t"\
                "movq 8(%%"REG_d"), %%mm4	\n\t" /* filterCoeff */\
                "pmaddwd %%mm4, %%mm0           \n\t"\
                "pmaddwd %%mm4, %%mm3           \n\t"\
                "paddd %%mm0, %%mm1             \n\t"\
                "paddd %%mm3, %%mm5             \n\t"\
		"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3	\n\t" /* Y2srcData */\
                "mov 16(%%"REG_d"), %%"REG_S"	\n\t"\
		"add $16, %%"REG_d"		\n\t"\
                "test %%"REG_S", %%"REG_S"      \n\t"\
                "movq %%mm2, %%mm0              \n\t"\
                "punpcklwd %%mm3, %%mm2        \n\t"\
                "punpckhwd %%mm3, %%mm0        \n\t"\
                "pmaddwd %%mm4, %%mm2           \n\t"\
                "pmaddwd %%mm4, %%mm0           \n\t"\
                "paddd %%mm2, %%mm7             \n\t"\
                "paddd %%mm0, %%mm6             \n\t"\
		" jnz 2b			\n\t"\
                "psrad $16, %%mm1		\n\t"\
                "psrad $16, %%mm5		\n\t"\
                "psrad $16, %%mm7		\n\t"\
                "psrad $16, %%mm6		\n\t"\
                "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
                "packssdw %%mm5, %%mm1		\n\t"\
                "packssdw %%mm6, %%mm7		\n\t"\
                "paddw %%mm0, %%mm1             \n\t"\
                "paddw %%mm0, %%mm7             \n\t"\
                "movq  "U_TEMP"(%0), %%mm3      \n\t"\
                "movq  "V_TEMP"(%0), %%mm4      \n\t"\

#define YSCALEYUV2RGBX \
		"psubw "U_OFFSET"(%0), %%mm3	\n\t" /* (U-128)8*/\
		"psubw "V_OFFSET"(%0), %%mm4	\n\t" /* (V-128)8*/\
		"movq %%mm3, %%mm2		\n\t" /* (U-128)8*/\
		"movq %%mm4, %%mm5		\n\t" /* (V-128)8*/\
		"pmulhw "UG_COEFF"(%0), %%mm3	\n\t"\
		"pmulhw "VG_COEFF"(%0), %%mm4	\n\t"\
	/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
		"pmulhw "UB_COEFF"(%0), %%mm2	\n\t"\
		"pmulhw "VR_COEFF"(%0), %%mm5	\n\t"\
		"psubw "Y_OFFSET"(%0), %%mm1	\n\t" /* 8(Y-16)*/\
		"psubw "Y_OFFSET"(%0), %%mm7	\n\t" /* 8(Y-16)*/\
		"pmulhw "Y_COEFF"(%0), %%mm1	\n\t"\
		"pmulhw "Y_COEFF"(%0), %%mm7	\n\t"\
	/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
		"paddw %%mm3, %%mm4		\n\t"\
		"movq %%mm2, %%mm0		\n\t"\
		"movq %%mm5, %%mm6		\n\t"\
		"movq %%mm4, %%mm3		\n\t"\
		"punpcklwd %%mm2, %%mm2		\n\t"\
		"punpcklwd %%mm5, %%mm5		\n\t"\
		"punpcklwd %%mm4, %%mm4		\n\t"\
		"paddw %%mm1, %%mm2		\n\t"\
		"paddw %%mm1, %%mm5		\n\t"\
		"paddw %%mm1, %%mm4		\n\t"\
		"punpckhwd %%mm0, %%mm0		\n\t"\
		"punpckhwd %%mm6, %%mm6		\n\t"\
		"punpckhwd %%mm3, %%mm3		\n\t"\
		"paddw %%mm7, %%mm0		\n\t"\
		"paddw %%mm7, %%mm6		\n\t"\

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -