⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 swscale_template.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 5 页
字号:
			"psllq $2, %%mm3		\n\t"\
			"psllq $2, %%mm4		\n\t"\
\
			"por %%mm3, %%mm2		\n\t"\
			"por %%mm4, %%mm1		\n\t"\
\
			MOVNTQ(%%mm2, (dst, index, 2))\
			MOVNTQ(%%mm1, 8(dst, index, 2))\
\
			"add $8, "#index"		\n\t"\
			"cmp "#dstw", "#index"		\n\t"\
			" jb 1b				\n\t"
#define WRITEBGR15(dst, dstw, index)  REAL_WRITEBGR15(dst, dstw, index)

#define WRITEBGR24OLD(dst, dstw, index) \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
			"movq %%mm2, %%mm1		\n\t" /* B */\
			"movq %%mm5, %%mm6		\n\t" /* R */\
			"punpcklbw %%mm4, %%mm2		\n\t" /* GBGBGBGB 0 */\
			"punpcklbw %%mm7, %%mm5		\n\t" /* 0R0R0R0R 0 */\
			"punpckhbw %%mm4, %%mm1		\n\t" /* GBGBGBGB 2 */\
			"punpckhbw %%mm7, %%mm6		\n\t" /* 0R0R0R0R 2 */\
			"movq %%mm2, %%mm0		\n\t" /* GBGBGBGB 0 */\
			"movq %%mm1, %%mm3		\n\t" /* GBGBGBGB 2 */\
			"punpcklwd %%mm5, %%mm0		\n\t" /* 0RGB0RGB 0 */\
			"punpckhwd %%mm5, %%mm2		\n\t" /* 0RGB0RGB 1 */\
			"punpcklwd %%mm6, %%mm1		\n\t" /* 0RGB0RGB 2 */\
			"punpckhwd %%mm6, %%mm3		\n\t" /* 0RGB0RGB 3 */\
\
			"movq %%mm0, %%mm4		\n\t" /* 0RGB0RGB 0 */\
			"psrlq $8, %%mm0		\n\t" /* 00RGB0RG 0 */\
			"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
			"pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
			"por %%mm4, %%mm0		\n\t" /* 00RGBRGB 0 */\
			"movq %%mm2, %%mm4		\n\t" /* 0RGB0RGB 1 */\
			"psllq $48, %%mm2		\n\t" /* GB000000 1 */\
			"por %%mm2, %%mm0		\n\t" /* GBRGBRGB 0 */\
\
			"movq %%mm4, %%mm2		\n\t" /* 0RGB0RGB 1 */\
			"psrld $16, %%mm4		\n\t" /* 000R000R 1 */\
			"psrlq $24, %%mm2		\n\t" /* 0000RGB0 1.5 */\
			"por %%mm4, %%mm2		\n\t" /* 000RRGBR 1 */\
			"pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
			"movq %%mm1, %%mm4		\n\t" /* 0RGB0RGB 2 */\
			"psrlq $8, %%mm1		\n\t" /* 00RGB0RG 2 */\
			"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
			"pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
			"por %%mm4, %%mm1		\n\t" /* 00RGBRGB 2 */\
			"movq %%mm1, %%mm4		\n\t" /* 00RGBRGB 2 */\
			"psllq $32, %%mm1		\n\t" /* BRGB0000 2 */\
			"por %%mm1, %%mm2		\n\t" /* BRGBRGBR 1 */\
\
			"psrlq $32, %%mm4		\n\t" /* 000000RG 2.5 */\
			"movq %%mm3, %%mm5		\n\t" /* 0RGB0RGB 3 */\
			"psrlq $8, %%mm3		\n\t" /* 00RGB0RG 3 */\
			"pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
			"pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
			"por %%mm5, %%mm3		\n\t" /* 00RGBRGB 3 */\
			"psllq $16, %%mm3		\n\t" /* RGBRGB00 3 */\
			"por %%mm4, %%mm3		\n\t" /* RGBRGBRG 2.5 */\
\
			MOVNTQ(%%mm0, (dst))\
			MOVNTQ(%%mm2, 8(dst))\
			MOVNTQ(%%mm3, 16(dst))\
			"add $24, "#dst"		\n\t"\
\
			"add $8, "#index"		\n\t"\
			"cmp "#dstw", "#index"		\n\t"\
			" jb 1b				\n\t"

#define WRITEBGR24MMX(dst, dstw, index) \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
			"movq %%mm2, %%mm1		\n\t" /* B */\
			"movq %%mm5, %%mm6		\n\t" /* R */\
			"punpcklbw %%mm4, %%mm2		\n\t" /* GBGBGBGB 0 */\
			"punpcklbw %%mm7, %%mm5		\n\t" /* 0R0R0R0R 0 */\
			"punpckhbw %%mm4, %%mm1		\n\t" /* GBGBGBGB 2 */\
			"punpckhbw %%mm7, %%mm6		\n\t" /* 0R0R0R0R 2 */\
			"movq %%mm2, %%mm0		\n\t" /* GBGBGBGB 0 */\
			"movq %%mm1, %%mm3		\n\t" /* GBGBGBGB 2 */\
			"punpcklwd %%mm5, %%mm0		\n\t" /* 0RGB0RGB 0 */\
			"punpckhwd %%mm5, %%mm2		\n\t" /* 0RGB0RGB 1 */\
			"punpcklwd %%mm6, %%mm1		\n\t" /* 0RGB0RGB 2 */\
			"punpckhwd %%mm6, %%mm3		\n\t" /* 0RGB0RGB 3 */\
\
			"movq %%mm0, %%mm4		\n\t" /* 0RGB0RGB 0 */\
			"movq %%mm2, %%mm6		\n\t" /* 0RGB0RGB 1 */\
			"movq %%mm1, %%mm5		\n\t" /* 0RGB0RGB 2 */\
			"movq %%mm3, %%mm7		\n\t" /* 0RGB0RGB 3 */\
\
			"psllq $40, %%mm0		\n\t" /* RGB00000 0 */\
			"psllq $40, %%mm2		\n\t" /* RGB00000 1 */\
			"psllq $40, %%mm1		\n\t" /* RGB00000 2 */\
			"psllq $40, %%mm3		\n\t" /* RGB00000 3 */\
\
			"punpckhdq %%mm4, %%mm0		\n\t" /* 0RGBRGB0 0 */\
			"punpckhdq %%mm6, %%mm2		\n\t" /* 0RGBRGB0 1 */\
			"punpckhdq %%mm5, %%mm1		\n\t" /* 0RGBRGB0 2 */\
			"punpckhdq %%mm7, %%mm3		\n\t" /* 0RGBRGB0 3 */\
\
			"psrlq $8, %%mm0		\n\t" /* 00RGBRGB 0 */\
			"movq %%mm2, %%mm6		\n\t" /* 0RGBRGB0 1 */\
			"psllq $40, %%mm2		\n\t" /* GB000000 1 */\
			"por %%mm2, %%mm0		\n\t" /* GBRGBRGB 0 */\
			MOVNTQ(%%mm0, (dst))\
\
			"psrlq $24, %%mm6		\n\t" /* 0000RGBR 1 */\
			"movq %%mm1, %%mm5		\n\t" /* 0RGBRGB0 2 */\
			"psllq $24, %%mm1		\n\t" /* BRGB0000 2 */\
			"por %%mm1, %%mm6		\n\t" /* BRGBRGBR 1 */\
			MOVNTQ(%%mm6, 8(dst))\
\
			"psrlq $40, %%mm5		\n\t" /* 000000RG 2 */\
			"psllq $8, %%mm3		\n\t" /* RGBRGB00 3 */\
			"por %%mm3, %%mm5		\n\t" /* RGBRGBRG 2 */\
			MOVNTQ(%%mm5, 16(dst))\
\
			"add $24, "#dst"		\n\t"\
\
			"add $8, "#index"			\n\t"\
			"cmp "#dstw", "#index"			\n\t"\
			" jb 1b				\n\t"

#define WRITEBGR24MMX2(dst, dstw, index) \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
			"movq "MANGLE(M24A)", %%mm0	\n\t"\
			"movq "MANGLE(M24C)", %%mm7	\n\t"\
			"pshufw $0x50, %%mm2, %%mm1	\n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
			"pshufw $0x50, %%mm4, %%mm3	\n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
			"pshufw $0x00, %%mm5, %%mm6	\n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
\
			"pand %%mm0, %%mm1		\n\t" /*    B2        B1       B0 */\
			"pand %%mm0, %%mm3		\n\t" /*    G2        G1       G0 */\
			"pand %%mm7, %%mm6		\n\t" /*       R1        R0       */\
\
			"psllq $8, %%mm3		\n\t" /* G2        G1       G0    */\
			"por %%mm1, %%mm6		\n\t"\
			"por %%mm3, %%mm6		\n\t"\
			MOVNTQ(%%mm6, (dst))\
\
			"psrlq $8, %%mm4		\n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
			"pshufw $0xA5, %%mm2, %%mm1	\n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
			"pshufw $0x55, %%mm4, %%mm3	\n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
			"pshufw $0xA5, %%mm5, %%mm6	\n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
\
			"pand "MANGLE(M24B)", %%mm1	\n\t" /* B5       B4        B3    */\
			"pand %%mm7, %%mm3		\n\t" /*       G4        G3       */\
			"pand %%mm0, %%mm6		\n\t" /*    R4        R3       R2 */\
\
			"por %%mm1, %%mm3		\n\t" /* B5    G4 B4     G3 B3    */\
			"por %%mm3, %%mm6		\n\t"\
			MOVNTQ(%%mm6, 8(dst))\
\
			"pshufw $0xFF, %%mm2, %%mm1	\n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */\
			"pshufw $0xFA, %%mm4, %%mm3	\n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
			"pshufw $0xFA, %%mm5, %%mm6	\n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
\
			"pand %%mm7, %%mm1		\n\t" /*       B7        B6       */\
			"pand %%mm0, %%mm3		\n\t" /*    G7        G6       G5 */\
			"pand "MANGLE(M24B)", %%mm6	\n\t" /* R7       R6        R5    */\
\
			"por %%mm1, %%mm3		\n\t"\
			"por %%mm3, %%mm6		\n\t"\
			MOVNTQ(%%mm6, 16(dst))\
\
			"add $24, "#dst"		\n\t"\
\
			"add $8, "#index"		\n\t"\
			"cmp "#dstw", "#index"		\n\t"\
			" jb 1b				\n\t"

#ifdef HAVE_MMX2
#undef WRITEBGR24
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
#else
#undef WRITEBGR24
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
#endif

#define REAL_WRITEYUY2(dst, dstw, index) \
			"packuswb %%mm3, %%mm3		\n\t"\
			"packuswb %%mm4, %%mm4		\n\t"\
			"packuswb %%mm7, %%mm1		\n\t"\
			"punpcklbw %%mm4, %%mm3		\n\t"\
			"movq %%mm1, %%mm7		\n\t"\
			"punpcklbw %%mm3, %%mm1		\n\t"\
			"punpckhbw %%mm3, %%mm7		\n\t"\
\
			MOVNTQ(%%mm1, (dst, index, 2))\
			MOVNTQ(%%mm7, 8(dst, index, 2))\
\
			"add $8, "#index"		\n\t"\
			"cmp "#dstw", "#index"		\n\t"\
			" jb 1b				\n\t"
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)


static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
#ifdef HAVE_MMX
        if(c->params.subsampling & SWS_ACCURATE_RND){
                if(uDest){
                        YSCALEYUV2YV12X_ACCURATE(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                        YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
                }

                YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
        }else{
                if(uDest){
                        YSCALEYUV2YV12X(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                        YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
                }

                YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
        }
#else
#ifdef HAVE_ALTIVEC
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
		      chrFilter, chrSrc, chrFilterSize,
		      dest, uDest, vDest, dstW, chrDstW);
#else //HAVE_ALTIVEC
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
	    chrFilter, chrSrc, chrFilterSize,
	    dest, uDest, vDest, dstW, chrDstW);
#endif //!HAVE_ALTIVEC
#endif
}

static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
	     chrFilter, chrSrc, chrFilterSize,
	     dest, uDest, dstW, chrDstW, dstFormat);
}

static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
{
#ifdef HAVE_MMX
	if(uDest != NULL)
	{
		asm volatile(
				YSCALEYUV2YV121
				:: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
				"g" (-chrDstW)
				: "%"REG_a
			);

		asm volatile(
				YSCALEYUV2YV121
				:: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
				"g" (-chrDstW)
				: "%"REG_a
			);
	}

	asm volatile(
		YSCALEYUV2YV121
		:: "r" (lumSrc + dstW), "r" (dest + dstW),
		"g" (-dstW)
		: "%"REG_a
	);
#else
	int i;
	for(i=0; i<dstW; i++)
	{
		int val= lumSrc[i]>>7;

		if(val&256){
			if(val<0) val=0;
			else      val=255;
		}

		dest[i]= val;
	}

	if(uDest != NULL)
		for(i=0; i<chrDstW; i++)
		{
			int u=chrSrc[i]>>7;
			int v=chrSrc[i + 2048]>>7;

			if((u|v)&256){
				if(u<0)         u=0;
				else if (u>255) u=255;
				if(v<0)         v=0;
				else if (v>255) v=255;
			}

			uDest[i]= u;
			vDest[i]= v;
		}
#endif
}


/**
 * vertical scale YV12 to RGB
 */
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
			    uint8_t *dest, long dstW, long dstY)
{
	long dummy=0;
#ifdef HAVE_MMX
    if(c->params.subsampling & SWS_ACCURATE_RND){
                switch(c->dstFormat){
                case IMGFMT_BGR32:
                                YSCALEYUV2PACKEDX_ACCURATE
				YSCALEYUV2RGBX
				WRITEBGR32(%4, %5, %%REGa)

                                YSCALEYUV2PACKEDX_END
                        return;
                case IMGFMT_BGR24:
                                YSCALEYUV2PACKEDX_ACCURATE
				YSCALEYUV2RGBX
				"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" //FIXME optimize
				"add %4, %%"REG_b"			\n\t"
				WRITEBGR24(%%REGb, %5, %%REGa)


			:: "r" (&c->redDither),
			   "m" (dummy), "m" (dummy), "m" (dummy),
			   "r" (dest), "m" (dstW)
			: "%"REG_a, "%"REG_b, "%"REG_d, "%"REG_S //FIXME ebx
			);
                        return;
                case IMGFMT_BGR15:
                                YSCALEYUV2PACKEDX_ACCURATE
				YSCALEYUV2RGBX
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR15(%4, %5, %%REGa)
                                YSCALEYUV2PACKEDX_END
                        return;
                case IMGFMT_BGR16:
                                YSCALEYUV2PACKEDX_ACCURATE
				YSCALEYUV2RGBX
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR16(%4, %5, %%REGa)
                                YSCALEYUV2PACKEDX_END
                        return;
                case IMGFMT_YUY2:
				YSCALEYUV2PACKEDX_ACCURATE
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */

				"psraw $3, %%mm3		\n\t"
				"psraw $3, %%mm4		\n\t"
				"psraw $3, %%mm1		\n\t"
				"psraw $3, %%mm7		\n\t"
				WRITEYUY2(%4, %5, %%REGa)
                                YSCALEYUV2PACKEDX_END
                        return;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -