📄 rgb2rgb_template.c

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 C
📖 第 1 页 / 共 5 页
字号:
		"movq %3, %%mm5			\n\t"
		"movq %4, %%mm6			\n\t"
		"movq %5, %%mm7			\n\t"
		".balign 16			\n\t"
		"1:				\n\t"
		PREFETCH" 32(%1)		\n\t"
		"movd	(%1), %%mm0		\n\t"
		"movd	4(%1), %%mm3		\n\t"
		"punpckldq 8(%1), %%mm0		\n\t"
		"punpckldq 12(%1), %%mm3	\n\t"
		"movq %%mm0, %%mm1		\n\t"
		"movq %%mm3, %%mm4		\n\t"
		"pand %%mm6, %%mm0		\n\t"
		"pand %%mm6, %%mm3		\n\t"
		"pmaddwd %%mm7, %%mm0		\n\t"
		"pmaddwd %%mm7, %%mm3		\n\t"
		"pand %%mm5, %%mm1		\n\t"
		"pand %%mm5, %%mm4		\n\t"
		"por %%mm1, %%mm0		\n\t"
		"por %%mm4, %%mm3		\n\t"
		"psrld $6, %%mm0		\n\t"
		"pslld $10, %%mm3		\n\t"
		"por %%mm3, %%mm0		\n\t"
		MOVNTQ"	%%mm0, (%0)		\n\t"
		"add $16, %1			\n\t"
		"add $8, %0			\n\t"
		"cmp %2, %1			\n\t"
		" jb 1b				\n\t"
		: "+r" (d), "+r"(s)
		: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
	);
#else
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	4%1, %%mm3\n\t"
		"punpckldq 8%1, %%mm0\n\t"
		"punpckldq 12%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psrlq	$3, %%mm0\n\t"
		"psrlq	$3, %%mm3\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%2, %%mm3\n\t"
		"psrlq	$6, %%mm1\n\t"
		"psrlq	$6, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$9, %%mm2\n\t"
		"psrlq	$9, %%mm5\n\t"
		"pand	%%mm7, %%mm2\n\t"
		"pand	%%mm7, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 16;
	}
#endif
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		register int rgb = *(uint32_t*)s; s += 4;
		*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
	}
}

static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	mm_end = end - 15;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	4%1, %%mm3\n\t"
		"punpckldq 8%1, %%mm0\n\t"
		"punpckldq 12%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psllq	$7, %%mm0\n\t"
		"psllq	$7, %%mm3\n\t"
		"pand	%%mm7, %%mm0\n\t"
		"pand	%%mm7, %%mm3\n\t"
		"psrlq	$6, %%mm1\n\t"
		"psrlq	$6, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$19, %%mm2\n\t"
		"psrlq	$19, %%mm5\n\t"
		"pand	%2, %%mm2\n\t"
		"pand	%2, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 16;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		register int rgb = *(uint32_t*)s; s += 4;
		*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
	}
}

static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	mm_end = end - 11;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psrlq	$3, %%mm0\n\t"
		"psrlq	$3, %%mm3\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%2, %%mm3\n\t"
		"psrlq	$5, %%mm1\n\t"
		"psrlq	$5, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$8, %%mm2\n\t"
		"psrlq	$8, %%mm5\n\t"
		"pand	%%mm7, %%mm2\n\t"
		"pand	%%mm7, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		const int b= *s++;
		const int g= *s++;
		const int r= *s++;
		*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
	}
}

static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_16mask),"m"(green_16mask));
	mm_end = end - 15;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psllq	$8, %%mm0\n\t"
		"psllq	$8, %%mm3\n\t"
		"pand	%%mm7, %%mm0\n\t"
		"pand	%%mm7, %%mm3\n\t"
		"psrlq	$5, %%mm1\n\t"
		"psrlq	$5, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$19, %%mm2\n\t"
		"psrlq	$19, %%mm5\n\t"
		"pand	%2, %%mm2\n\t"
		"pand	%2, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		const int r= *s++;
		const int g= *s++;
		const int b= *s++;
		*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
	}
}

static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	mm_end = end - 11;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psrlq	$3, %%mm0\n\t"
		"psrlq	$3, %%mm3\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%2, %%mm3\n\t"
		"psrlq	$6, %%mm1\n\t"
		"psrlq	$6, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$9, %%mm2\n\t"
		"psrlq	$9, %%mm5\n\t"
		"pand	%%mm7, %%mm2\n\t"
		"pand	%%mm7, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		const int b= *s++;
		const int g= *s++;
		const int r= *s++;
		*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
	}
}

static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint8_t *s = src;
	const uint8_t *end;
#ifdef HAVE_MMX
	const uint8_t *mm_end;
#endif
	uint16_t *d = (uint16_t *)dst;
	end = s + src_size;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*src):"memory");
	__asm __volatile(
	    "movq	%0, %%mm7\n\t"
	    "movq	%1, %%mm6\n\t"
	    ::"m"(red_15mask),"m"(green_15mask));
	mm_end = end - 15;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movd	%1, %%mm0\n\t"
		"movd	3%1, %%mm3\n\t"
		"punpckldq 6%1, %%mm0\n\t"
		"punpckldq 9%1, %%mm3\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm0, %%mm2\n\t"
		"movq	%%mm3, %%mm4\n\t"
		"movq	%%mm3, %%mm5\n\t"
		"psllq	$7, %%mm0\n\t"
		"psllq	$7, %%mm3\n\t"
		"pand	%%mm7, %%mm0\n\t"
		"pand	%%mm7, %%mm3\n\t"
		"psrlq	$6, %%mm1\n\t"
		"psrlq	$6, %%mm4\n\t"
		"pand	%%mm6, %%mm1\n\t"
		"pand	%%mm6, %%mm4\n\t"
		"psrlq	$19, %%mm2\n\t"
		"psrlq	$19, %%mm5\n\t"
		"pand	%2, %%mm2\n\t"
		"pand	%2, %%mm5\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm2, %%mm0\n\t"
		"por	%%mm5, %%mm3\n\t"
		"psllq	$16, %%mm3\n\t"
		"por	%%mm3, %%mm0\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
		d += 4;
		s += 12;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	while(s < end)
	{
		const int r= *s++;
		const int g= *s++;
		const int b= *s++;
		*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
	}
}

/*
  I use here less accurate approximation by simply
 left-shifting the input
  value and filling the low order bits with
 zeroes. This method improves png's
  compression but this scheme cannot reproduce white exactly, since it does not
  generate an all-ones maximum value; the net effect is to darken the
  image slightly.

  The better method should be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      Leftmost Bits Repeated to Fill Open Bits
       |
   Original Bits
*/
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = (uint8_t *)dst;
	const uint16_t *s = (uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
	mm_end = end - 7;
	while(s < mm_end)
	{
	    __asm __volatile(
		PREFETCH" 32%1\n\t"
		"movq	%1, %%mm0\n\t"
		"movq	%1, %%mm1\n\t"
		"movq	%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$2, %%mm1\n\t"
		"psrlq	$7, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
		"psllq	$16, %%mm5\n\t"
		"por	%%mm4, %%mm3\n\t"
		"por	%%mm5, %%mm3\n\t"

		"movq	%%mm0, %%mm6\n\t"
		"movq	%%mm3, %%mm7\n\t"

		"movq	8%1, %%mm0\n\t"
		"movq	8%1, %%mm1\n\t"
		"movq	8%1, %%mm2\n\t"
		"pand	%2, %%mm0\n\t"
		"pand	%3, %%mm1\n\t"
		"pand	%4, %%mm2\n\t"
		"psllq	$3, %%mm0\n\t"
		"psrlq	$2, %%mm1\n\t"
		"psrlq	$7, %%mm2\n\t"
		"movq	%%mm0, %%mm3\n\t"
		"movq	%%mm1, %%mm4\n\t"
		"movq	%%mm2, %%mm5\n\t"
		"punpcklwd %5, %%mm0\n\t"
		"punpcklwd %5, %%mm1\n\t"
		"punpcklwd %5, %%mm2\n\t"
		"punpckhwd %5, %%mm3\n\t"
		"punpckhwd %5, %%mm4\n\t"
		"punpckhwd %5, %%mm5\n\t"
		"psllq	$8, %%mm1\n\t"
		"psllq	$16, %%mm2\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm2, %%mm0\n\t"
		"psllq	$8, %%mm4\n\t"
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -