/* rgb2rgb_template.c (text from the code-viewer page header removed) */
"punpcklwd %5, %%mm0 \n\t"
"punpcklwd %5, %%mm1 \n\t"
"punpcklwd %5, %%mm2 \n\t"
"punpckhwd %5, %%mm3 \n\t"
"punpckhwd %5, %%mm4 \n\t"
"punpckhwd %5, %%mm5 \n\t"
"psllq $8, %%mm1 \n\t"
"psllq $16, %%mm2 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm2, %%mm0 \n\t"
"psllq $8, %%mm4 \n\t"
"psllq $16, %%mm5 \n\t"
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
"movq %%mm0, %%mm6 \n\t"
"movq %%mm3, %%mm7 \n\t"
"movq 8%1, %%mm0 \n\t"
"movq 8%1, %%mm1 \n\t"
"movq 8%1, %%mm2 \n\t"
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
"psllq $3, %%mm0 \n\t"
"psrlq $3, %%mm1 \n\t"
"psrlq $8, %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
"punpcklwd %5, %%mm0 \n\t"
"punpcklwd %5, %%mm1 \n\t"
"punpcklwd %5, %%mm2 \n\t"
"punpckhwd %5, %%mm3 \n\t"
"punpckhwd %5, %%mm4 \n\t"
"punpckhwd %5, %%mm5 \n\t"
"psllq $8, %%mm1 \n\t"
"psllq $16, %%mm2 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm2, %%mm0 \n\t"
"psllq $8, %%mm4 \n\t"
"psllq $16, %%mm5 \n\t"
"por %%mm4, %%mm3 \n\t"
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* Borrowed 32 to 24 */
__asm __volatile(
"movq %%mm0, %%mm4 \n\t"
"movq %%mm3, %%mm5 \n\t"
"movq %%mm6, %%mm0 \n\t"
"movq %%mm7, %%mm1 \n\t"
"movq %%mm4, %%mm6 \n\t"
"movq %%mm5, %%mm7 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm3 \n\t"
"psrlq $8, %%mm2 \n\t"
"psrlq $8, %%mm3 \n\t"
"psrlq $8, %%mm6 \n\t"
"psrlq $8, %%mm7 \n\t"
"pand %2, %%mm0 \n\t"
"pand %2, %%mm1 \n\t"
"pand %2, %%mm4 \n\t"
"pand %2, %%mm5 \n\t"
"pand %3, %%mm2 \n\t"
"pand %3, %%mm3 \n\t"
"pand %3, %%mm6 \n\t"
"pand %3, %%mm7 \n\t"
"por %%mm2, %%mm0 \n\t"
"por %%mm3, %%mm1 \n\t"
"por %%mm6, %%mm4 \n\t"
"por %%mm7, %%mm5 \n\t"
"movq %%mm1, %%mm2 \n\t"
"movq %%mm4, %%mm3 \n\t"
"psllq $48, %%mm2 \n\t"
"psllq $32, %%mm3 \n\t"
"pand %4, %%mm2 \n\t"
"pand %5, %%mm3 \n\t"
"por %%mm2, %%mm0 \n\t"
"psrlq $16, %%mm1 \n\t"
"psrlq $32, %%mm4 \n\t"
"psllq $16, %%mm5 \n\t"
"por %%mm3, %%mm1 \n\t"
"pand %6, %%mm5 \n\t"
"por %%mm5, %%mm4 \n\t"
MOVNTQ" %%mm0, %0 \n\t"
MOVNTQ" %%mm1, 8%0 \n\t"
MOVNTQ" %%mm4, 16%0"
:"=m"(*d)
:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
:"memory");
d += 24;
s += 8;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while (s < end)
{
register uint16_t bgr;
bgr = *s++;
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0xF800)>>8;
}
}
/**
 * Expand packed 15-bit pixels to 4 bytes per pixel.
 *
 * Each 16-bit input word is split into three 5-bit fields (bits 0-4,
 * bits 5-9, bits 10-14; bit 15 is ignored).  Every field is moved to the
 * top of its own output byte (low 3 bits left at zero) and a fourth byte
 * of 0 is emitted.  Without WORDS_BIGENDIAN the byte order written is
 * {bits 0-4, bits 5-9, bits 10-14, 0}; with WORDS_BIGENDIAN it is reversed.
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer; 4 bytes are written per input word
 * @param src_size size of the input in bytes (src_size/2 words are converted)
 */
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
    uint8_t *d = dst;
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* mm7 = 0, used below as the zero source for the unpack instructions.
     * NOTE(review): this relies on mm7 staying untouched between separate
     * asm statements; nothing in the constraints expresses that. */
    __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
    /* 4 input words (8 bytes) per iteration.  The bound is written as a
     * pointer difference so that no pointer before the start of the buffer
     * is formed when fewer than 3 words are available. */
    while (end - s >= 4)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            /* three copies of the same 4 words, one per component */
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            /* presumably mask15b/g/r select the three 5-bit fields --
             * their definitions are not visible here */
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            /* same shift amounts as the scalar loop below: <<3, >>2, >>7 */
            "psllq $3, %%mm0 \n\t"
            "psrlq $2, %%mm1 \n\t"
            "psrlq $7, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            /* widen words to dwords by interleaving with zero (mm7):
             * low two words into mm0-2, high two words into mm3-5 */
            "punpcklwd %%mm7, %%mm0 \n\t"
            "punpcklwd %%mm7, %%mm1 \n\t"
            "punpcklwd %%mm7, %%mm2 \n\t"
            "punpckhwd %%mm7, %%mm3 \n\t"
            "punpckhwd %%mm7, %%mm4 \n\t"
            "punpckhwd %%mm7, %%mm5 \n\t"
            /* move 2nd/3rd component to byte 1/byte 2 and merge */
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"
            /* store 4 output pixels (16 bytes) */
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm3, 8%0 \n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: handles everything without MMX, or the words left over
     * after the MMX loop.  The bytes are stored one at a time; a variant
     * that assembled the pixel into a single 32-bit store was noted as
     * slightly slower on Athlon. */
    while (s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
#ifdef WORDS_BIGENDIAN
        *d++ = 0;
        *d++ = (bgr&0x7C00)>>7;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x1F)<<3;
#else
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
        *d++ = 0;
#endif
    }
}
/**
 * Expand packed 16-bit pixels to 4 bytes per pixel.
 *
 * Each 16-bit input word is split into a 5-bit field (bits 0-4), a 6-bit
 * field (bits 5-10) and a 5-bit field (bits 11-15).  Every field is moved
 * to the top of its own output byte (remaining low bits left at zero) and
 * a fourth byte of 0 is emitted.  Without WORDS_BIGENDIAN the byte order
 * written is {bits 0-4, bits 5-10, bits 11-15, 0}; with WORDS_BIGENDIAN it
 * is reversed.
 *
 * @param src      input pixels, read as 16-bit words
 * @param dst      output buffer; 4 bytes are written per input word
 * @param src_size size of the input in bytes (src_size/2 words are converted)
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *end;
    uint8_t *d = dst;
    /* keep the const qualifier of src, as the 15-bit variant does */
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* mm7 = 0, used below as the zero source for the unpack instructions.
     * NOTE(review): this relies on mm7 staying untouched between separate
     * asm statements; nothing in the constraints expresses that. */
    __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
    /* 4 input words (8 bytes) per iteration.  The bound is written as a
     * pointer difference so that no pointer before the start of the buffer
     * is formed when fewer than 3 words are available. */
    while (end - s >= 4)
    {
        __asm __volatile(
            PREFETCH" 32%1 \n\t"
            /* three copies of the same 4 words, one per component */
            "movq %1, %%mm0 \n\t"
            "movq %1, %%mm1 \n\t"
            "movq %1, %%mm2 \n\t"
            /* presumably mask16b/g/r select the 5/6/5-bit fields --
             * their definitions are not visible here */
            "pand %2, %%mm0 \n\t"
            "pand %3, %%mm1 \n\t"
            "pand %4, %%mm2 \n\t"
            /* same shift amounts as the scalar loop below: <<3, >>3, >>8 */
            "psllq $3, %%mm0 \n\t"
            "psrlq $3, %%mm1 \n\t"
            "psrlq $8, %%mm2 \n\t"
            "movq %%mm0, %%mm3 \n\t"
            "movq %%mm1, %%mm4 \n\t"
            "movq %%mm2, %%mm5 \n\t"
            /* widen words to dwords by interleaving with zero (mm7):
             * low two words into mm0-2, high two words into mm3-5 */
            "punpcklwd %%mm7, %%mm0 \n\t"
            "punpcklwd %%mm7, %%mm1 \n\t"
            "punpcklwd %%mm7, %%mm2 \n\t"
            "punpckhwd %%mm7, %%mm3 \n\t"
            "punpckhwd %%mm7, %%mm4 \n\t"
            "punpckhwd %%mm7, %%mm5 \n\t"
            /* move 2nd/3rd component to byte 1/byte 2 and merge */
            "psllq $8, %%mm1 \n\t"
            "psllq $16, %%mm2 \n\t"
            "por %%mm1, %%mm0 \n\t"
            "por %%mm2, %%mm0 \n\t"
            "psllq $8, %%mm4 \n\t"
            "psllq $16, %%mm5 \n\t"
            "por %%mm4, %%mm3 \n\t"
            "por %%mm5, %%mm3 \n\t"
            /* store 4 output pixels (16 bytes) */
            MOVNTQ" %%mm0, %0 \n\t"
            MOVNTQ" %%mm3, 8%0 \n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: handles everything without MMX, or the words left over
     * after the MMX loop. */
    while (s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
#ifdef WORDS_BIGENDIAN
        *d++ = 0;
        *d++ = (bgr&0xF800)>>8;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0x1F)<<3;
#else
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0xF800)>>8;
        *d++ = 0;
#endif
    }
}
/**
 * Swap two components of every 32-bit pixel.
 *
 * Each pixel is handled as one 32-bit word: the byte in bits 0-7 and the
 * byte in bits 16-23 trade places, while bits 8-15 and bits 24-31 are
 * copied unchanged.
 *
 * @param src      input pixels
 * @param dst      output buffer, src_size bytes are written
 * @param src_size size of the input in bytes
 *
 * NOTE: the scalar loop advances 4 bytes at a time while the byte offset is
 * below src_size, so a src_size that is not a multiple of 4 makes the last
 * iteration read and write past src_size bytes.
 * NOTE(review): pixels are accessed through uint32_t pointers; this assumes
 * src and dst are suitably aligned for that -- confirm against callers.
 */
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
    /* idx runs from (15 - src_size) upwards and s/d are biased by -idx, so
     * s[idx] is src[0] initially.  While idx is negative at least 16 bytes
     * remain, which lets the asm loop below end on a simple sign test. */
    long idx = 15 - src_size;
    const uint8_t *s = src-idx;
    uint8_t *d = dst-idx;
#ifdef HAVE_MMX
    __asm __volatile(
        /* skip the whole MMX part when fewer than 16 bytes are available */
        "test %0, %0 \n\t"
        "jns 2f \n\t"
        PREFETCH" (%1, %0) \n\t"
        /* mm6 = mask32b ^ mask32r, mm7 = mm6 ^ mmx_one.
         * Presumably mm6 selects the two bytes to swap and mm7 the two
         * bytes to keep (i.e. mmx_one is all ones) -- the constants are
         * defined outside this block. */
        "movq %3, %%mm7 \n\t"
        "pxor %4, %%mm7 \n\t"
        "movq %%mm7, %%mm6 \n\t"
        "pxor %5, %%mm7 \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %0) \n\t"
        /* load 4 pixels (16 bytes) */
        "movq (%1, %0), %%mm0 \n\t"
        "movq 8(%1, %0), %%mm1 \n\t"
# ifdef HAVE_MMX2
        /* pshufw $177 exchanges the two 16-bit halves of every dword */
        "pshufw $177, %%mm0, %%mm3 \n\t"
        "pshufw $177, %%mm1, %%mm5 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm6, %%mm3 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm6, %%mm5 \n\t"
        "por %%mm3, %%mm0 \n\t"
        "por %%mm5, %%mm1 \n\t"
# else
        /* without pshufw: shift the mm6-selected bytes by 16 bits in both
         * directions within each dword and OR them back in */
        "movq %%mm0, %%mm2 \n\t"
        "movq %%mm1, %%mm4 \n\t"
        "pand %%mm7, %%mm0 \n\t"
        "pand %%mm6, %%mm2 \n\t"
        "pand %%mm7, %%mm1 \n\t"
        "pand %%mm6, %%mm4 \n\t"
        "movq %%mm2, %%mm3 \n\t"
        "movq %%mm4, %%mm5 \n\t"
        "pslld $16, %%mm2 \n\t"
        "psrld $16, %%mm3 \n\t"
        "pslld $16, %%mm4 \n\t"
        "psrld $16, %%mm5 \n\t"
        "por %%mm2, %%mm0 \n\t"
        "por %%mm4, %%mm1 \n\t"
        "por %%mm3, %%mm0 \n\t"
        "por %%mm5, %%mm1 \n\t"
# endif
        MOVNTQ" %%mm0, (%2, %0) \n\t"
        MOVNTQ" %%mm1, 8(%2, %0) \n\t"
        /* advance 16 bytes; keep looping while idx is still negative */
        "add $16, %0 \n\t"
        "js 1b \n\t"
        SFENCE" \n\t"
        EMMS" \n\t"
        "2: \n\t"
        : "+&r"(idx)
        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
        : "memory");
#endif
    /* Scalar path: everything without MMX, or the pixels left over after
     * the MMX loop.  Unsigned arithmetic is used on purpose: with a signed
     * int, shifting a value of up to 0x00ff00ff left by 16 overflows, which
     * is undefined behavior. */
    for (; idx<15; idx+=4) {
        const uint32_t v  = *(const uint32_t *)&s[idx];
        const uint32_t g  = v & 0xff00ff00;  /* bytes that stay in place */
        const uint32_t rb = v & 0x00ff00ff;  /* bytes that trade places  */
        *(uint32_t *)&d[idx] = (rb>>16) + g + (rb<<16);
    }
}
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
{
unsigned i;
#ifdef HAVE_MMX
long mmx_size= 23 - src_size;
asm volatile (
"test %%"REG_a", %%"REG_a" \n\t"
"jns 2f \n\t"
"movq "MANGLE(mask24r)", %%mm5 \n\t"
"movq "MANGLE(mask24g)", %%mm6 \n\t"
"movq "MANGLE(mask24b)", %%mm7 \n\t"
ASMALIGN(4)
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a") \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
"movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG
"movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B
"psllq $16, %%mm0 \n\t" // 00 BGR BGR
"pand %%mm5, %%mm0 \n\t"
"pand %%mm6, %%mm1 \n\t"
"pand %%mm7, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B
"movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR
"pand %%mm7, %%mm0 \n\t"
"pand %%mm5, %%mm1 \n\t"
"pand %%mm6, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B
MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR
"movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG
"pand %%mm6, %%mm0 \n\t"
"pand %%mm7, %%mm1 \n\t"
"pand %%mm5, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t"
/* (keyboard-shortcut help text from the code-viewer page removed; it is not part of the source file) */