📄 rgb2rgb_template.c
字号:
"movq %3, %%mm5 \n\t"
"movq %4, %%mm6 \n\t"
"movq %5, %%mm7 \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 32(%1) \n\t"
"movd (%1), %%mm0 \n\t"
"movd 4(%1), %%mm3 \n\t"
"punpckldq 8(%1), %%mm0 \n\t"
"punpckldq 12(%1), %%mm3 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm3, %%mm4 \n\t"
"pand %%mm6, %%mm0 \n\t"
"pand %%mm6, %%mm3 \n\t"
"pmaddwd %%mm7, %%mm0 \n\t"
"pmaddwd %%mm7, %%mm3 \n\t"
"pand %%mm5, %%mm1 \n\t"
"pand %%mm5, %%mm4 \n\t"
"por %%mm1, %%mm0 \n\t"
"por %%mm4, %%mm3 \n\t"
"psrld $6, %%mm0 \n\t"
"pslld $10, %%mm3 \n\t"
"por %%mm3, %%mm0 \n\t"
MOVNTQ" %%mm0, (%0) \n\t"
"add $16, %1 \n\t"
"add $8, %0 \n\t"
"cmp %2, %1 \n\t"
" jb 1b \n\t"
: "+r" (d), "+r"(s)
: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
);
#else
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask));
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 4%1, %%mm3\n\t"
"punpckldq 8%1, %%mm0\n\t"
"punpckldq 12%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psrlq $3, %%mm0\n\t"
"psrlq $3, %%mm3\n\t"
"pand %2, %%mm0\n\t"
"pand %2, %%mm3\n\t"
"psrlq $6, %%mm1\n\t"
"psrlq $6, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $9, %%mm2\n\t"
"psrlq $9, %%mm5\n\t"
"pand %%mm7, %%mm2\n\t"
"pand %%mm7, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
#endif
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
register int rgb = *(uint32_t*)s; s += 4;
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
}
}
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint8_t *s = src;
const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 4%1, %%mm3\n\t"
"punpckldq 8%1, %%mm0\n\t"
"punpckldq 12%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psllq $7, %%mm0\n\t"
"psllq $7, %%mm3\n\t"
"pand %%mm7, %%mm0\n\t"
"pand %%mm7, %%mm3\n\t"
"psrlq $6, %%mm1\n\t"
"psrlq $6, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $19, %%mm2\n\t"
"psrlq $19, %%mm5\n\t"
"pand %2, %%mm2\n\t"
"pand %2, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
d += 4;
s += 16;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
register int rgb = *(uint32_t*)s; s += 4;
*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
}
}
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint8_t *s = src;
const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 11;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 3%1, %%mm3\n\t"
"punpckldq 6%1, %%mm0\n\t"
"punpckldq 9%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psrlq $3, %%mm0\n\t"
"psrlq $3, %%mm3\n\t"
"pand %2, %%mm0\n\t"
"pand %2, %%mm3\n\t"
"psrlq $5, %%mm1\n\t"
"psrlq $5, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $8, %%mm2\n\t"
"psrlq $8, %%mm5\n\t"
"pand %%mm7, %%mm2\n\t"
"pand %%mm7, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
const int b= *s++;
const int g= *s++;
const int r= *s++;
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
}
}
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint8_t *s = src;
const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 15;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 3%1, %%mm3\n\t"
"punpckldq 6%1, %%mm0\n\t"
"punpckldq 9%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psllq $8, %%mm0\n\t"
"psllq $8, %%mm3\n\t"
"pand %%mm7, %%mm0\n\t"
"pand %%mm7, %%mm3\n\t"
"psrlq $5, %%mm1\n\t"
"psrlq $5, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $19, %%mm2\n\t"
"psrlq $19, %%mm5\n\t"
"pand %2, %%mm2\n\t"
"pand %2, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
d += 4;
s += 12;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
const int r= *s++;
const int g= *s++;
const int b= *s++;
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
}
}
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint8_t *s = src;
const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 11;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 3%1, %%mm3\n\t"
"punpckldq 6%1, %%mm0\n\t"
"punpckldq 9%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psrlq $3, %%mm0\n\t"
"psrlq $3, %%mm3\n\t"
"pand %2, %%mm0\n\t"
"pand %2, %%mm3\n\t"
"psrlq $6, %%mm1\n\t"
"psrlq $6, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $9, %%mm2\n\t"
"psrlq $9, %%mm5\n\t"
"pand %%mm7, %%mm2\n\t"
"pand %%mm7, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
const int b= *s++;
const int g= *s++;
const int r= *s++;
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
}
}
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint8_t *s = src;
const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst;
end = s + src_size;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile(
"movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movd %1, %%mm0\n\t"
"movd 3%1, %%mm3\n\t"
"punpckldq 6%1, %%mm0\n\t"
"punpckldq 9%1, %%mm3\n\t"
"movq %%mm0, %%mm1\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm3, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"psllq $7, %%mm0\n\t"
"psllq $7, %%mm3\n\t"
"pand %%mm7, %%mm0\n\t"
"pand %%mm7, %%mm3\n\t"
"psrlq $6, %%mm1\n\t"
"psrlq $6, %%mm4\n\t"
"pand %%mm6, %%mm1\n\t"
"pand %%mm6, %%mm4\n\t"
"psrlq $19, %%mm2\n\t"
"psrlq $19, %%mm5\n\t"
"pand %2, %%mm2\n\t"
"pand %2, %%mm5\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm5, %%mm3\n\t"
"psllq $16, %%mm3\n\t"
"por %%mm3, %%mm0\n\t"
MOVNTQ" %%mm0, %0\n\t"
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
d += 4;
s += 12;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
const int r= *s++;
const int g= *s++;
const int b= *s++;
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
}
}
/*
I use here less accurate approximation by simply
left-shifting the input
value and filling the low order bits with
zeroes. This method improves png's
compression but this scheme cannot reproduce white exactly, since it does not
generate an all-ones maximum value; the net effect is to darken the
image slightly.
The better method should be "left bit replication":
4 3 2 1 0
---------
1 1 0 1 1
7 6 5 4 3 2 1 0
----------------
1 1 0 1 1 1 1 0
|=======| |===|
| Leftmost Bits Repeated to Fill Open Bits
|
Original Bits
*/
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint16_t *end;
#ifdef HAVE_MMX
const uint16_t *mm_end;
#endif
uint8_t *d = (uint8_t *)dst;
const uint16_t *s = (uint16_t *)src;
end = s + src_size/2;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movq %1, %%mm0\n\t"
"movq %1, %%mm1\n\t"
"movq %1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $2, %%mm1\n\t"
"psrlq $7, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %5, %%mm0\n\t"
"punpcklwd %5, %%mm1\n\t"
"punpcklwd %5, %%mm2\n\t"
"punpckhwd %5, %%mm3\n\t"
"punpckhwd %5, %%mm4\n\t"
"punpckhwd %5, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
"movq %%mm0, %%mm6\n\t"
"movq %%mm3, %%mm7\n\t"
"movq 8%1, %%mm0\n\t"
"movq 8%1, %%mm1\n\t"
"movq 8%1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $2, %%mm1\n\t"
"psrlq $7, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %5, %%mm0\n\t"
"punpcklwd %5, %%mm1\n\t"
"punpcklwd %5, %%mm2\n\t"
"punpckhwd %5, %%mm3\n\t"
"punpckhwd %5, %%mm4\n\t"
"punpckhwd %5, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -