📄 rgb2rgb_template.c
字号:
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
:"=m"(*d)
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
/* Borrowed 32 to 24 */
__asm __volatile(
"movq %%mm0, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"movq %%mm6, %%mm0\n\t"
"movq %%mm7, %%mm1\n\t"
"movq %%mm4, %%mm6\n\t"
"movq %%mm5, %%mm7\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm1, %%mm3\n\t"
"psrlq $8, %%mm2\n\t"
"psrlq $8, %%mm3\n\t"
"psrlq $8, %%mm6\n\t"
"psrlq $8, %%mm7\n\t"
"pand %2, %%mm0\n\t"
"pand %2, %%mm1\n\t"
"pand %2, %%mm4\n\t"
"pand %2, %%mm5\n\t"
"pand %3, %%mm2\n\t"
"pand %3, %%mm3\n\t"
"pand %3, %%mm6\n\t"
"pand %3, %%mm7\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm3, %%mm1\n\t"
"por %%mm6, %%mm4\n\t"
"por %%mm7, %%mm5\n\t"
"movq %%mm1, %%mm2\n\t"
"movq %%mm4, %%mm3\n\t"
"psllq $48, %%mm2\n\t"
"psllq $32, %%mm3\n\t"
"pand %4, %%mm2\n\t"
"pand %5, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"psrlq $16, %%mm1\n\t"
"psrlq $32, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm3, %%mm1\n\t"
"pand %6, %%mm5\n\t"
"por %%mm5, %%mm4\n\t"
MOVNTQ" %%mm0, %0\n\t"
MOVNTQ" %%mm1, 8%0\n\t"
MOVNTQ" %%mm4, 16%0"
:"=m"(*d)
:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
:"memory");
d += 24;
s += 8;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
register uint16_t bgr;
bgr = *s++;
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x3E0)>>2;
*d++ = (bgr&0x7C00)>>7;
}
}
static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint16_t *end;
#ifdef HAVE_MMX
const uint16_t *mm_end;
#endif
uint8_t *d = (uint8_t *)dst;
const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movq %1, %%mm0\n\t"
"movq %1, %%mm1\n\t"
"movq %1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $3, %%mm1\n\t"
"psrlq $8, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %5, %%mm0\n\t"
"punpcklwd %5, %%mm1\n\t"
"punpcklwd %5, %%mm2\n\t"
"punpckhwd %5, %%mm3\n\t"
"punpckhwd %5, %%mm4\n\t"
"punpckhwd %5, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
"movq %%mm0, %%mm6\n\t"
"movq %%mm3, %%mm7\n\t"
"movq 8%1, %%mm0\n\t"
"movq 8%1, %%mm1\n\t"
"movq 8%1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $3, %%mm1\n\t"
"psrlq $8, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %5, %%mm0\n\t"
"punpcklwd %5, %%mm1\n\t"
"punpcklwd %5, %%mm2\n\t"
"punpckhwd %5, %%mm3\n\t"
"punpckhwd %5, %%mm4\n\t"
"punpckhwd %5, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
/* Borrowed 32 to 24 */
__asm __volatile(
"movq %%mm0, %%mm4\n\t"
"movq %%mm3, %%mm5\n\t"
"movq %%mm6, %%mm0\n\t"
"movq %%mm7, %%mm1\n\t"
"movq %%mm4, %%mm6\n\t"
"movq %%mm5, %%mm7\n\t"
"movq %%mm0, %%mm2\n\t"
"movq %%mm1, %%mm3\n\t"
"psrlq $8, %%mm2\n\t"
"psrlq $8, %%mm3\n\t"
"psrlq $8, %%mm6\n\t"
"psrlq $8, %%mm7\n\t"
"pand %2, %%mm0\n\t"
"pand %2, %%mm1\n\t"
"pand %2, %%mm4\n\t"
"pand %2, %%mm5\n\t"
"pand %3, %%mm2\n\t"
"pand %3, %%mm3\n\t"
"pand %3, %%mm6\n\t"
"pand %3, %%mm7\n\t"
"por %%mm2, %%mm0\n\t"
"por %%mm3, %%mm1\n\t"
"por %%mm6, %%mm4\n\t"
"por %%mm7, %%mm5\n\t"
"movq %%mm1, %%mm2\n\t"
"movq %%mm4, %%mm3\n\t"
"psllq $48, %%mm2\n\t"
"psllq $32, %%mm3\n\t"
"pand %4, %%mm2\n\t"
"pand %5, %%mm3\n\t"
"por %%mm2, %%mm0\n\t"
"psrlq $16, %%mm1\n\t"
"psrlq $32, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm3, %%mm1\n\t"
"pand %6, %%mm5\n\t"
"por %%mm5, %%mm4\n\t"
MOVNTQ" %%mm0, %0\n\t"
MOVNTQ" %%mm1, 8%0\n\t"
MOVNTQ" %%mm4, 16%0"
:"=m"(*d)
:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
:"memory");
d += 24;
s += 8;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
register uint16_t bgr;
bgr = *s++;
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0xF800)>>8;
}
}
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint16_t *end;
#ifdef HAVE_MMX
const uint16_t *mm_end;
#endif
uint8_t *d = (uint8_t *)dst;
const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
__asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
mm_end = end - 3;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movq %1, %%mm0\n\t"
"movq %1, %%mm1\n\t"
"movq %1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $2, %%mm1\n\t"
"psrlq $7, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %%mm7, %%mm0\n\t"
"punpcklwd %%mm7, %%mm1\n\t"
"punpcklwd %%mm7, %%mm2\n\t"
"punpckhwd %%mm7, %%mm3\n\t"
"punpckhwd %%mm7, %%mm4\n\t"
"punpckhwd %%mm7, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
MOVNTQ" %%mm0, %0\n\t"
MOVNTQ" %%mm3, 8%0\n\t"
:"=m"(*d)
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
:"memory");
d += 16;
s += 4;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
#if 0 //slightly slower on athlon
int bgr= *s++;
*((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
#else
register uint16_t bgr;
bgr = *s++;
#ifdef WORDS_BIGENDIAN
*d++ = 0;
*d++ = (bgr&0x7C00)>>7;
*d++ = (bgr&0x3E0)>>2;
*d++ = (bgr&0x1F)<<3;
#else
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x3E0)>>2;
*d++ = (bgr&0x7C00)>>7;
*d++ = 0;
#endif
#endif
}
}
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
const uint16_t *end;
#ifdef HAVE_MMX
const uint16_t *mm_end;
#endif
uint8_t *d = (uint8_t *)dst;
const uint16_t *s = (uint16_t *)src;
end = s + src_size/2;
#ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
__asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
mm_end = end - 3;
while(s < mm_end)
{
__asm __volatile(
PREFETCH" 32%1\n\t"
"movq %1, %%mm0\n\t"
"movq %1, %%mm1\n\t"
"movq %1, %%mm2\n\t"
"pand %2, %%mm0\n\t"
"pand %3, %%mm1\n\t"
"pand %4, %%mm2\n\t"
"psllq $3, %%mm0\n\t"
"psrlq $3, %%mm1\n\t"
"psrlq $8, %%mm2\n\t"
"movq %%mm0, %%mm3\n\t"
"movq %%mm1, %%mm4\n\t"
"movq %%mm2, %%mm5\n\t"
"punpcklwd %%mm7, %%mm0\n\t"
"punpcklwd %%mm7, %%mm1\n\t"
"punpcklwd %%mm7, %%mm2\n\t"
"punpckhwd %%mm7, %%mm3\n\t"
"punpckhwd %%mm7, %%mm4\n\t"
"punpckhwd %%mm7, %%mm5\n\t"
"psllq $8, %%mm1\n\t"
"psllq $16, %%mm2\n\t"
"por %%mm1, %%mm0\n\t"
"por %%mm2, %%mm0\n\t"
"psllq $8, %%mm4\n\t"
"psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t"
MOVNTQ" %%mm0, %0\n\t"
MOVNTQ" %%mm3, 8%0\n\t"
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
:"memory");
d += 16;
s += 4;
}
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end)
{
register uint16_t bgr;
bgr = *s++;
#ifdef WORDS_BIGENDIAN
*d++ = 0;
*d++ = (bgr&0xF800)>>8;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0x1F)<<3;
#else
*d++ = (bgr&0x1F)<<3;
*d++ = (bgr&0x7E0)>>3;
*d++ = (bgr&0xF800)>>8;
*d++ = 0;
#endif
}
}
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
#ifdef HAVE_MMX
/* TODO: unroll this loop */
asm volatile (
"xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 32(%0, %%"REG_a") \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm0, %%mm2 \n\t"
"pslld $16, %%mm0 \n\t"
"psrld $16, %%mm1 \n\t"
"pand "MANGLE(mask32r)", %%mm0 \n\t"
"pand "MANGLE(mask32g)", %%mm2 \n\t"
"pand "MANGLE(mask32b)", %%mm1 \n\t"
"por %%mm0, %%mm2 \n\t"
"por %%mm1, %%mm2 \n\t"
MOVNTQ" %%mm2, (%1, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
:: "r" (src), "r"(dst), "r" (src_size-7)
: "%"REG_a
);
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#else
unsigned i;
stride_t num_pixels = src_size >> 2;
for(i=0; i<num_pixels; i++)
{
#ifdef WORDS_BIGENDIAN
dst[4*i + 1] = src[4*i + 3];
dst[4*i + 2] = src[4*i + 2];
dst[4*i + 3] = src[4*i + 1];
#else
dst[4*i + 0] = src[4*i + 2];
dst[4*i + 1] = src[4*i + 1];
dst[4*i + 2] = src[4*i + 0];
#endif
}
#endif
}
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, stride_t src_size)
{
unsigned i;
#ifdef HAVE_MMX
long mmx_size= 23 - src_size;
asm volatile (
"movq "MANGLE(mask24r)", %%mm5 \n\t"
"movq "MANGLE(mask24g)", %%mm6 \n\t"
"movq "MANGLE(mask24b)", %%mm7 \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a") \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
"movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG
"movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B
"psllq $16, %%mm0 \n\t" // 00 BGR BGR
"pand %%mm5, %%mm0 \n\t"
"pand %%mm6, %%mm1 \n\t"
"pand %%mm7, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B
"movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR
"pand %%mm7, %%mm0 \n\t"
"pand %%mm5, %%mm1 \n\t"
"pand %%mm6, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B
MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR
"movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG
"pand %%mm6, %%mm0 \n\t"
"pand %%mm7, %%mm1 \n\t"
"pand %%mm5, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t"
MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t"
"add $24, %%"REG_a" \n\t"
" js 1b \n\t"
: "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size)
);
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
if(mmx_size==23) return; //finihsed, was multiple of 8
src+= src_size;
dst+= src_size;
src_size= 23-mmx_size;
src-= src_size;
dst-= src_size;
#endif
for(i=0; i<src_size; i+=3)
{
register uint8_t x;
x = src[i + 2];
dst[i + 1] = src[i + 1];
dst[i + 2] = src[i + 0];
dst[i + 0] = x;
}
}
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride, long vertLumPerChroma)
{
long y;
const long chromWidth= width>>1;
for(y=0; y<height; y++)
{
#ifdef HAVE_MMX
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -