📄 rgb2rgb_template.c
字号:
"punpckldq 8%1, %%mm0\n\t" "punpckldq 12%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psrlq $3, %%mm0\n\t" "psrlq $3, %%mm3\n\t" "pand %2, %%mm0\n\t" "pand %2, %%mm3\n\t" "psrlq $6, %%mm1\n\t" "psrlq $6, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $9, %%mm2\n\t" "psrlq $9, %%mm5\n\t" "pand %%mm7, %%mm2\n\t" "pand %%mm7, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); d += 4; s += 16; }#endif __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int src= *s; s += 4; *d++ = ((src&0xFF)>>3) + ((src&0xF800)>>6) + ((src&0xF80000)>>9); }}static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_15mask),"m"(green_15mask)); mm_end = end - 15; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movd %1, %%mm0\n\t" "movd 4%1, %%mm3\n\t" "punpckldq 8%1, %%mm0\n\t" "punpckldq 12%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psllq $7, %%mm0\n\t" "psllq $7, %%mm3\n\t" "pand %%mm7, %%mm0\n\t" "pand %%mm7, %%mm3\n\t" "psrlq $6, %%mm1\n\t" "psrlq $6, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $19, %%mm2\n\t" "psrlq $19, %%mm5\n\t" "pand %2, %%mm2\n\t" "pand %2, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); d += 4; s += 16; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int src= *s; s += 4; *d++ = ((src&0xF8)<<7) + ((src&0xF800)>>6) + ((src&0xF80000)>>19); }}static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_16mask),"m"(green_16mask)); mm_end = end - 11; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movd %1, %%mm0\n\t" "movd 3%1, %%mm3\n\t" "punpckldq 6%1, %%mm0\n\t" "punpckldq 9%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psrlq $3, %%mm0\n\t" "psrlq $3, %%mm3\n\t" "pand %2, %%mm0\n\t" "pand %2, %%mm3\n\t" "psrlq $5, %%mm1\n\t" "psrlq $5, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $8, %%mm2\n\t" "psrlq $8, %%mm5\n\t" "pand %%mm7, %%mm2\n\t" "pand %%mm7, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); d += 4; s += 12; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int b= *s++; const int g= *s++; const int r= *s++; *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); }}static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_16mask),"m"(green_16mask)); mm_end = end - 15; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movd %1, %%mm0\n\t" "movd 3%1, %%mm3\n\t" "punpckldq 6%1, %%mm0\n\t" "punpckldq 9%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psllq $8, %%mm0\n\t" "psllq $8, %%mm3\n\t" "pand %%mm7, %%mm0\n\t" "pand %%mm7, %%mm3\n\t" "psrlq $5, %%mm1\n\t" "psrlq $5, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $19, %%mm2\n\t" "psrlq $19, %%mm5\n\t" "pand %2, %%mm2\n\t" "pand %2, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); d += 4; s += 12; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int r= *s++; const int g= *s++; const int b= *s++; *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); }}static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_15mask),"m"(green_15mask)); mm_end = end - 11; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movd %1, %%mm0\n\t" "movd 3%1, %%mm3\n\t" "punpckldq 6%1, %%mm0\n\t" "punpckldq 9%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psrlq $3, %%mm0\n\t" "psrlq $3, %%mm3\n\t" "pand %2, %%mm0\n\t" "pand %2, %%mm3\n\t" "psrlq $6, %%mm1\n\t" "psrlq $6, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $9, %%mm2\n\t" "psrlq $9, %%mm5\n\t" "pand %%mm7, %%mm2\n\t" "pand %%mm7, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); d += 4; s += 12; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int b= *s++; const int g= *s++; const int r= *s++; *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); }}static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile( "movq %0, %%mm7\n\t" "movq %1, %%mm6\n\t" ::"m"(red_15mask),"m"(green_15mask)); mm_end = end - 15; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movd %1, %%mm0\n\t" "movd 3%1, %%mm3\n\t" "punpckldq 6%1, %%mm0\n\t" "punpckldq 9%1, %%mm3\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm3, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "psllq $7, %%mm0\n\t" "psllq $7, %%mm3\n\t" "pand %%mm7, %%mm0\n\t" "pand %%mm7, %%mm3\n\t" "psrlq $6, %%mm1\n\t" "psrlq $6, %%mm4\n\t" "pand %%mm6, %%mm1\n\t" "pand %%mm6, %%mm4\n\t" "psrlq $19, %%mm2\n\t" "psrlq $19, %%mm5\n\t" "pand %2, %%mm2\n\t" "pand %2, %%mm5\n\t" "por %%mm1, %%mm0\n\t" "por %%mm4, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "por %%mm5, %%mm3\n\t" "psllq $16, %%mm3\n\t" "por %%mm3, %%mm0\n\t" MOVNTQ" %%mm0, %0\n\t" :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); d += 4; s += 12; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { const int r= *s++; const int g= *s++; const int b= *s++; *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); }}/* I use here less accurate approximation by simply left-shifting the input value and filling the low order bits with zeroes. This method improves png's compression but this scheme cannot reproduce white exactly, since it does not generate an all-ones maximum value; the net effect is to darken the image slightly. The better method should be "left bit replication": 4 3 2 1 0 --------- 1 1 0 1 1 7 6 5 4 3 2 1 0 ---------------- 1 1 0 1 1 1 1 0 |=======| |===| | Leftmost Bits Repeated to Fill Open Bits | Original Bits*/static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (uint16_t *)src; end = s + src_size/2;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 7; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq %1, %%mm1\n\t" "movq %1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $2, %%mm1\n\t" "psrlq $7, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %5, %%mm0\n\t" "punpcklwd %5, %%mm1\n\t" "punpcklwd %5, %%mm2\n\t" "punpckhwd %5, %%mm3\n\t" "punpckhwd %5, %%mm4\n\t" "punpckhwd %5, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" "movq %%mm0, %%mm6\n\t" "movq %%mm3, %%mm7\n\t" "movq 8%1, %%mm0\n\t" "movq 8%1, %%mm1\n\t" "movq 8%1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $2, %%mm1\n\t" "psrlq $7, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %5, %%mm0\n\t" "punpcklwd %5, %%mm1\n\t" "punpcklwd %5, %%mm2\n\t" "punpckhwd %5, %%mm3\n\t" "punpckhwd %5, %%mm4\n\t" "punpckhwd %5, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" :"=m"(*d) :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) :"memory"); /* Borrowed 32 to 24 */ __asm __volatile( "movq %%mm0, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "movq %%mm6, %%mm0\n\t" "movq %%mm7, %%mm1\n\t" "movq %%mm4, %%mm6\n\t" "movq %%mm5, %%mm7\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "psrlq $8, %%mm2\n\t" "psrlq $8, %%mm3\n\t" "psrlq $8, %%mm6\n\t" "psrlq $8, %%mm7\n\t" "pand %2, %%mm0\n\t" "pand %2, %%mm1\n\t" "pand %2, %%mm4\n\t" "pand %2, %%mm5\n\t" "pand %3, %%mm2\n\t" "pand %3, %%mm3\n\t" "pand %3, %%mm6\n\t" "pand %3, %%mm7\n\t" "por %%mm2, %%mm0\n\t" "por %%mm3, %%mm1\n\t" "por %%mm6, %%mm4\n\t" "por %%mm7, %%mm5\n\t" "movq %%mm1, %%mm2\n\t" "movq %%mm4, %%mm3\n\t" "psllq $48, %%mm2\n\t" "psllq $32, %%mm3\n\t" "pand %4, %%mm2\n\t" "pand %5, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "psrlq $16, %%mm1\n\t" "psrlq $32, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm3, %%mm1\n\t" "pand %6, %%mm5\n\t" "por %%mm5, %%mm4\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm1, 8%0\n\t" MOVNTQ" %%mm4, 16%0" :"=m"(*d) :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); d += 24; s += 8; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { register uint16_t bgr; bgr = *s++; *d++ = (bgr&0x1F)<<3;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -