📄 rgb2rgb_template.c
字号:
*d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; }}static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 7; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq %1, %%mm1\n\t" "movq %1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $3, %%mm1\n\t" "psrlq $8, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %5, %%mm0\n\t" "punpcklwd %5, %%mm1\n\t" "punpcklwd %5, %%mm2\n\t" "punpckhwd %5, %%mm3\n\t" "punpckhwd %5, %%mm4\n\t" "punpckhwd %5, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" "movq %%mm0, %%mm6\n\t" "movq %%mm3, %%mm7\n\t" "movq 8%1, %%mm0\n\t" "movq 8%1, %%mm1\n\t" "movq 8%1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $3, %%mm1\n\t" "psrlq $8, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %5, %%mm0\n\t" "punpcklwd %5, %%mm1\n\t" "punpcklwd %5, %%mm2\n\t" "punpckhwd %5, %%mm3\n\t" "punpckhwd %5, %%mm4\n\t" "punpckhwd %5, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" :"=m"(*d) :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"memory"); /* Borrowed 32 to 24 */ __asm __volatile( "movq %%mm0, %%mm4\n\t" "movq %%mm3, %%mm5\n\t" "movq %%mm6, %%mm0\n\t" "movq %%mm7, %%mm1\n\t" "movq %%mm4, %%mm6\n\t" "movq %%mm5, %%mm7\n\t" "movq %%mm0, %%mm2\n\t" "movq %%mm1, %%mm3\n\t" "psrlq $8, %%mm2\n\t" "psrlq $8, %%mm3\n\t" "psrlq $8, %%mm6\n\t" "psrlq $8, %%mm7\n\t" "pand %2, %%mm0\n\t" "pand %2, %%mm1\n\t" "pand %2, %%mm4\n\t" "pand %2, %%mm5\n\t" "pand %3, %%mm2\n\t" "pand %3, %%mm3\n\t" "pand %3, %%mm6\n\t" "pand %3, %%mm7\n\t" "por %%mm2, %%mm0\n\t" "por %%mm3, %%mm1\n\t" "por %%mm6, %%mm4\n\t" "por %%mm7, %%mm5\n\t" "movq %%mm1, %%mm2\n\t" "movq %%mm4, %%mm3\n\t" "psllq $48, %%mm2\n\t" "psllq $32, %%mm3\n\t" "pand %4, %%mm2\n\t" "pand %5, %%mm3\n\t" "por %%mm2, %%mm0\n\t" "psrlq $16, %%mm1\n\t" "psrlq $32, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm3, %%mm1\n\t" "pand %6, %%mm5\n\t" "por %%mm5, %%mm4\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm1, 8%0\n\t" MOVNTQ" %%mm4, 16%0" :"=m"(*d) :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); d += 24; s += 8; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { register uint16_t bgr; bgr = *s++; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; }}static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); mm_end = end - 3; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq %1, %%mm1\n\t" "movq %1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $2, %%mm1\n\t" "psrlq $7, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %%mm7, %%mm0\n\t" "punpcklwd %%mm7, %%mm1\n\t" "punpcklwd %%mm7, %%mm2\n\t" "punpckhwd %%mm7, %%mm3\n\t" "punpckhwd %%mm7, %%mm4\n\t" "punpckhwd %%mm7, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm3, 8%0\n\t" :"=m"(*d) :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) :"memory"); d += 16; s += 4; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) {#if 0 //slightly slower on athlon int bgr= *s++; *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);#else//FIXME this is very likely wrong for bigendian (and the following converters too) register uint16_t bgr; bgr = *s++; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; *d++ = 0;#endif }}static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = (uint8_t *)dst; const uint16_t *s = (uint16_t *)src; end = s + src_size/2;#ifdef HAVE_MMX __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory"); mm_end = end - 3; while(s < mm_end) { __asm __volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq %1, %%mm1\n\t" "movq %1, %%mm2\n\t" "pand %2, %%mm0\n\t" "pand %3, %%mm1\n\t" "pand %4, %%mm2\n\t" "psllq $3, %%mm0\n\t" "psrlq $3, %%mm1\n\t" "psrlq $8, %%mm2\n\t" "movq %%mm0, %%mm3\n\t" "movq %%mm1, %%mm4\n\t" "movq %%mm2, %%mm5\n\t" "punpcklwd %%mm7, %%mm0\n\t" "punpcklwd %%mm7, %%mm1\n\t" "punpcklwd %%mm7, %%mm2\n\t" "punpckhwd %%mm7, %%mm3\n\t" "punpckhwd %%mm7, %%mm4\n\t" "punpckhwd %%mm7, %%mm5\n\t" "psllq $8, %%mm1\n\t" "psllq $16, %%mm2\n\t" "por %%mm1, %%mm0\n\t" "por %%mm2, %%mm0\n\t" "psllq $8, %%mm4\n\t" "psllq $16, %%mm5\n\t" "por %%mm4, %%mm3\n\t" "por %%mm5, %%mm3\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm3, 8%0\n\t" :"=m"(*d) :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) :"memory"); d += 16; s += 4; } __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#endif while(s < end) { register uint16_t bgr; bgr = *s++; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; *d++ = 0; }}static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size){#ifdef HAVE_MMX/* TODO: unroll this loop */ asm volatile ( "xorl %%eax, %%eax \n\t" ".balign 16 \n\t" "1: \n\t" PREFETCH" 32(%0, %%eax) \n\t" "movq (%0, %%eax), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "pslld $16, %%mm0 \n\t" "psrld $16, %%mm1 \n\t" "pand "MANGLE(mask32r)", %%mm0 \n\t" "pand "MANGLE(mask32g)", %%mm2 \n\t" "pand "MANGLE(mask32b)", %%mm1 \n\t" "por %%mm0, %%mm2 \n\t" "por %%mm1, %%mm2 \n\t" MOVNTQ" %%mm2, (%1, %%eax) \n\t" "addl $8, %%eax \n\t" "cmpl %2, %%eax \n\t" " jb 1b \n\t" :: "r" (src), "r"(dst), "r" (src_size-7) : "%eax" ); __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory");#else unsigned i; unsigned num_pixels = src_size >> 2; for(i=0; i<num_pixels; i++) {#ifdef WORDS_BIGENDIAN dst[4*i + 1] = src[4*i + 3]; dst[4*i + 2] = src[4*i + 2]; dst[4*i + 3] = src[4*i + 1];#else dst[4*i + 0] = src[4*i + 2]; dst[4*i + 1] = src[4*i + 1]; dst[4*i + 2] = src[4*i + 0];#endif }#endif}static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size){ unsigned i;#ifdef HAVE_MMX int mmx_size= 23 - src_size; asm volatile ( "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t" ".balign 16 \n\t" "1: \n\t" PREFETCH" 32(%1, %%eax) \n\t" "movq (%1, %%eax), %%mm0 \n\t" // BGR BGR BG "movq (%1, %%eax), %%mm1 \n\t" // BGR BGR BG "movq 2(%1, %%eax), %%mm2 \n\t" // R BGR BGR B "psllq $16, %%mm0 \n\t" // 00 BGR BGR "pand %%mm5, %%mm0 \n\t" "pand %%mm6, %%mm1 \n\t" "pand %%mm7, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" "movq 6(%1, %%eax), %%mm0 \n\t" // BGR BGR BG MOVNTQ" %%mm1, (%2, %%eax) \n\t" // RGB RGB RG "movq 8(%1, %%eax), %%mm1 \n\t" // R BGR BGR B "movq 10(%1, %%eax), %%mm2 \n\t" // GR BGR BGR "pand %%mm7, %%mm0 \n\t" "pand %%mm5, %%mm1 \n\t" "pand %%mm6, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" "movq 14(%1, %%eax), %%mm0 \n\t" // R BGR BGR B MOVNTQ" %%mm1, 8(%2, %%eax) \n\t" // B RGB RGB R "movq 16(%1, %%eax), %%mm1 \n\t" // GR BGR BGR "movq 18(%1, %%eax), %%mm2 \n\t" // BGR BGR BG "pand %%mm6, %%mm0 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm5, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" MOVNTQ" %%mm1, 16(%2, %%eax) \n\t" "addl $24, %%eax \n\t" " js 1b \n\t" : "+a" (mmx_size) : "r" (src-mmx_size), "r"(dst-mmx_size) ); __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory"); if(mmx_size==23) return; //finihsed, was multiple of 8 src+= src_size; dst+= src_size; src_size= 23-mmx_size; src-= src_size; dst-= src_size;#endif for(i=0; i<src_size; i+=3) { register uint8_t x; x = src[i + 2]; dst[i + 1] = src[i + 1]; dst[i + 2] = src[i + 0]; dst[i + 0] = x; }}static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, unsigned int width, unsigned int height, int lumStride, int chromStride, int dstStride, int vertLumPerChroma){ unsigned y; const unsigned chromWidth= width>>1; for(y=0; y<height; y++) {#ifdef HAVE_MMX//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway) asm volatile( "xorl %%eax, %%eax \n\t" ".balign 16 \n\t" "1: \n\t" PREFETCH" 32(%1, %%eax, 2) \n\t" PREFETCH" 32(%2, %%eax) \n\t" PREFETCH" 32(%3, %%eax) \n\t" "movq (%2, %%eax), %%mm0 \n\t" // U(0) "movq %%mm0, %%mm2 \n\t" // U(0) "movq (%3, %%eax), %%mm1 \n\t" // V(0) "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) "movq (%1, %%eax,2), %%mm3 \n\t" // Y(0) "movq 8(%1, %%eax,2), %%mm5 \n\t" // Y(8) "movq %%mm3, %%mm4 \n\t" // Y(0) "movq %%mm5, %%mm6 \n\t" // Y(8) "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4) "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) MOVNTQ" %%mm3, (%0, %%eax, 4) \n\t" MOVNTQ" %%mm4, 8(%0, %%eax, 4) \n\t" MOVNTQ" %%mm5, 16(%0, %%eax, 4) \n\t" MOVNTQ" %%mm6, 24(%0, %%eax, 4) \n\t" "addl $8, %%eax \n\t" "cmpl %4, %%eax \n\t" " jb 1b \n\t" ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) : "%eax" );#else#if defined ARCH_ALPHA && defined HAVE_MVI#define pl2yuy2(n) \ y1 = yc[n]; \ y2 = yc2[n]; \ u = uc[n]; \ v = vc[n]; \ asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \ asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \ asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \ asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \ yuv1 = (u << 8) + (v << 24); \ yuv2 = yuv1 + y2; \ yuv1 += y1; \ qdst[n] = yuv1; \ qdst2[n] = yuv2; int i; uint64_t *qdst = (uint64_t *) dst; uint64_t *qdst2 = (uint64_t *) (dst + dstStride); const uint32_t *yc = (uint32_t *) ysrc; const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; for(i = 0; i < chromWidth; i += 8){ uint64_t y1, y2, yuv1, yuv2; uint64_t u, v; /* Prefetch */ asm("ldq $31,64(%0)" :: "r"(yc)); asm("ldq $31,64(%0)" :: "r"(yc2)); asm("ldq $31,64(%0)" :: "r"(uc)); asm("ldq $31,64(%0)" :: "r"(vc)); pl2yuy2(0); pl2yuy2(1); pl2yuy2(2); pl2yuy2(3); yc += 4; yc2 += 4; uc += 4; vc += 4; qdst += 4; qdst2 += 4; } y++; ysrc += lumStride; dst += dstStride;#elif __WORDSIZE >= 64 int i; uint64_t *ldst = (uint64_t *) dst; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; for(i = 0; i < chromWidth; i += 2){ uint64_t k, l; k = yc[0] + (uc[0] << 8) + (yc[1] << 16) + (vc[0] << 24); l = yc[2] + (uc[1] << 8) + (yc[3] << 16) + (vc[1] << 24); *ldst++ = k + (l << 32); yc += 4; uc += 2; vc += 2; }#else int i, *idst = (int32_t *) dst; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; for(i = 0; i < chromWidth; i++){#ifdef WORDS_BIGENDIAN *idst++ = (yc[0] << 24)+ (uc[0] << 16) + (yc[1] << 8) + (vc[0] << 0);#else *idst++ = yc[0] + (uc[0] << 8) +
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -