📄 rgb2rgb_template.c
字号:
:::"memory");#endif}/** * * height should be a multiple of 2 and width should be a multiple of 2 (if this is a * problem for anyone then tell me, and ill fix it) * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version */static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, unsigned int width, unsigned int height, int lumStride, int chromStride, int srcStride){ unsigned y; const unsigned chromWidth= width>>1;#ifdef HAVE_MMX for(y=0; y<height-2; y+=2) { unsigned i; for(i=0; i<2; i++) { asm volatile( "movl %2, %%eax \n\t" "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" "movq "MANGLE(w1111)", %%mm5 \n\t" "pxor %%mm7, %%mm7 \n\t" "leal (%%eax, %%eax, 2), %%ebx \n\t" ".balign 16 \n\t" "1: \n\t" PREFETCH" 64(%0, %%ebx) \n\t" "movd (%0, %%ebx), %%mm0 \n\t" "movd 3(%0, %%ebx), %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "movd 6(%0, %%ebx), %%mm2 \n\t" "movd 9(%0, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" "pmaddwd %%mm5, %%mm2 \n\t" "packssdw %%mm2, %%mm0 \n\t" "psraw $7, %%mm0 \n\t" "movd 12(%0, %%ebx), %%mm4 \n\t" "movd 15(%0, %%ebx), %%mm1 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "movd 18(%0, %%ebx), %%mm2 \n\t" "movd 21(%0, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm1 \n\t" "pmaddwd %%mm6, %%mm2 \n\t" "pmaddwd %%mm6, %%mm3 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm1, %%mm4 \n\t" "packssdw %%mm3, %%mm2 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" "pmaddwd %%mm5, %%mm2 \n\t" "addl $24, %%ebx \n\t" "packssdw %%mm2, %%mm4 \n\t" "psraw $7, %%mm4 \n\t" "packuswb %%mm4, %%mm0 \n\t" "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" MOVNTQ" %%mm0, (%1, %%eax) \n\t" "addl $8, %%eax \n\t" " js 1b \n\t" : : "r" (src+width*3), "r" (ydst+width), "g" (-width) : "%eax", "%ebx" ); ydst += lumStride; src += srcStride; } src -= srcStride*2; asm volatile( "movl %4, %%eax \n\t" "movq "MANGLE(w1111)", %%mm5 \n\t" "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "leal (%%eax, %%eax, 2), %%ebx \n\t" "addl %%ebx, %%ebx \n\t" ".balign 16 \n\t" "1: \n\t" PREFETCH" 64(%0, %%ebx) \n\t" PREFETCH" 64(%1, %%ebx) \n\t"#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) "movq (%0, %%ebx), %%mm0 \n\t" "movq (%1, %%ebx), %%mm1 \n\t" "movq 6(%0, %%ebx), %%mm2 \n\t" "movq 6(%1, %%ebx), %%mm3 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm3, %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $24, %%mm0 \n\t" "psrlq $24, %%mm2 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"#else "movd (%0, %%ebx), %%mm0 \n\t" "movd (%1, %%ebx), %%mm1 \n\t" "movd 3(%0, %%ebx), %%mm2 \n\t" "movd 3(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "movd 6(%0, %%ebx), %%mm4 \n\t" "movd 6(%1, %%ebx), %%mm1 \n\t" "movd 9(%0, %%ebx), %%mm2 \n\t" "movd 9(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm4 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm4, %%mm2 \n\t" "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm2 \n\t"#endif "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm2 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm0 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm2, %%mm0 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm0 \n\t" "pmaddwd %%mm5, %%mm1 \n\t" "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 "psraw $7, %%mm0 \n\t"#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) "movq 12(%0, %%ebx), %%mm4 \n\t" "movq 12(%1, %%ebx), %%mm1 \n\t" "movq 18(%0, %%ebx), %%mm2 \n\t" "movq 18(%1, %%ebx), %%mm3 \n\t" PAVGB" %%mm1, %%mm4 \n\t" PAVGB" %%mm3, %%mm2 \n\t" "movq %%mm4, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $24, %%mm4 \n\t" "psrlq $24, %%mm2 \n\t" PAVGB" %%mm1, %%mm4 \n\t" PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"#else "movd 12(%0, %%ebx), %%mm4 \n\t" "movd 12(%1, %%ebx), %%mm1 \n\t" "movd 15(%0, %%ebx), %%mm2 \n\t" "movd 15(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm4 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm4 \n\t" "movd 18(%0, %%ebx), %%mm5 \n\t" "movd 18(%1, %%ebx), %%mm1 \n\t" "movd 21(%0, %%ebx), %%mm2 \n\t" "movd 21(%1, %%ebx), %%mm3 \n\t" "punpcklbw %%mm7, %%mm5 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm3 \n\t" "paddw %%mm1, %%mm5 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm5, %%mm2 \n\t" "movq "MANGLE(w1111)", %%mm5 \n\t" "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm2 \n\t"#endif "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm2 \n\t"#ifndef FAST_BGR2YV12 "psrad $8, %%mm4 \n\t" "psrad $8, %%mm1 \n\t" "psrad $8, %%mm2 \n\t" "psrad $8, %%mm3 \n\t"#endif "packssdw %%mm2, %%mm4 \n\t" "packssdw %%mm3, %%mm1 \n\t" "pmaddwd %%mm5, %%mm4 \n\t" "pmaddwd %%mm5, %%mm1 \n\t" "addl $24, %%ebx \n\t" "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 "psraw $7, %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" "punpckldq %%mm4, %%mm0 \n\t" "punpckhdq %%mm4, %%mm1 \n\t" "packsswb %%mm1, %%mm0 \n\t" "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" "movd %%mm0, (%2, %%eax) \n\t" "punpckhdq %%mm0, %%mm0 \n\t" "movd %%mm0, (%3, %%eax) \n\t" "addl $4, %%eax \n\t" " js 1b \n\t" : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) : "%eax", "%ebx" ); udst += chromStride; vdst += chromStride; src += srcStride*2; } asm volatile( EMMS" \n\t" SFENCE" \n\t" :::"memory");#else y=0;#endif for(; y<height; y+=2) { unsigned i; for(i=0; i<chromWidth; i++) { unsigned int b= src[6*i+0]; unsigned int g= src[6*i+1]; unsigned int r= src[6*i+2]; unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; udst[i] = U; vdst[i] = V; ydst[2*i] = Y; b= src[6*i+3]; g= src[6*i+4]; r= src[6*i+5]; Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; ydst[2*i+1] = Y; } ydst += lumStride; src += srcStride; for(i=0; i<chromWidth; i++) { unsigned int b= src[6*i+0]; unsigned int g= src[6*i+1]; unsigned int r= src[6*i+2]; unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; ydst[2*i] = Y; b= src[6*i+3]; g= src[6*i+4]; r= src[6*i+5]; Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; ydst[2*i+1] = Y; } udst += chromStride; vdst += chromStride; ydst += lumStride; src += srcStride; }}void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, unsigned width, unsigned height, int src1Stride, int src2Stride, int dstStride){ unsigned h; for(h=0; h < height; h++) { unsigned w;#ifdef HAVE_MMX#ifdef HAVE_SSE2 asm( "xorl %%eax, %%eax \n\t" "1: \n\t" PREFETCH" 64(%1, %%eax) \n\t" PREFETCH" 64(%2, %%eax) \n\t" "movdqa (%1, %%eax), %%xmm0 \n\t" "movdqa (%1, %%eax), %%xmm1 \n\t" "movdqa (%2, %%eax), %%xmm2 \n\t" "punpcklbw %%xmm2, %%xmm0 \n\t" "punpckhbw %%xmm2, %%xmm1 \n\t" "movntdq %%xmm0, (%0, %%eax, 2) \n\t" "movntdq %%xmm1, 16(%0, %%eax, 2)\n\t" "addl $16, %%eax \n\t" "cmpl %3, %%eax \n\t" " jb 1b \n\t" ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) : "memory", "%eax" );#else asm( "xorl %%eax, %%eax \n\t" "1: \n\t" PREFETCH" 64(%1, %%eax) \n\t" PREFETCH" 64(%2, %%eax) \n\t" "movq (%1, %%eax), %%mm0 \n\t" "movq 8(%1, %%eax), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq (%2, %%eax), %%mm4 \n\t" "movq 8(%2, %%eax), %%mm5 \n\t" "punpcklbw %%mm4, %%mm0 \n\t" "punpckhbw %%mm4, %%mm1 \n\t" "punpcklbw %%mm5, %%mm2 \n\t" "punpckhbw %%mm5, %%mm3 \n\t" MOVNTQ" %%mm0, (%0, %%eax, 2) \n\t" MOVNTQ" %%mm1, 8(%0, %%eax, 2) \n\t" MOVNTQ" %%mm2, 16(%0, %%eax, 2) \n\t" MOVNTQ" %%mm3, 24(%0, %%eax, 2) \n\t" "addl $16, %%eax \n\t" "cmpl %3, %%eax \n\t" " jb 1b \n\t" ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15) : "memory", "%eax" );#endif for(w= (width&(~15)); w < width; w++) { dest[2*w+0] = src1[w]; dest[2*w+1] = src2[w]; }#else for(w=0; w < width; w++) { dest[2*w+0] = src1[w]; dest[2*w+1] = src2[w]; }#endif dest += dstStride; src1 += src1Stride; src2 += src2Stride; }#ifdef HAVE_MMX asm( EMMS" \n\t" SFENCE" \n\t" ::: "memory" );#endif}static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, unsigned width, unsigned height, int srcStride1, int srcStride2, int dstStride1, int dstStride2){ unsigned int y,x,h; int w; w=width/2; h=height/2;#ifdef HAVE_MMX asm volatile( PREFETCH" %0\n\t" PREFETCH" %1\n\t" ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");#endif for(y=0;y<h;y++){ const uint8_t* s1=src1+srcStride1*(y>>1); uint8_t* d=dst1+dstStride1*y; x=0;#ifdef HAVE_MMX for(;x<w-31;x+=32) { asm volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq 8%1, %%mm2\n\t" "movq 16%1, %%mm4\n\t" "movq 24%1, %%mm6\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm2, %%mm3\n\t" "movq %%mm4, %%mm5\n\t" "movq %%mm6, %%mm7\n\t" "punpcklbw %%mm0, %%mm0\n\t" "punpckhbw %%mm1, %%mm1\n\t" "punpcklbw %%mm2, %%mm2\n\t" "punpckhbw %%mm3, %%mm3\n\t" "punpcklbw %%mm4, %%mm4\n\t" "punpckhbw %%mm5, %%mm5\n\t" "punpcklbw %%mm6, %%mm6\n\t" "punpckhbw %%mm7, %%mm7\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm1, 8%0\n\t" MOVNTQ" %%mm2, 16%0\n\t" MOVNTQ" %%mm3, 24%0\n\t" MOVNTQ" %%mm4, 32%0\n\t" MOVNTQ" %%mm5, 40%0\n\t" MOVNTQ" %%mm6, 48%0\n\t" MOVNTQ" %%mm7, 56%0" :"=m"(d[2*x]) :"m"(s1[x]) :"memory"); }#endif for(;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; } for(y=0;y<h;y++){ const uint8_t* s2=src2+srcStride2*(y>>1); uint8_t* d=dst2+dstStride2*y; x=0;#ifdef HAVE_MMX for(;x<w-31;x+=32) { asm volatile( PREFETCH" 32%1\n\t" "movq %1, %%mm0\n\t" "movq 8%1, %%mm2\n\t" "movq 16%1, %%mm4\n\t" "movq 24%1, %%mm6\n\t" "movq %%mm0, %%mm1\n\t" "movq %%mm2, %%mm3\n\t" "movq %%mm4, %%mm5\n\t" "movq %%mm6, %%mm7\n\t" "punpcklbw %%mm0, %%mm0\n\t" "punpckhbw %%mm1, %%mm1\n\t" "punpcklbw %%mm2, %%mm2\n\t" "punpckhbw %%mm3, %%mm3\n\t" "punpcklbw %%mm4, %%mm4\n\t" "punpckhbw %%mm5, %%mm5\n\t" "punpcklbw %%mm6, %%mm6\n\t" "punpckhbw %%mm7, %%mm7\n\t" MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm1, 8%0\n\t" MOVNTQ" %%mm2, 16%0\n\t" MOVNTQ" %%mm3, 24%0\n\t" MOVNTQ" %%mm4, 32%0\n\t" MOVNTQ" %%mm5, 40%0\n\t" MOVNTQ" %%mm6, 48%0\n\t" MOVNTQ" %%mm7, 56%0" :"=m"(d[2*x]) :"m"(s2[x]) :"memory"); }#endif for(;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; }#ifdef HAVE_MMX asm( EMMS" \n\t" SFENCE" \n\t" ::: "memory" );#endif}static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, uint8_t *dst, unsigned width, unsigned height, int srcStride1, int srcStride2, int srcStride3, int dstStride){ unsigned y,x,w,h; w=width/2; h=height; for(y=0;y<h;y++){ const uint8_t* yp=src1+srcStride1*y; const uint8_t* up=src2+srcStride2*(y>>2); const uint8_t* vp=src3+srcStride3*(y>>2); uint8_t* d=dst+dstStride*y; x=0;#ifdef HAVE_MMX for(;x<w-7;x+=8) { asm volatile( PREFETCH" 32(%1, %0)\n\t" PREFETCH" 32
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -