📄 rgb2rgb_template.c
字号:
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t"
PREFETCH" 32(%3, %%"REG_a") \n\t"
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
"movq %%mm0, %%mm2 \n\t" // U(0)
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
"movq %%mm3, %%mm4 \n\t" // Y(0)
"movq %%mm5, %%mm6 \n\t" // Y(8)
"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
"punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
MOVNTQ" %%mm3, (%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a
);
#else
#if defined ARCH_ALPHA && defined HAVE_MVI
#define pl2yuy2(n) \
y1 = yc[n]; \
y2 = yc2[n]; \
u = uc[n]; \
v = vc[n]; \
asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
yuv1 = (u << 8) + (v << 24); \
yuv2 = yuv1 + y2; \
yuv1 += y1; \
qdst[n] = yuv1; \
qdst2[n] = yuv2;
int i;
uint64_t *qdst = (uint64_t *) dst;
uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
const uint32_t *yc = (uint32_t *) ysrc;
const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
for(i = 0; i < chromWidth; i += 8){
uint64_t y1, y2, yuv1, yuv2;
uint64_t u, v;
/* Prefetch */
asm("ldq $31,64(%0)" :: "r"(yc));
asm("ldq $31,64(%0)" :: "r"(yc2));
asm("ldq $31,64(%0)" :: "r"(uc));
asm("ldq $31,64(%0)" :: "r"(vc));
pl2yuy2(0);
pl2yuy2(1);
pl2yuy2(2);
pl2yuy2(3);
yc += 4;
yc2 += 4;
uc += 4;
vc += 4;
qdst += 4;
qdst2 += 4;
}
y++;
ysrc += lumStride;
dst += dstStride;
#elif __WORDSIZE >= 64
int i;
uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i += 2){
uint64_t k, l;
k = yc[0] + (uc[0] << 8) +
(yc[1] << 16) + (vc[0] << 24);
l = yc[2] + (uc[1] << 8) +
(yc[3] << 16) + (vc[1] << 24);
*ldst++ = k + (l << 32);
yc += 4;
uc += 2;
vc += 2;
}
#else
int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i++){
#ifdef WORDS_BIGENDIAN
*idst++ = (yc[0] << 24)+ (uc[0] << 16) +
(yc[1] << 8) + (vc[0] << 0);
#else
*idst++ = yc[0] + (uc[0] << 8) +
(yc[1] << 16) + (vc[0] << 24);
#endif
yc += 2;
uc++;
vc++;
}
#endif
#endif
if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
{
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
}
#ifdef HAVE_MMX
asm( EMMS" \n\t"
SFENCE" \n\t"
:::"memory");
#endif
}
/**
*
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
* problem for anyone then tell me, and ill fix it)
*/
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride, stride_t vertLumPerChroma)
{
long y;
const long chromWidth= width>>1;
for(y=0; y<height; y++)
{
#ifdef HAVE_MMX
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t"
PREFETCH" 32(%3, %%"REG_a") \n\t"
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
"movq %%mm0, %%mm2 \n\t" // U(0)
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
"movq %%mm0, %%mm4 \n\t" // Y(0)
"movq %%mm2, %%mm6 \n\t" // Y(8)
"punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
"punpckhbw %%mm3, %%mm4 \n\t" // YUYV YUYV(4)
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
MOVNTQ" %%mm0, (%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4)\n\t"
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4)\n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a
);
#else
//FIXME adapt the alpha asm code from yv12->yuy2
#if __WORDSIZE >= 64
int i;
uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i += 2){
uint64_t k, l;
k = uc[0] + (yc[0] << 8) +
(vc[0] << 16) + (yc[1] << 24);
l = uc[1] + (yc[2] << 8) +
(vc[1] << 16) + (yc[3] << 24);
*ldst++ = k + (l << 32);
yc += 4;
uc += 2;
vc += 2;
}
#else
int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i++){
#ifdef WORDS_BIGENDIAN
*idst++ = (uc[0] << 24)+ (yc[0] << 16) +
(vc[0] << 8) + (yc[1] << 0);
#else
*idst++ = uc[0] + (yc[0] << 8) +
(vc[0] << 16) + (yc[1] << 24);
#endif
yc += 2;
uc++;
vc++;
}
#endif
#endif
if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
{
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
}
#ifdef HAVE_MMX
asm( EMMS" \n\t"
SFENCE" \n\t"
:::"memory");
#endif
}
static inline void RENAME(yuvPlanartovyuy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride, stride_t vertLumPerChroma)
{
long y;
const long chromWidth= width>>1;
for(y=0; y<height; y++)
{
int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i++){
*idst++ = vc[0] + (yc[0] << 8) +
(uc[0] << 16) + (yc[1] << 24);
yc += 2;
uc++;
vc++;
}
if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
{
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
}
}
static inline void RENAME(yuvPlanartoyvyu)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride, stride_t vertLumPerChroma)
{
long y;
const long chromWidth= width>>1;
for(y=0; y<height; y++)
{
int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i++){
*idst++ = yc[0] + (vc[0] << 8) +
(yc[1] << 16) + (uc[0] << 24);
yc += 2;
uc++;
vc++;
}
if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
{
usrc += chromStride;
vsrc += chromStride;
}
ysrc += lumStride;
dst += dstStride;
}
}
/**
*
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
* problem for anyone then tell me, and ill fix it)
*/
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
static inline void RENAME(yv12toyvyu)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartoyvyu)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
static inline void RENAME(yv12tovyuy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride)
{
//FIXME interpolate chroma
RENAME(yuvPlanartovyuy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
}
/**
*
* width should be a multiple of 16
*/
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t dstStride)
{
RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
/**
*
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a
* problem for anyone then tell me, and ill fix it)
*/
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
stride_t lumStride, stride_t chromStride, stride_t srcStride)
{
long y;
const long chromWidth= width>>1;
for(y=0; y<height; y+=2)
{
#ifdef HAVE_MMX
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
MOVNTQ" %%mm2, (%1, %%"REG_a", 2)\n\t"
"movq 16(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(12)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2)\n\t"
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
ydst += lumStride;
src += srcStride;
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
".balign 16 \n\t"
"1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1\n\t" // YUYV YUYV(4)
"movq 16(%0, %%"REG_a", 4), %%mm2\n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm3\n\t" // YUYV YUYV(12)
"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm0, (%1, %%"REG_a", 2)\n\t"
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2)\n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
#else
long i;
for(i=0; i<chromWidth; i++)
{
ydst[2*i+0] = src[4*i+0];
udst[i] = src[4*i+1];
ydst[2*i+1] = src[4*i+2];
vdst[i] = src[4*i+3];
}
ydst += lumStride;
src += srcStride;
for(i=0; i<chromWidth; i++)
{
ydst[2*i+0] = src[4*i+0];
ydst[2*i+1] = src[4*i+2];
}
#endif
udst += chromStride;
vdst += chromStride;
ydst += lumStride;
src += srcStride;
}
#ifdef HAVE_MMX
asm volatile( EMMS" \n\t"
SFENCE" \n\t"
:::"memory");
#endif
}
static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height, stride_t lumStride, stride_t chromStride)
{
/* Y Plane */
memcpy(ydst, ysrc, width*height);
/* XXX: implement upscaling for U,V */
}
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, stride_t srcStride, stride_t dstStride)
{
long x,y;
dst[0]= src[0];
// first line
for(x=0; x<srcWidth-1; x++){
dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
}
dst[2*srcWidth-1]= src[srcWidth-1];
dst+= dstStride;
for(y=1; y<srcHeight; y++){
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
const long mmxSize= srcWidth&~15;
asm volatile(
"mov %4, %%"REG_a" \n\t"
"1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -