📄 rgb2rgb_template.c
字号:
"add $24, %%"REG_a" \n\t"
" js 1b \n\t"
"2: \n\t"
: "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size)
);
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
if (mmx_size==23) return; //finihsed, was multiple of 8
src+= src_size;
dst+= src_size;
src_size= 23-mmx_size;
src-= src_size;
dst-= src_size;
#endif
for (i=0; i<src_size; i+=3)
{
register uint8_t x;
x = src[i + 2];
dst[i + 1] = src[i + 1];
dst[i + 2] = src[i + 0];
dst[i + 0] = x;
}
}
/**
 * Interleave planar Y, U and V rows into packed output. Every pair of luma
 * samples produces the four bytes Y0 U0 Y1 V0.
 *
 * ysrc, usrc, vsrc   source planes
 * dst                packed destination
 * width              luma samples per row; chromWidth is width>>1
 * height             number of rows to convert
 * lumStride          added to ysrc after each row
 * chromStride        added to usrc and vsrc whenever the chroma row advances
 * dstStride          added to dst after each row
 * vertLumPerChroma   number of luma rows that share one chroma row; the
 *                    (y & (vertLumPerChroma-1)) test below only behaves like
 *                    a modulo when this is a power of two
 *
 * The MMX loop consumes 8 chroma / 16 luma samples per iteration and is
 * bottom-tested, so it always runs at least once and rounds the row up to a
 * multiple of 8 chroma samples. The 64-bit C loop consumes 2 chroma samples
 * per iteration.
 */
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           long width, long height,
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
    long y;
    const long chromWidth= width>>1;
    for (y=0; y<height; y++)
    {
#ifdef HAVE_MMX
        //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
        /* REG_a is the chroma index: luma is addressed with scale 2 and the
         * destination with scale 4. The asm reads and writes memory through
         * its pointer operands, which the operand list does not express, so
         * "memory" is listed as a clobber. */
        asm volatile(
        "xor %%"REG_a", %%"REG_a" \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
        PREFETCH" 32(%2, %%"REG_a") \n\t"
        PREFETCH" 32(%3, %%"REG_a") \n\t"
        "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
        "movq %%mm0, %%mm2 \n\t" // U(0)
        "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
        "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
        "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
        "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
        "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
        "movq %%mm3, %%mm4 \n\t" // Y(0)
        "movq %%mm5, %%mm6 \n\t" // Y(8)
        "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
        "punpckhbw %%mm0, %%mm4 \n\t" // YUYV YUYV(4)
        "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
        "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
        MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
        "add $8, %%"REG_a" \n\t"
        "cmp %4, %%"REG_a" \n\t"
        " jb 1b \n\t"
        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
        : "memory", "%"REG_a
        );
#else
#if defined ARCH_ALPHA && defined HAVE_MVI
        /* This path converts two rows per outer iteration: row y through
         * yc/qdst and the following row through yc2/qdst2, both using the
         * same chroma row. */
#define pl2yuy2(n) \
        y1 = yc[n]; \
        y2 = yc2[n]; \
        u = uc[n]; \
        v = vc[n]; \
        asm("unpkbw %1, %0" : "=r"(y1) : "r"(y1)); \
        asm("unpkbw %1, %0" : "=r"(y2) : "r"(y2)); \
        asm("unpkbl %1, %0" : "=r"(u) : "r"(u)); \
        asm("unpkbl %1, %0" : "=r"(v) : "r"(v)); \
        yuv1 = (u << 8) + (v << 24); \
        yuv2 = yuv1 + y2; \
        yuv1 += y1; \
        qdst[n] = yuv1; \
        qdst2[n] = yuv2;
        int i;
        uint64_t *qdst = (uint64_t *) dst;
        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
        const uint32_t *yc = (uint32_t *) ysrc;
        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
        for (i = 0; i < chromWidth; i += 8){
            uint64_t y1, y2, yuv1, yuv2;
            uint64_t u, v;
            /* Prefetch */
            asm("ldq $31,64(%0)" :: "r"(yc));
            asm("ldq $31,64(%0)" :: "r"(yc2));
            asm("ldq $31,64(%0)" :: "r"(uc));
            asm("ldq $31,64(%0)" :: "r"(vc));
            pl2yuy2(0);
            pl2yuy2(1);
            pl2yuy2(2);
            pl2yuy2(3);
            yc += 4;
            yc2 += 4;
            uc += 4;
            vc += 4;
            qdst += 4;
            qdst2 += 4;
        }
        /* Account for the second row handled above; the common code at the
         * bottom of the loop advances past the first one. */
        y++;
        ysrc += lumStride;
        dst += dstStride;
#elif __WORDSIZE >= 64
        /* NOTE(review): unlike the 32-bit path below there is no
         * WORDS_BIGENDIAN variant here; the packing assumes the low byte of
         * the 64-bit word is stored first. */
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2){
            uint64_t k, l;
            /* Widen to unsigned before shifting by 24. A uint8_t promotes to
             * int, so a V byte >= 0x80 would land in the sign bit and then
             * sign-extend into the upper half of the 64-bit word, corrupting
             * the second pixel pair when k and (l << 32) are added. */
            k = yc[0] + (uc[0] << 8) +
                (yc[1] << 16) + ((uint32_t)vc[0] << 24);
            l = yc[2] + (uc[1] << 8) +
                (yc[3] << 16) + ((uint32_t)vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }
#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i++){
            /* The byte that ends up in bits 24..31 is widened to unsigned
             * first so the shift cannot overflow a signed int. */
#ifdef WORDS_BIGENDIAN
            *idst++ = ((uint32_t)yc[0] << 24)+ (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((uint32_t)vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
#endif
        /* Move to the next chroma row only after the last luma row that
         * shares the current one. */
        if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
        {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
#ifdef HAVE_MMX
    asm( EMMS" \n\t"
    SFENCE" \n\t"
    :::"memory");
#endif
}
/**
 * Forwards to yuvPlanartoyuy2 with two luma rows per chroma row.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                      long width, long height,
                                      long lumStride, long chromStride, long dstStride)
{
    /* each chroma row is reused for this many consecutive luma rows */
    const long lumRowsPerChromaRow = 2;

    //FIXME interpolate chroma
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst,
                            width, height,
                            lumStride, chromStride, dstStride,
                            lumRowsPerChromaRow);
}
/**
 * Interleave planar Y, U and V rows into packed output. Every pair of luma
 * samples produces the four bytes U0 Y0 V0 Y1.
 *
 * ysrc, usrc, vsrc   source planes
 * dst                packed destination
 * width              luma samples per row; chromWidth is width>>1
 * height             number of rows to convert
 * lumStride          added to ysrc after each row
 * chromStride        added to usrc and vsrc whenever the chroma row advances
 * dstStride          added to dst after each row
 * vertLumPerChroma   number of luma rows that share one chroma row; the
 *                    (y & (vertLumPerChroma-1)) test below only behaves like
 *                    a modulo when this is a power of two
 *
 * The MMX loop consumes 8 chroma / 16 luma samples per iteration and is
 * bottom-tested, so it always runs at least once and rounds the row up to a
 * multiple of 8 chroma samples. The 64-bit C loop consumes 2 chroma samples
 * per iteration.
 */
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                           long width, long height,
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
{
    long y;
    const long chromWidth= width>>1;
    for (y=0; y<height; y++)
    {
#ifdef HAVE_MMX
        //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
        /* REG_a is the chroma index: luma is addressed with scale 2 and the
         * destination with scale 4. The asm reads and writes memory through
         * its pointer operands, which the operand list does not express, so
         * "memory" is listed as a clobber. */
        asm volatile(
        "xor %%"REG_a", %%"REG_a" \n\t"
        ASMALIGN(4)
        "1: \n\t"
        PREFETCH" 32(%1, %%"REG_a", 2) \n\t"
        PREFETCH" 32(%2, %%"REG_a") \n\t"
        PREFETCH" 32(%3, %%"REG_a") \n\t"
        "movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0)
        "movq %%mm0, %%mm2 \n\t" // U(0)
        "movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0)
        "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
        "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
        "movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0)
        "movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8)
        "movq %%mm0, %%mm4 \n\t" // UVUV UVUV(0)
        "movq %%mm2, %%mm6 \n\t" // UVUV UVUV(8)
        "punpcklbw %%mm3, %%mm0 \n\t" // UYVY UYVY(0)
        "punpckhbw %%mm3, %%mm4 \n\t" // UYVY UYVY(4)
        "punpcklbw %%mm5, %%mm2 \n\t" // UYVY UYVY(8)
        "punpckhbw %%mm5, %%mm6 \n\t" // UYVY UYVY(12)
        MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t"
        MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t"
        "add $8, %%"REG_a" \n\t"
        "cmp %4, %%"REG_a" \n\t"
        " jb 1b \n\t"
        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
        : "memory", "%"REG_a
        );
#else
        //FIXME adapt the alpha asm code from yv12->yuy2
#if __WORDSIZE >= 64
        /* NOTE(review): unlike the 32-bit path below there is no
         * WORDS_BIGENDIAN variant here; the packing assumes the low byte of
         * the 64-bit word is stored first. */
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i += 2){
            uint64_t k, l;
            /* Widen to unsigned before shifting by 24. A uint8_t promotes to
             * int, so a luma byte >= 0x80 would land in the sign bit and then
             * sign-extend into the upper half of the 64-bit word, corrupting
             * the second pixel pair when k and (l << 32) are added. */
            k = uc[0] + (yc[0] << 8) +
                (vc[0] << 16) + ((uint32_t)yc[1] << 24);
            l = uc[1] + (yc[2] << 8) +
                (vc[1] << 16) + ((uint32_t)yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }
#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
        for (i = 0; i < chromWidth; i++){
            /* The byte that ends up in bits 24..31 is widened to unsigned
             * first so the shift cannot overflow a signed int. */
#ifdef WORDS_BIGENDIAN
            *idst++ = ((uint32_t)uc[0] << 24)+ (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((uint32_t)yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
#endif
        /* Move to the next chroma row only after the last luma row that
         * shares the current one. */
        if ((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
        {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst += dstStride;
    }
#ifdef HAVE_MMX
    asm( EMMS" \n\t"
    SFENCE" \n\t"
    :::"memory");
#endif
}
/**
 * Forwards to yuvPlanartouyvy with two luma rows per chroma row.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 */
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                      long width, long height,
                                      long lumStride, long chromStride, long dstStride)
{
    /* each chroma row is reused for this many consecutive luma rows */
    const long lumRowsPerChromaRow = 2;

    //FIXME interpolate chroma
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst,
                            width, height,
                            lumStride, chromStride, dstStride,
                            lumRowsPerChromaRow);
}
/**
 * Forwards to yuvPlanartoyuy2 with one luma row per chroma row, so the
 * chroma pointers advance on every row.
 *
 * Width should be a multiple of 16.
 */
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
                                         long width, long height,
                                         long lumStride, long chromStride, long dstStride)
{
    /* chroma is not shared between luma rows */
    const long lumRowsPerChromaRow = 1;

    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst,
                            width, height,
                            lumStride, chromStride, dstStride,
                            lumRowsPerChromaRow);
}
/**
 * Height should be a multiple of 2 and width should be a multiple of 16 (if this is a
 * problem for anyone then tell me, and I'll fix it).
 */
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride)
{
long y;
const long chromWidth= width>>1;
for (y=0; y<height; y+=2)
{
#ifdef HAVE_MMX
asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
ASMALIGN(4)
"1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
"psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
"pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
"pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
"psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
"psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
"pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
"pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a
);
ydst += lumStride;
src
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -