📄 swscale_template.c
字号:
WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_YUY2:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED(%%REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
default: break;
}
#endif //HAVE_MMX
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
}
/**
* YV12 to RGB without scaling or interpolating
*/
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;
uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
const int yalpha= 4096; //FIXME ...
if(flags&SWS_FULL_CHR_H_INT)
{
RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
return;
}
#ifdef HAVE_MMX
if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
{
switch(dstFormat)
{
case IMGFMT_BGR32:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR24:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR15:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR16:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_YUY2:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1(%%REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
}
}
else
{
switch(dstFormat)
{
case IMGFMT_BGR32:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR24:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR15:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_BGR16:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
case IMGFMT_YUY2:
asm volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1b(%%REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither)
);
return;
}
}
#endif
if( uvalpha < 2048 )
{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
}else{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
}
}
//FIXME yuy2* can read upto 7 samples to much
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
{
#ifdef HAVE_MMX
asm volatile(
"movq "MANGLE(bm01010101)", %%mm2\n\t"
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
"movq (%1, %%"REG_a",2), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
"pand %%mm2, %%mm0 \n\t"
"pand %%mm2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src+width*2), "r" (dst+width)
: "%"REG_a
);
#else
int i;
for(i=0; i<width; i++)
dst[i]= src[2*i];
#endif
}
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
"movq (%1, %%"REG_a",4), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
"movq (%2, %%"REG_a",4), %%mm2 \n\t"
"movq 8(%2, %%"REG_a",4), %%mm3 \n\t"
PAVGB(%%mm2, %%mm0)
PAVGB(%%mm3, %%mm1)
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"psrlw $8, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
"movd %%mm0, (%4, %%"REG_a") \n\t"
"movd %%mm1, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
: "%"REG_a
);
#else
int i;
for(i=0; i<width; i++)
{
dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1;
dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1;
}
#endif
}
//this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
{
#ifdef HAVE_MMX
asm volatile(
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
"movq (%1, %%"REG_a",2), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src+width*2), "r" (dst+width)
: "%"REG_a
);
#else
int i;
for(i=0; i<width; i++)
dst[i]= src[2*i+1];
#endif
}
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
"movq (%1, %%"REG_a",4), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",4), %%mm1 \n\t"
"movq (%2, %%"REG_a",4), %%mm2 \n\t"
"movq 8(%2, %%"REG_a",4), %%mm3 \n\t"
PAVGB(%%mm2, %%mm0)
PAVGB(%%mm3, %%mm1)
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"psrlw $8, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
"movd %%mm0, (%4, %%"REG_a") \n\t"
"movd %%mm1, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
: "%"REG_a
);
#else
int i;
for(i=0; i<width; i++)
{
dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1;
dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1;
}
#endif
}
static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
{
int i;
for(i=0; i<width; i++)
{
int b= ((uint32_t*)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -