📄 swscale.c
字号:
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t"
"movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm1, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
"add $8, %%"REG_a" \n\t"
// End
"9: \n\t"
// "int $3\n\t"
"lea 0b, %0 \n\t"
"lea 1b, %1 \n\t"
"lea 2b, %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea 9b, %3 \n\t"
"sub %0, %3 \n\t"
:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
"=r" (fragmentLengthA)
);
asm volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm0, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
"add $8, %%"REG_a" \n\t"
// End
"9: \n\t"
// "int $3\n\t"
"lea 0b, %0 \n\t"
"lea 1b, %1 \n\t"
"lea 2b, %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea 9b, %3 \n\t"
"sub %0, %3 \n\t"
:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
"=r" (fragmentLengthB)
);
xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
fragmentPos=0;
for(i=0; i<dstW/numSplits; i++)
{
int xx=xpos>>16;
if((i&3) == 0)
{
int a=0;
int b=((xpos+xInc)>>16) - xx;
int c=((xpos+xInc*2)>>16) - xx;
int d=((xpos+xInc*3)>>16) - xx;
filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
filterPos[i/2]= xx;
if(d+1<4)
{
int maxShift= 3-(d+1);
int shift=0;
memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
funnyCode[fragmentPos + imm8OfPShufW1B]=
(a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
funnyCode[fragmentPos + imm8OfPShufW2B]=
a | (b<<2) | (c<<4) | (d<<6);
if(i+3>=dstW) shift=maxShift; //avoid overread
else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
if(shift && i>=shift)
{
funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
filterPos[i/2]-=shift;
}
fragmentPos+= fragmentLengthB;
}
else
{
int maxShift= 3-d;
int shift=0;
memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
funnyCode[fragmentPos + imm8OfPShufW1A]=
funnyCode[fragmentPos + imm8OfPShufW2A]=
a | (b<<2) | (c<<4) | (d<<6);
if(i+4>=dstW) shift=maxShift; //avoid overread
else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
if(shift && i>=shift)
{
funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
filterPos[i/2]-=shift;
}
fragmentPos+= fragmentLengthA;
}
funnyCode[fragmentPos]= RET;
}
xpos+=xInc;
}
filterPos[i/2]= xpos>>16; // needed to jump to the next part
}
#endif // ARCH_X86 || ARCH_X86_64
/**
 * Build the clamp lookup table.
 *
 * For every index 0..767, clip_table[index] receives (index - 256) limited
 * to the range 0..255.
 */
static void globalInit(void)
{
    int idx = 0;

    while (idx < 768) {
        /* re-centre by 256, then clamp into [0, 255] */
        clip_table[idx] = FFMIN(FFMAX(idx - 256, 0), 255);
        idx++;
    }
}
/**
 * Select the scaler implementation to use.
 *
 * With RUNTIME_CPUDETECT defined, the choice is made from the
 * SWS_CPU_CAPS_* bits in flags; otherwise it is fixed at compile time by
 * the HAVE_* macros and flags is not examined.
 *
 * @param flags capability bits (only consulted with RUNTIME_CPUDETECT)
 * @return the selected swScale_* function
 */
static SwsFunc getSwsFunc(int flags)
{
#ifdef RUNTIME_CPUDETECT
#if defined(ARCH_X86) || defined(ARCH_X86_64)
    /* candidates are tested in order of speed, fastest first */
    if (flags & SWS_CPU_CAPS_MMX2)
        return swScale_MMX2;
    if (flags & SWS_CPU_CAPS_3DNOW)
        return swScale_3DNow;
    if (flags & SWS_CPU_CAPS_MMX)
        return swScale_MMX;
    return swScale_C;
#else
#ifdef ARCH_POWERPC
    if (flags & SWS_CPU_CAPS_ALTIVEC)
        return swScale_altivec;
#endif
    /* no AltiVec capability bit, or neither an x86 nor a PowerPC build */
    return swScale_C;
#endif /* defined(ARCH_X86) || defined(ARCH_X86_64) */
#else /* !RUNTIME_CPUDETECT */
#if defined(HAVE_MMX2)
    return swScale_MMX2;
#elif defined(HAVE_3DNOW)
    return swScale_3DNow;
#elif defined(HAVE_MMX)
    return swScale_MMX;
#elif defined(HAVE_ALTIVEC)
    return swScale_altivec;
#else
    return swScale_C;
#endif
#endif /* RUNTIME_CPUDETECT */
}
/**
 * Copy the luma plane of a slice and interleave its two chroma planes into
 * destination plane 1.
 *
 * The luma rows go to dstParam[0] starting at row srcSliceY. The chroma
 * planes (srcW/2 samples wide, srcSliceH/2 rows) are handed to
 * interleaveBytes(); for IMGFMT_NV12 src[1] is passed first, for any other
 * destination format src[2] is passed first.
 *
 * @param c          scaler context (srcW and dstFormat are read)
 * @param src        source planes; src[0] is the first luma row of the slice
 * @param srcStride  source strides per plane
 * @param srcSliceY  first row of the slice within the picture
 * @param srcSliceH  number of luma rows in the slice
 * @param dstParam   destination planes (start of the whole picture)
 * @param dstStride  destination strides per plane
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], stride_t dstStride[]){
    uint8_t *dst = dstParam[0] + dstStride[0]*srcSliceY;

    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        /* strides differ (or are not positive): copy row by row */
        int i;
        uint8_t *srcPtr = src[0];
        uint8_t *dstPtr = dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr += srcStride[0];
            dstPtr += dstStride[0];
        }
    }

    /*
     * Chroma: start at the chroma row that belongs to this slice instead of
     * always at the top of plane 1, so that slices with srcSliceY != 0 do
     * not overwrite the first chroma rows. The row pitch used for the
     * offset is dstStride[0], the same pitch the interleaveBytes() calls
     * below write with.
     */
    dst = dstParam[1] + dstStride[0]*(srcSliceY/2);
    if (c->dstFormat == IMGFMT_NV12)
        interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[0] );
    else
        interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[0] );

    return srcSliceH;
}
/**
 * Copy the luma plane and split the interleaved chroma byte pairs of a
 * semi-planar source into the two separate destination chroma planes.
 *
 * When c->srcFormat is IMGFMT_NV12, dst[1] receives the second byte of each
 * pair and dst[2] the first; for any other source format it is the other
 * way round.
 *
 * NOTE(review): the chroma source is taken from srcParam[0] + srcW*srcH and
 * read as one contiguous run, independent of srcStride[] and srcSliceY, and
 * the destination planes are not offset by srcSliceY. Presumably only whole,
 * tightly packed frames are passed in -- confirm against the callers.
 *
 * @return srcSliceH
 */
static int NV12ToPlanarWrapper(SwsContext *c, uint8_t* srcParam[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dst[], stride_t dstStride[]){
    const uint8_t *lumaSrc = srcParam[0] + srcStride[0]*srcSliceY;
    unsigned char *uRow = dst[1];
    unsigned char *vRow = dst[2];
    /* byte position inside each interleaved pair that feeds dst[1] / dst[2] */
    const int uByte = (c->srcFormat == IMGFMT_NV12) ? 1 : 0;
    const int vByte = (c->srcFormat == IMGFMT_NV12) ? 0 : 1;
    const uint8_t *pair;
    int row, col;

    /* luma: one bulk copy when both strides agree, otherwise row by row */
    if (dstStride[0]==srcStride[0] && srcStride[0]>0) {
        memcpy(dst[0], lumaSrc, srcSliceH*dstStride[0]);
    } else {
        uint8_t *out = dst[0];
        const uint8_t *in = lumaSrc;

        for (row = 0; row < srcSliceH; row++) {
            memcpy(out, in, c->srcW);
            in  += srcStride[0];
            out += dstStride[0];
        }
    }

    /* chroma is stored as interleaved byte pairs; walk them pair by pair */
    pair = srcParam[0] + c->srcW*c->srcH;
    for (row = 0; row < srcSliceH/2; row++) {
        for (col = 0; col < c->srcW/2; col++) {
            uRow[col] = pair[uByte];
            vRow[col] = pair[vByte];
            pair += 2;
        }
        uRow += dstStride[1];
        vRow += dstStride[2];
    }

    return srcSliceH;
}
/**
 * Slice wrapper around yv12toyuy2(): passes the three source planes and
 * writes into dstParam[0] starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], stride_t dstStride[]){
    yv12toyuy2(src[0], src[1], src[2],
               dstParam[0] + dstStride[0]*srcSliceY,   /* first output row of this slice */
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);
    return srcSliceH;
}
/**
 * Slice wrapper around yv12touyvy(): passes the three source planes and
 * writes into dstParam[0] starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], stride_t dstStride[]){
    yv12touyvy(src[0], src[1], src[2],
               dstParam[0] + dstStride[0]*srcSliceY,   /* first output row of this slice */
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);
    return srcSliceH;
}
/**
 * Slice wrapper around yv12toyvyu(): passes the three source planes and
 * writes into dstParam[0] starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToYvyuWrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], stride_t dstStride[]){
    yv12toyvyu(src[0], src[1], src[2],
               dstParam[0] + dstStride[0]*srcSliceY,   /* first output row of this slice */
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);
    return srcSliceH;
}
/**
 * Slice wrapper around yv12tovyuy(): passes the three source planes and
 * writes into dstParam[0] starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToVyuyWrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], stride_t dstStride[]){
    yv12tovyuy(src[0], src[1], src[2],
               dstParam[0] + dstStride[0]*srcSliceY,   /* first output row of this slice */
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);
    return srcSliceH;
}
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
/**
 * Convert a slice between packed RGB/BGR formats of 15, 16, 24 or 32 bits.
 *
 * A converter is looked up from the source and destination depth, using one
 * table when both formats have the same component order and another when
 * the order differs. If no converter exists for the pair, an error is
 * logged and nothing is written (the function previously called a NULL
 * function pointer in that case).
 *
 * @param c          scaler context (srcFormat, dstFormat and srcW are read)
 * @param src        source planes; src[0] is the first row of the slice
 * @param srcStride  source strides
 * @param srcSliceY  first row of the slice within the picture
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes (start of the whole picture)
 * @param dstStride  destination strides
 * @return srcSliceH, also when no converter was found
 */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], stride_t dstStride[]){
    const int srcFormat= c->srcFormat;
    const int dstFormat= c->dstFormat;
    /* bytes per pixel: bit depth from the low byte of the format, rounded up */
    const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
    const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
    const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8
    const int dstId= (dstFormat&0xFF)>>2;
    void (*conv)(const uint8_t *src, uint8_t *dst, stride_t src_size)=NULL;

    /* lookup key: source id in the low nibble, destination id in the high nibble */
    if(   (isBGR(srcFormat) && isBGR(dstFormat))
       || (isRGB(srcFormat) && isRGB(dstFormat))){
        /* same component order: only the depth changes */
        switch(srcId | (dstId<<4)){
        case 0x34: conv= rgb16to15; break;
        case 0x36: conv= rgb24to15; break;
        case 0x38: conv= rgb32to15; break;
        case 0x43: conv= rgb15to16; break;
        case 0x46: conv= rgb24to16; break;
        case 0x48: conv= rgb32to16; break;
        case 0x63: conv= rgb15to24; break;
        case 0x64: conv= rgb16to24; break;
        case 0x68: conv= rgb32to24; break;
        case 0x83: conv= rgb15to32; break;
        case 0x84: conv= rgb16to32; break;
        case 0x86: conv= rgb24to32; break;
        default: break; /* reported below */
        }
    }else if(   (isBGR(srcFormat) && isRGB(dstFormat))
             || (isRGB(srcFormat) && isBGR(dstFormat))){
        /* component order differs */
        switch(srcId | (dstId<<4)){
        case 0x33: conv= rgb15tobgr15; break;
        case 0x34: conv= rgb16tobgr15; break;
        case 0x36: conv= rgb24tobgr15; break;
        case 0x38: conv= rgb32tobgr15; break;
        case 0x43: conv= rgb15tobgr16; break;
        case 0x44: conv= rgb16tobgr16; break;
        case 0x46: conv= rgb24tobgr16; break;
        case 0x48: conv= rgb32tobgr16; break;
        case 0x63: conv= rgb15tobgr24; break;
        case 0x64: conv= rgb16tobgr24; break;
        case 0x66: conv= rgb24tobgr24; break;
        case 0x68: conv= rgb32tobgr24; break;
        case 0x83: conv= rgb15tobgr32; break;
        case 0x84: conv= rgb16tobgr32; break;
        case 0x86: conv= rgb24tobgr32; break;
        case 0x88: conv= rgb32tobgr32; break;
        default: break; /* reported below */
        }
    }

    if(!conv){
        /* unsupported pair: report once and bail out instead of calling NULL */
        MSG_ERR("swScaler: internal error %s -> %s converter\n",
                sws_format_name(srcFormat), sws_format_name(dstFormat));
        return srcSliceH;
    }

    if(dstStride[0]*srcBpp == srcStride[0]*dstBpp && dstStride[0]>0)
        /* strides are proportional to the pixel sizes: convert the slice in one call */
        conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
    else
    {
        /* otherwise convert one row at a time */
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        for(i=0; i<srcSliceH; i++)
        {
            conv(srcPtr, dstPtr, c->srcW*srcBpp);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    return srcSliceH;
}
/**
 * Slice wrapper around rgb24toyv12(): plane 0 of the destination is entered
 * at row srcSliceY, planes 1 and 2 at row srcSliceY>>1.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], stride_t dstStride[]){
    const int chromaRow = srcSliceY>>1;
    uint8_t *plane0 = dst[0] + srcSliceY*dstStride[0];
    uint8_t *plane1 = dst[1] + chromaRow*dstStride[1];
    uint8_t *plane2 = dst[2] + chromaRow*dstStride[2];

    /* dstStride[1] is the only chroma stride handed to the converter */
    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);
    return srcSliceH;
}
/**
 * Copy the luma rows of a slice and run both chroma planes through
 * planar2x().
 *
 * For IMGFMT_YV12 output, source plane 1 goes to destination plane 1 and
 * source plane 2 to destination plane 2; for any other destination format
 * the two destination planes are exchanged.
 *
 * NOTE(review): the planar2x() calls use c->chrSrcW x c->chrSrcH and the
 * un-offset dst pointers regardless of srcSliceY/srcSliceH -- presumably
 * whole pictures are expected here; confirm against the callers.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], stride_t srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], stride_t dstStride[]){
    int row;
    int dstFor1, dstFor2;   /* destination plane index for src[1] and src[2] */

    /* copy Y */
    if(srcStride[0]==dstStride[0] && srcStride[0] > 0){
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    }else{
        uint8_t *in = src[0];
        uint8_t *out= dst[0] + dstStride[0]*srcSliceY;

        for(row=0; row<srcSliceH; row++){
            memcpy(out, in, c->srcW);
            in += srcStride[0];
            out+= dstStride[0];
        }
    }

    /* chroma: keep the plane order for YV12, exchange it otherwise */
    if(c->dstFormat==IMGFMT_YV12){
        dstFor1= 1;
        dstFor2= 2;
    }else{
        dstFor1= 2;
        dstFor2= 1;
    }
    planar2x(src[1], dst[dstFor1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[dstFor1]);
    planar2x(src[2], dst[dstFor2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[dstFor2]);

    return srcSliceH;
}
/**
* bring pointers in YUV order instead of YVU
*/
static inline void sws_orderYUV(int format, uint8_t * sortedP[], stride_t sortedStride[], uint8_t * p[], stride_t stride[]){
if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
|| format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
sortedP[0]= p[0];
sortedP[1]= p[2];
sortedP[2]= p[1];
sortedStride[0]= stride[0];
sortedStride[1]= stride[2];
sortedStride[2]= stride[1];
}
else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
{
sortedP[0]= p[0];
sortedP[1]=
sortedP[2]= NULL;
sortedStride[0]= stride[0];
sortedStride[1]=
sortedStride[2]= 0;
}
else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
{
sortedP[0]= p[0];
sortedP[1]= p[1];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -