📄 swscale.c
字号:
min--; } if (min>minFilterSize) minFilterSize= min; } if (flags & SWS_CPU_CAPS_ALTIVEC) { // we can handle the special case 4, // so we don't want to go to the full 8 if (minFilterSize < 5) filterAlign = 4; // we really don't want to waste our time // doing useless computation, so fall-back on // the scalar C code for very small filter. // vectorizing is worth it only if you have // decent-sized vector. if (minFilterSize < 3) filterAlign = 1; } if (flags & SWS_CPU_CAPS_MMX) { // special case for unscaled vertical filtering if (minFilterSize == 1 && filterAlign == 2) filterAlign= 1; } assert(minFilterSize > 0); filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); assert(filterSize > 0); filter= av_malloc(filterSize*dstW*sizeof(double)); if (filterSize >= MAX_FILTER_SIZE) return -1; *outFilterSize= filterSize; if (flags&SWS_PRINT_INFO) av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); /* try to reduce the filter-size (step2 reduce it) */ for (i=0; i<dstW; i++) { int j; for (j=0; j<filterSize; j++) { if (j>=filter2Size) filter[i*filterSize + j]= 0.0; else filter[i*filterSize + j]= filter2[i*filter2Size + j]; } } av_free(filter2); filter2=NULL; //FIXME try to align filterpos if possible //fix borders for (i=0; i<dstW; i++) { int j; if ((*filterPos)[i] < 0) { // Move filter coeffs left to compensate for filterPos for (j=1; j<filterSize; j++) { int left= FFMAX(j + (*filterPos)[i], 0); filter[i*filterSize + left] += filter[i*filterSize + j]; filter[i*filterSize + j]=0; } (*filterPos)[i]= 0; } if ((*filterPos)[i] + filterSize > srcW) { int shift= (*filterPos)[i] + filterSize - srcW; // Move filter coeffs right to compensate for filterPos for (j=filterSize-2; j>=0; j--) { int right= FFMIN(j + shift, filterSize-1); filter[i*filterSize +right] += filter[i*filterSize +j]; filter[i*filterSize +j]=0; } (*filterPos)[i]= srcW - filterSize; } } // Note the +1 is for the MMXscaler which reads over the end /* align at 16 for AltiVec (needed by hScale_altivec_real) */ *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t)); /* Normalize & Store in outFilter */ for (i=0; i<dstW; i++) { int j; double error=0; double sum=0; double scale= one; for (j=0; j<filterSize; j++) { sum+= filter[i*filterSize + j]; } scale/= sum; for (j=0; j<*outFilterSize; j++) { double v= filter[i*filterSize + j]*scale + error; int intV= floor(v + 0.5); (*outFilter)[i*(*outFilterSize) + j]= intV; error = v - intV; } } (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end for (i=0; i<*outFilterSize; i++) { int j= dstW*(*outFilterSize); (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)]; } av_free(filter); return 0;}#ifdef COMPILE_MMX2static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits){ uint8_t *fragmentA; long imm8OfPShufW1A; long imm8OfPShufW2A; long fragmentLengthA; uint8_t *fragmentB; long imm8OfPShufW1B; long imm8OfPShufW2B; long fragmentLengthB; int fragmentPos; int xpos, i; // create an optimized horizontal scaling routine //code fragment asm volatile( "jmp 9f \n\t" // Begin "0: \n\t" "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "pshufw $0xFF, %%mm1, %%mm1 \n\t" "1: \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t" "2: \n\t" "psubw %%mm1, %%mm0 \n\t" "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" "pmullw %%mm3, %%mm0 \n\t" "psllw $7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" // End "9: \n\t"// "int $3 \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t" "lea " LOCAL_MANGLE(1b) ", %1 \n\t" "lea " LOCAL_MANGLE(2b) ", %2 \n\t" "dec %1 \n\t" "dec %2 \n\t" "sub %0, %1 \n\t" "sub %0, %2 \n\t" "lea " LOCAL_MANGLE(9b) ", %3 \n\t" "sub %0, %3 \n\t" :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), "=r" (fragmentLengthA) ); asm volatile( "jmp 9f \n\t" // Begin "0: \n\t" "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "pshufw $0xFF, %%mm0, %%mm1 \n\t" "1: \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t" "2: \n\t" "psubw %%mm1, %%mm0 \n\t" "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" "pmullw %%mm3, %%mm0 \n\t" "psllw $7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" "add $8, %%"REG_a" \n\t" // End "9: \n\t"// "int $3 \n\t" "lea " LOCAL_MANGLE(0b) ", %0 \n\t" "lea " LOCAL_MANGLE(1b) ", %1 \n\t" "lea " LOCAL_MANGLE(2b) ", %2 \n\t" "dec %1 \n\t" "dec %2 \n\t" "sub %0, %1 \n\t" "sub %0, %2 \n\t" "lea " LOCAL_MANGLE(9b) ", %3 \n\t" "sub %0, %3 \n\t" :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), "=r" (fragmentLengthB) ); xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers fragmentPos=0; for (i=0; i<dstW/numSplits; i++) { int xx=xpos>>16; if ((i&3) == 0) { int a=0; int b=((xpos+xInc)>>16) - xx; int c=((xpos+xInc*2)>>16) - xx; int d=((xpos+xInc*3)>>16) - xx; filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9; filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9; filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9; filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9; filterPos[i/2]= xx; if (d+1<4) { int maxShift= 3-(d+1); int shift=0; memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB); funnyCode[fragmentPos + imm8OfPShufW1B]= (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6); funnyCode[fragmentPos + imm8OfPShufW2B]= a | (b<<2) | (c<<4) | (d<<6); if (i+3>=dstW) shift=maxShift; //avoid overread else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align if (shift && i>=shift) { funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift; funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift; filterPos[i/2]-=shift; } fragmentPos+= fragmentLengthB; } else { int maxShift= 3-d; int shift=0; memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA); funnyCode[fragmentPos + imm8OfPShufW1A]= funnyCode[fragmentPos + imm8OfPShufW2A]= a | (b<<2) | (c<<4) | (d<<6); if (i+4>=dstW) shift=maxShift; //avoid overread else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align if (shift && i>=shift) { funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift; funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift; filterPos[i/2]-=shift; } fragmentPos+= fragmentLengthA; } funnyCode[fragmentPos]= RET; } xpos+=xInc; } filterPos[i/2]= xpos>>16; // needed to jump to the next part}#endif /* COMPILE_MMX2 */static void globalInit(void){ // generating tables: int i; for (i=0; i<768; i++){ int c= av_clip_uint8(i-256); clip_table[i]=c; }}static SwsFunc getSwsFunc(int flags){#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)#if defined(ARCH_X86) // ordered per speed fastest first if (flags & SWS_CPU_CAPS_MMX2) return swScale_MMX2; else if (flags & SWS_CPU_CAPS_3DNOW) return swScale_3DNow; else if (flags & SWS_CPU_CAPS_MMX) return swScale_MMX; else return swScale_C;#else#ifdef ARCH_POWERPC if (flags & SWS_CPU_CAPS_ALTIVEC) return swScale_altivec; else return swScale_C;#endif return swScale_C;#endif /* defined(ARCH_X86) */#else //RUNTIME_CPUDETECT#ifdef HAVE_MMX2 return swScale_MMX2;#elif defined (HAVE_3DNOW) return swScale_3DNow;#elif defined (HAVE_MMX) return swScale_MMX;#elif defined (HAVE_ALTIVEC) return swScale_altivec;#else return swScale_C;#endif#endif //!RUNTIME_CPUDETECT}static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[]){ uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; /* Copy Y plane */ if (dstStride[0]==srcStride[0] && srcStride[0] > 0) memcpy(dst, src[0], srcSliceH*dstStride[0]); else { int i; uint8_t *srcPtr= src[0]; uint8_t *dstPtr= dst; for (i=0; i<srcSliceH; i++) { memcpy(dstPtr, srcPtr, c->srcW); srcPtr+= srcStride[0]; dstPtr+= dstStride[0]; } } dst = dstParam[1] + dstStride[1]*srcSliceY/2; if (c->dstFormat == PIX_FMT_NV12) interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]); else interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]); return srcSliceH;}static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[]){ uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); return srcSliceH;}static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dstParam[], int dstStride[]){ uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); return srcSliceH;}/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]){ const int srcFormat= c->srcFormat; const int dstFormat= c->dstFormat; const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3; const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3; const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ const int dstId= fmt_depth(dstFormat) >> 2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -