swscale.c
{
double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
double coeff;
if (flags & SWS_BICUBIC)
{
double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
if (d<1.0)
coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
else if (d<2.0)
coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
else
coeff=0.0;
}
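/* The bicubic branch is the Mitchell-Netravali two-parameter family: with the
   usual 1/6 normalization, B=1,C=0 is the cubic B-spline, B=0,C=0.5 is
   Catmull-Rom and B=C=1/3 is the Mitchell filter; the defaults here are B=0,
   C=0.6. The 1/6 factor is dropped because every filter row is renormalized
   at the end of this function anyway. */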
/* else if (flags & SWS_X)
{
double p= param ? param*0.01 : 0.3;
coeff = d ? sin(d*PI)/(d*PI) : 1.0;
coeff*= pow(2.0, - p*d*d);
}*/
else if (flags & SWS_X)
{
double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
if (d<1.0)
coeff = cos(d*PI);
else
coeff=-1.0;
if (coeff<0.0) coeff= -pow(-coeff, A);
else coeff= pow( coeff, A);
coeff= coeff*0.5 + 0.5;
}
else if (flags & SWS_AREA)
{
double srcPixelSize= 1.0/xInc1;
if (d + srcPixelSize/2 < 0.5) coeff= 1.0;
else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
else coeff=0.0;
}
else if (flags & SWS_GAUSS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = pow(2.0, - p*d*d);
}
else if (flags & SWS_SINC)
{
coeff = d ? sin(d*PI)/(d*PI) : 1.0;
}
else if (flags & SWS_LANCZOS)
{
double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
if (d>p) coeff=0;
}
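/* SWS_LANCZOS evaluates sinc(d)*sinc(d/p) with sinc(x)=sin(PI*x)/(PI*x): a
   sinc windowed by a wider sinc, truncated to |d| <= p (default p=3). */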
else if (flags & SWS_BILINEAR)
{
coeff= 1.0 - d;
if (coeff<0) coeff=0;
}
else if (flags & SWS_SPLINE)
{
double p=-2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
}
else {
coeff= 0.0; //GCC warning killer
ASSERT(0)
}
filter[i*filterSize + j]= coeff;
xx++;
}
xDstInSrc+= xInc1;
}
}
/* apply src & dst Filter to filter -> filter2
av_free(filter);
*/
ASSERT(filterSize>0)
filter2Size= filterSize;
if (srcFilter) filter2Size+= srcFilter->length - 1;
if (dstFilter) filter2Size+= dstFilter->length - 1;
ASSERT(filter2Size>0)
filter2= av_malloc(filter2Size*dstW*sizeof(double));
for (i=0; i<dstW; i++)
{
int j;
SwsVector scaleFilter;
SwsVector *outVec;
scaleFilter.coeff= filter + i*filterSize;
scaleFilter.length= filterSize;
if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
else outVec= &scaleFilter;
ASSERT(outVec->length == filter2Size)
//FIXME dstFilter
for (j=0; j<outVec->length; j++)
{
filter2[i*filter2Size + j]= outVec->coeff[j];
}
(*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
if (outVec != &scaleFilter) sws_freeVec(outVec);
}
av_free(filter); filter=NULL;
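/* Convolving each scale-filter row with srcFilter grows it from filterSize to
   filter2Size = filterSize + srcFilter->length - 1 taps (dstFilter is not
   applied yet, see the FIXME above), and filterPos was shifted by half the
   size difference so the combined kernel stays centered on the same source
   position. */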
/* try to reduce the filter size (step 1: find the needed size and shift left) */
// Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
minFilterSize= 0;
for (i=dstW-1; i>=0; i--)
{
int min= filter2Size;
int j;
double cutOff=0.0;
/* get rid of near-zero elements on the left by shifting left */
for (j=0; j<filter2Size; j++)
{
int k;
cutOff += FFABS(filter2[i*filter2Size]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
/* preserve monotonicity because the core can't handle the filter otherwise */
if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
// Move filter coeffs left
for (k=1; k<filter2Size; k++)
filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
filter2[i*filter2Size + k - 1]= 0.0; // k == filter2Size here: clear the tap vacated by the shift
(*filterPos)[i]++;
}
cutOff=0.0;
/* count near zeros on the right */
for (j=filter2Size-1; j>0; j--)
{
cutOff += FFABS(filter2[i*filter2Size + j]);
if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
min--;
}
if (min>minFilterSize) minFilterSize= min;
}
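/* minFilterSize is now the largest number of taps any output pixel still
   needs: leading near-zero taps were dropped by shifting each row left and
   advancing (*filterPos)[i] (as long as the dropped magnitude stays below
   SWS_MAX_REDUCE_CUTOFF and (*filterPos)[i] stays below (*filterPos)[i+1]),
   while trailing near-zero taps merely reduce that row's tap count. */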
if (flags & SWS_CPU_CAPS_ALTIVEC) {
// we can handle the special case 4,
// so we don't want to go to the full 8
if (minFilterSize < 5)
filterAlign = 4;
// we really don't want to waste our time
// doing useless computation, so fall back on
// the scalar C code for very small filters.
// Vectorizing is worth it only if you have
// a decent-sized vector.
if (minFilterSize < 3)
filterAlign = 1;
}
if (flags & SWS_CPU_CAPS_MMX) {
// special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2)
filterAlign= 1;
}
ASSERT(minFilterSize > 0)
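/* round minFilterSize up to a multiple of filterAlign (assumed to be a power
   of two), e.g. minFilterSize=5, filterAlign=4 -> filterSize=8 */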
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
ASSERT(filterSize > 0)
if (filterSize >= MAX_FILTER_SIZE) {
    av_free(filter2);
    return -1;
}
filter= av_malloc(filterSize*dstW*sizeof(double));
*outFilterSize= filterSize;
if (flags&SWS_PRINT_INFO)
av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
/* try to reduce the filter size (step 2: copy only the needed taps) */
for (i=0; i<dstW; i++)
{
int j;
for (j=0; j<filterSize; j++)
{
if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
else filter[i*filterSize + j]= filter2[i*filter2Size + j];
}
}
av_free(filter2); filter2=NULL;
//FIXME try to align filterpos if possible
//fix borders
for (i=0; i<dstW; i++)
{
int j;
if ((*filterPos)[i] < 0)
{
// Move filter coeffs left to compensate for filterPos
for (j=1; j<filterSize; j++)
{
int left= FFMAX(j + (*filterPos)[i], 0);
filter[i*filterSize + left] += filter[i*filterSize + j];
filter[i*filterSize + j]=0;
}
(*filterPos)[i]= 0;
}
if ((*filterPos)[i] + filterSize > srcW)
{
int shift= (*filterPos)[i] + filterSize - srcW;
// Move filter coeffs right to compensate for filterPos
for (j=filterSize-2; j>=0; j--)
{
int right= FFMIN(j + shift, filterSize-1);
filter[i*filterSize +right] += filter[i*filterSize +j];
filter[i*filterSize +j]=0;
}
(*filterPos)[i]= srcW - filterSize;
}
}
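/* After these two fixups every row only reads source pixels in [0, srcW):
   taps that would fall outside the image are folded into the nearest valid
   tap and filterPos is clamped. Folding only adds coefficients together, so
   each row's sum, and hence the normalization below, is unaffected. */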
// Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
*outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
/* Normalize & Store in outFilter */
for (i=0; i<dstW; i++)
{
int j;
double error=0;
double sum=0;
double scale= one;
for (j=0; j<filterSize; j++)
{
sum+= filter[i*filterSize + j];
}
scale/= sum;
for (j=0; j<*outFilterSize; j++)
{
double v= filter[i*filterSize + j]*scale + error;
int intV= floor(v + 0.5);
(*outFilter)[i*(*outFilterSize) + j]= intV;
error = v - intV;
}
}
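/* Error-diffusion rounding: each row is scaled by one/sum and the rounding
   error of every tap is carried into the next one, so the integer
   coefficients of a row add up to (almost exactly) the fixed-point unit
   `one` supplied by the caller. */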
(*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
for (i=0; i<*outFilterSize; i++)
{
int j= dstW*(*outFilterSize);
(*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
}
av_free(filter);
return 0;
}
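/* Illustration only (not part of the original file): a minimal plain-C sketch
   of how the tables built above (outFilter, filterPos, outFilterSize) can
   drive a horizontal scaling loop. The name hScaleExampleC and the parameter
   types are hypothetical, and the >>7 assumes the filter was built with
   one = 1<<14 and 8-bit samples; the real scaler uses optimized hScale
   implementations instead. */
static void hScaleExampleC(int16_t *dst, int dstW, const uint8_t *src,
                           const int16_t *filter, const int16_t *filterPos,
                           int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];   /* first source pixel used for dst[i] */
        int val    = 0;
        for (j = 0; j < filterSize; j++)
            val += (int)src[srcPos + j] * filter[i * filterSize + j];
        /* 8-bit samples times ~14-bit coefficients: shift back down to the
           15-bit intermediate range and clip */
        dst[i] = (int16_t)FFMIN(FFMAX(val >> 7, 0), (1 << 15) - 1);
    }
}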
#ifdef COMPILE_MMX2
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
uint8_t *fragmentA;
long imm8OfPShufW1A;
long imm8OfPShufW2A;
long fragmentLengthA;
uint8_t *fragmentB;
long imm8OfPShufW1B;
long imm8OfPShufW2B;
long fragmentLengthB;
int fragmentPos;
int xpos, i;
// create an optimized horizontal scaling routine
//code fragment
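/* Neither asm block below ever executes its fragment: each jumps straight to
   label 9 and then uses lea on the local labels to return, through the output
   operands, the fragment's start address, its total length, and the byte
   offsets of the two pshufw immediates inside it (the dec steps back from the
   label to the imm8, i.e. the last byte of the preceding pshufw). The
   fragments are later copied into funnyCode once per group of output pixels,
   with those immediates patched to select the right source bytes. */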
asm volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
"movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm1, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
// End
"9: \n\t"
// "int $3 \n\t"
"lea 0b, %0 \n\t"
"lea 1b, %1 \n\t"
"lea 2b, %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea 9b, %3 \n\t"
"sub %0, %3 \n\t"
:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
"=r" (fragmentLengthA)
);
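/* The second fragment is the variant for strong upscaling, where all the
   source bytes needed by a group of output pixels fit in a single dword: it
   derives %%mm1 from the same load with an extra pshufw instead of doing a
   second movd at offset 1. */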
asm volatile(
"jmp 9f \n\t"
// Begin
"0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm0, %%mm1 \n\t"
"1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t"
"psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
// End
"9: \n\t"
// "int $3 \n\t"
"lea 0b, %0 \n\t"
"lea 1b, %1 \n\t"
"lea 2b, %2 \n\t"
"dec %1 \n\t"
"dec %2 \n\t"
"sub %0, %1 \n\t"
"sub %0, %2 \n\t"
"lea 9b, %3 \n\t"
"sub %0, %3 \n\t"
:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
"=r" (fragmentLengthB)
);