📄 swscale.c
字号:
/*
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

/*
  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
  x -> x
  YUV9 -> YV12
  YUV9/YV12 -> Y800
  Y800 -> YUV9/YV12
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR15 -> BGR16
*/

/*
tested special converters (most are tested actually, but I didn't write it down ...)
YV12 -> BGR16
 YV12 -> YV12
 BGR15 -> BGR16
 BGR16 -> BGR16
 YVU9 -> YV12

untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR24 -> YV12
*/

#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include "config.h"
#include <assert.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include "swscale.h"
#include "swscale_internal.h"
#include "common.h"
#include "rgb2rgb.h"

#define RUNTIME_CPUDETECT 1

#undef MOVNTQ
#undef PAVGB

/* Build-configuration overrides; the commented-out ones are kept for manual experiments. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/* ASSERT(x) expands to assert(x); only when MP_DEBUG is defined, otherwise to an empty statement. */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: use the math library's M_PI when available, else a literal fallback. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif

/*
 * Pixel-format classification predicates. Each takes an IMGFMT_* value and
 * evaluates to non-zero when the format belongs to the named group.
 * The argument is evaluated several times, so it must be free of side effects.
 */
//FIXME replace this with something faster
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
#define isGray(x)      ((x)==IMGFMT_Y800)
/* isRGB / isBGR: compare the format, masked with IMGFMT_{RGB,BGR}_MASK, against the family tag. */
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
/* Formats accepted as scaler input. */
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
                        || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
                        || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
/* Formats accepted as scaler output (every RGB/BGR variant matched by isRGB/isBGR is included). */
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
                        || isRGB(x) || isBGR(x)\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))

/*
 * Fixed-point RGB -> YUV coefficients: each real factor is scaled by
 * 1<<RGB2YUV_SHIFT, 0.5 is added, and the result is converted to int.
 * Names are presumably <source channel><destination component>
 * (e.g. BY = blue contribution to Y) -- confirm against the users of these macros.
 * NOTE(review): the (int) conversion truncates toward zero, so for the negative
 * coefficients the +0.5 does not yield round-to-nearest.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))

extern const int32_t Inverse_Table_6_9[8][4];

/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
add support for packed pixel yuv input & output
add support for Y8 output
optimize bgr24 & bgr32
add BGR4 output support
write special BGR->BGR scaler
*/

/* Classic function-like helpers; arguments are evaluated more than once. */
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))

#ifdef ARCH_X86
/*
 * 8-byte-aligned 64-bit constants, compiled only for ARCH_X86.
 * Presumably operands for the inline MMX assembly elsewhere in this file -- confirm.
 * bF8 / bFC repeat one byte value in all eight bytes; w10 / w02 repeat one
 * 16-bit value in all four words.
 */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
/* Byte-select masks: bm00001111 keeps the low 4 bytes, bm00000111 the low 3,
 * bm11111000 the high 5, bm01010101 every even-indexed byte. */
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* Mutable dither state (zero-initialised; written elsewhere). */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

/* Two-entry dither pattern tables. */
static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* Per-word channel masks for 16-bit pixels: bits 0-4, 5-10 and 11-15. */
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
/* Per-word channel masks for 15-bit pixels: bits 0-4, 5-9 and 10-14. */
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* Every-third-byte masks, at three different phases. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;

/* BGR -> Y/U/V coefficient vectors; FAST_BGR2YV12 selects the reduced-precision set. */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
/* Offsets: 0x10 in every byte for Y, 0x80 in every byte for U/V; w1111 is 1 in every word. */
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif

// clipping helper table for C implementations:
static unsigned char clip_table[768];

static
SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b); /* forward declaration; defined elsewhere */

/* Ordered-dither matrices, defined in another translation unit. */
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];

#ifdef ARCH_X86
/*
 * Reads every ARCH_X86 constant once so the compiler does not warn about
 * them being unused; the computed sum itself has no meaning.
 */
void in_asm_used_var_warning_killer()
{
    volatile int sink = bF8+bFC+w10+
        bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
        M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;

    if (sink != 0) {
        sink = 0;
    }
}
#endif

/*
 * Vertical filter, plain C version: produces one output line of luma and,
 * optionally, one line each of U and V.
 *
 * lumFilter     - lumFilterSize filter coefficients
 * lumSrc        - lumFilterSize source row pointers; entries 0..dstW-1 of each row are read
 * chrFilter     - chrFilterSize filter coefficients
 * chrSrc        - chrFilterSize source row pointers; entry x feeds U and entry
 *                 x+2048 of the same row feeds V
 * dest          - receives dstW luma bytes
 * uDest, vDest  - receive chrDstW bytes each; when uDest is NULL the chroma
 *                 pass is skipped entirely (vDest is not tested separately)
 *
 * Each accumulator starts at 1<<18 (half of 1<<19) so that the final >>19
 * rounds to nearest; results are clamped to 0..255.
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    /* FIXME: optimize (written quickly, not tuned) */
    int x;
    int tap;

    for (x = 0; x < dstW; x++) {
        int lumaAcc = 1<<18;

        for (tap = 0; tap < lumFilterSize; tap++) {
            lumaAcc += lumSrc[tap][x] * lumFilter[tap];
        }
        dest[x] = MIN(MAX(lumaAcc>>19, 0), 255);
    }

    if (uDest == NULL) {
        return;
    }

    for (x = 0; x < chrDstW; x++) {
        int uAcc = 1<<18;
        int vAcc = 1<<18;

        for (tap = 0; tap < chrFilterSize; tap++) {
            uAcc += chrSrc[tap][x] * chrFilter[tap];
            vAcc += chrSrc[tap][x + 2048] * chrFilter[tap];
        }
        uDest[x] = MIN(MAX(uAcc>>19, 0), 255);
        vDest[x] = MIN(MAX(vAcc>>19, 0), 255);
    }
}

/*
 * Loop-header macros for the packed-output writers.
 *
 * Every YSCALE_YUV_2_*_C macro below opens `for(i=0; i<(dstW>>1); i++){`,
 * i.e. one iteration per pair of output pixels, and leaves that brace open:
 * the code using the macro supplies the pixel stores and the closing `}`.
 * Inside the loop the names i2 (= 2*i), Y1, Y2, U and V are defined; the RGB
 * variants additionally define r, g, b as pointers looked up in c->table_*.
 * `i`, `dstW`, `c` and the source buffers must already be in scope.
 *
 * YSCALE_YUV_2_PACKEDX_C(type): full vertical filter. Y1/Y2 are summed from
 * lumSrc/lumFilter at i2 and i2+1, U/V from chrSrc/chrFilter at i and i+2048,
 * all starting at 1<<18 and shifted right by 19. The clamp to 0..255 is only
 * entered when bit 8 of (Y1|Y2|U|V) is set.
 * NOTE(review): a value >= 512 with bit 8 clear would bypass that test --
 * confirm the filter gains make this impossible.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
    for(i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1=1<<18;\
        int Y2=1<<18;\
        int U=1<<18;\
        int V=1<<18;\
        type *r, *b, *g;\
        const int i2= 2*i;\
        \
        for(j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for(j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+2048] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if((Y1|Y2|U|V)&256)\
        {\
            if(Y1>255)   Y1=255;\
            else if(Y1<0)Y1=0;\
            if(Y2>255)   Y2=255;\
            else if(Y2<0)Y2=0;\
            if(U>255)    U=255;\
            else if(U<0) U=0;\
            if(V>255)    V=255;\
            else if(V<0) V=0;\
        }

/* PACKEDX header followed by the r/g/b table lookups for the current U and V. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/* Two-line blend: buf0/buf1 weighted by yalpha1/yalpha, uvbuf0/uvbuf1 by uvalpha1/uvalpha, then >>19. */
#define YSCALE_YUV_2_PACKED2_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/* Single-line variant: luma from buf0 and chroma from uvbuf1, each >>7. */
#define YSCALE_YUV_2_PACKED1_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;

#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/* Single-line luma (buf0 >>7) with chroma taken as the sum of uvbuf0 and uvbuf1, >>8. */
#define YSCALE_YUV_2_PACKED1B_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/* NOTE(review): the macro below continues past this point in the file; its text is kept exactly as found. */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\ switch(c->dstFormat)\ {\ case IMGFMT_BGR32:\ case IMGFMT_RGB32:\ func(uint32_t)\ ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ } \ break;\ case IMGFMT_RGB24:\ func(uint8_t)\ ((uint8_t*)dest)[0]= r[Y1];\ ((uint8_t*)dest)[1]= g[Y1];\ ((uint8_t*)dest)[2]= b[Y1];\ ((uint8_t*)dest)[3]= r[Y2];\ ((uint8_t*)dest)[4]= g[Y2];\ ((uint8_t*)dest)[5]= b[Y2];\ dest+=6;\ }\ break;\ case IMGFMT_BGR24:\ func(uint8_t)\ ((uint8_t*)dest)[0]= b[Y1];\ ((uint8_t*)dest)[1]= g[Y1];\ ((uint8_t*)dest)[2]= r[Y1];\ ((uint8_t*)dest)[3]= b[Y2];\ ((uint8_t*)dest)[4]= g[Y2];\ ((uint8_t*)dest)[5]= r[Y2];\ dest+=6;\ }\ break;\ case IMGFMT_RGB16:\ case IMGFMT_BGR16:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_4[y&1 ][0];\ const int db1= 
dither_2x2_8[(y&1)^1][0];\ const int dr2= dither_2x2_8[y&1 ][1];\ const int dg2= dither_2x2_4[y&1 ][1];\ const int db2= dither_2x2_8[(y&1)^1][1];\ func(uint16_t)\ ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ }\ }\ break;\ case IMGFMT_RGB15:\ case IMGFMT_BGR15:\ {\ const int dr1= dither_2x2_8[y&1 ][0];\ const int dg1= dither_2x2_8[y&1 ][1];\ const int db1= dither_2x2_8[(y&1)^1][0];\ const int dr2= dither_2x2_8[y&1 ][1];\ const int dg2= dither_2x2_8[y&1 ][0];\ const int db2= dither_2x2_8[(y&1)^1][1];\ func(uint16_t)\ ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ }\ }\ break;\ case IMGFMT_RGB8:\ case IMGFMT_BGR8:\ {\ const uint8_t * const d64= dither_8x8_73[y&7];\ const uint8_t * const d32= dither_8x8_32[y&7];\ func(uint8_t)\ ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\ ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\ }\ }\ break;\ case IMGFMT_RGB4:\ case IMGFMT_BGR4:\ {\ const uint8_t * const d64= dither_8x8_73 [y&7];\ const uint8_t * const d128=dither_8x8_220[y&7];\ func(uint8_t)\ ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\ + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\ }\ }\ break;\ case IMGFMT_RG4B:\ case IMGFMT_BG4B:\ {\ const uint8_t * const d64= dither_8x8_73 [y&7];\ const uint8_t * const d128=dither_8x8_220[y&7];\ func(uint8_t)\ ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\ ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\ }\ }\ break;\ case IMGFMT_RGB1:\ case IMGFMT_BGR1:\ {\ const uint8_t * const d128=dither_8x8_220[y&7];\ uint8_t *g= c->table_gU[128] + c->table_gV[128];\ for(i=0; i<dstW-7; i+=8){\ int acc;\ acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + 
d128[0]];\ acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -