/* swscale.c */
/*
Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
Modified to support multi-thread related features
by Haruhiko Yamagata <h.yamagata@nifty.com> in 2006.
This modification is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
*/
/*
supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
{BGR,RGB}{1,4,8,15,16} support dithering
unscaled special converters (YV12=I420=IYUV, Y800=Y8)
YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
x -> x
YUV9 -> YV12
YUV9/YV12 -> Y800
Y800 -> YUV9/YV12
BGR24 -> BGR32 & RGB24 -> RGB32
BGR32 -> BGR24 & RGB32 -> RGB24
BGR15 -> BGR16
*/
/*
tested special converters (most are actually tested, but I didn't write it down ...)
YV12 -> BGR16
YV12 -> YV12
BGR15 -> BGR16
BGR16 -> BGR16
YVU9 -> YV12
untested special converters
YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
YV12/I420 -> YV12/I420
YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
BGR24 -> BGR32 & RGB24 -> RGB32
BGR32 -> BGR24 & RGB32 -> RGB24
BGR24 -> YV12
*/
//based on libswscale Rev 19211
#include <windows.h>
#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include "config.h"
#include "../mangle.h"
#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "../ffmpeg/libavutil/x86_cpu.h"
#include "../ffmpeg/libavutil/bswap.h"
#include "ffImgfmt.h"
#include "rgb2rgb.h"
#include "../libvo/fastmemcpy.h"
#include "ffdebug.h"
#undef MOVNTQ
#undef PAVGB
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
/* Build-time switches and small helper macros for this translation unit. */
#define DITHER1XBPP
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
#define RET 0xC3 //near return opcode for X86
/*
 * ASSERT(x) forwards to assert(x) only when MP_DEBUG is defined; otherwise it
 * expands to an empty statement. In both forms the expansion already contains
 * the terminating semicolon, so "ASSERT(x)" is a complete statement by itself
 * and "ASSERT(x);" yields one extra empty statement.
 * NOTE(review): this makes ASSERT unsafe as the unbraced body of an if/else.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif
/* PI: use the math library's M_PI when it is available, else a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
/*
 * Pixel-format classification predicates.
 * Every macro below compares its argument against IMGFMT_* constants and
 * evaluates the argument several times, so pass only side-effect-free
 * expressions.
 */
//FIXME replace this with something faster
/* Non-zero for YV12, YVU9, NV12, NV21, 444P, 422P and 411P. */
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
/* Non-zero for UYVY, YUY2, YVYU, VYUY and everything isPlanarYUV accepts (Y800 is not included). */
#define isYUV(x) ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY || isPlanarYUV(x))
/* Non-zero only for Y800. */
#define isGray(x) ((x)==IMGFMT_Y800)
/* Non-zero when the format, masked with IMGFMT_RGB_MASK / IMGFMT_BGR_MASK, equals IMGFMT_RGB / IMGFMT_BGR. */
#define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
/* Formats accepted as scaler input. */
#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||(x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY \
|| isRGB(x) || isBGR(x) \
|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
/* Formats accepted as scaler output (the same set of formats as isSupportedIn, listed in a different order). */
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY \
|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
|| isRGB(x) || isBGR(x)\
|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
/* Non-zero for the four packed YUV formats (YUY2, UYVY, YVYU, VYUY) and for any RGB/BGR format. */
#define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY || isRGB(x) || isBGR(x))
/*
 * Fixed-point colour-conversion coefficients with RGB2YUV_SHIFT fractional bits.
 * Naming presumably reads <source channel><target component>, e.g. BY = weight
 * of blue in Y -- confirm against the users of these macros.
 * Each value is computed as (int)(coeff * 2^SHIFT + 0.5): for positive
 * coefficients this rounds to nearest; for negative ones the int cast truncates
 * toward zero, so the result can be one LSB closer to zero than true
 * round-to-nearest would give.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
extern const int32_t Inverse_Table_6_9[8][4];
/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
add support for packed pixel yuv input & output
add support for Y8 output
optimize bgr24 & bgr32
add BGR4 output support
write special BGR->BGR scaler
*/
/*
 * Small arithmetic helpers. All three are plain macros: the selected argument
 * is evaluated twice, so do not pass expressions with side effects.
 */
/* Magnitude of a (returns -(a) for zero and negative input). */
#define ABS(a) (0 < (a) ? (a) : (-(a)))
/* Larger of a and b; yields b when the two compare equal. */
#define FFMAX(a,b) ((b) < (a) ? (a) : (b))
/* Smaller of a and b; yields a when the two compare equal. */
#define FFMIN(a,b) ((b) < (a) ? (b) : (a))
#if defined(ARCH_X86) || defined(ARCH_X86_64)
static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
static uint64_t __attribute__((aligned(8))) dither4[2]={
0x0103010301030103LL,
0x0200020002000200LL,};
static uint64_t __attribute__((aligned(8))) dither8[2]={
0x0602060206020602LL,
0x0004000400040004LL,};
static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif /* defined(ARCH_X86) || defined(ARCH_X86_64) */
// clipping helper table for C implementations:
// 768 bytes with static storage (zero until something fills it in).
static unsigned char clip_table[768];
// Forward declaration; the definition is not in this part of the file.
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
// Dither matrices defined in another translation unit.
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];
#if !defined(HAVE_THREADS)
/*
 * Stub used when HAVE_THREADS is not defined: no worker threads are created.
 * Both arguments are ignored.
 * Returns -1 unconditionally.
 */
int sws_thread_init(SwsContext *s, int thread_count)
{
    (void)s;
    (void)thread_count;
    return -1;
}
/* Stub used when HAVE_THREADS is not defined: there is nothing to release, so this is a no-op. */
void sws_thread_free(struct SwsContext *s)
{
    (void)s;
}
/* Stub used when HAVE_THREADS is not defined: always reports a single CPU. */
int GetCPUCount(void)
{
    enum { SINGLE_CPU = 1 };
    return SINGLE_CPU;
}
/* Stub used when HAVE_THREADS is not defined: performs no detection and always returns 0. */
int isP4HT (void)
{
    const int detected = 0;
    return detected;
}
#else
#include "isP4HT.c"
#endif
/*
 * Sequential executor: invokes func once for each of the `count` contexts in
 * the array starting at c, in index order, on the calling thread.
 *
 * c      first element of an array of at least `count` contexts
 * func   callback applied to each context
 * ret    optional array of `count` ints receiving each callback's return
 *        value; may be NULL, in which case the return values are dropped
 * count  number of contexts to process (nothing is called when count <= 0)
 *
 * Returns 0 unconditionally.
 */
int sws_default_execute(SwsContext *c, int (*func)(SwsContext *c2), int *ret, int count){
    int idx = 0;

    while (idx < count) {
        const int status = func(c + idx);

        if (ret != NULL)
            ret[idx] = status;
        idx++;
    }
    return 0;
}
/*
 * Formats an image-format code as "0x<hex> (<c3><c2><c1><c0>)", where the four
 * characters are the bytes of `format` from most to least significant.
 *
 * The result lives in a static 64-byte buffer split into two 32-byte slots
 * that are used alternately, so the pointers from two consecutive calls stay
 * valid at the same time; a third call overwrites the first result. Because
 * of the static storage the function is not reentrant.
 *
 * Returns a pointer to the slot that was just written.
 */
char *sws_format_name(int format)
{
    static char fmt_name[64];
    static int buffer;
    char *const slot = &fmt_name[32 * buffer];
    const int ch3 = format >> 24;
    const int ch2 = (format >> 16) & 0xFF;
    const int ch1 = (format >> 8) & 0xFF;
    const int ch0 = format & 0xFF;

    /* flip between slot 0 and slot 1 for the next call */
    buffer = !buffer;

    _snprintf(slot, 32, "0x%x (%c%c%c%c)", format, ch3, ch2, ch1, ch0);
    return slot;
}
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * Reads every file-level 64-bit constant once and stores the folded sum in a
 * volatile local, so that each of them counts as referenced from C code.
 * The computed value itself is meaningless.
 */
void in_asm_used_var_warning_killer()
{
    uint64_t touched;
    volatile int i;

    touched  = bF8;
    touched += bFC;
    touched += w10;
    touched += bm00001111;
    touched += bm00000111;
    touched += bm11111000;
    touched += b16Mask;
    touched += g16Mask;
    touched += r16Mask;
    touched += b15Mask;
    touched += g15Mask;
    touched += r15Mask;
    touched += M24A;
    touched += M24B;
    touched += M24C;
    touched += w02;
    touched += b5Dither;
    touched += g5Dither;
    touched += r5Dither;
    touched += g6Dither;
    touched += dither4[0];
    touched += dither8[0];
    touched += bm01010101;

    i = touched;
    if (i)
        i = 0;
}
#endif
/*
 * Vertical filter producing one line of planar 8-bit output (plain C version).
 *
 * For every output sample the function sums lumFilterSize (resp.
 * chrFilterSize) source lines weighted by the matching filter coefficient,
 * starting from a rounding bias of 1<<18, shifts the sum right by 19 and
 * clamps the result to 0..255.
 *
 * lumFilter / lumSrc / lumFilterSize  luma coefficients and source lines
 * chrFilter / chrSrc / chrFilterSize  chroma coefficients and source lines;
 *                                     each chroma line holds U at index i and
 *                                     V at index i + 2048
 * dest                                luma output, dstW samples
 * uDest, vDest                        chroma outputs, chrDstW samples each;
 *                                     when uDest is NULL chroma is skipped
 *                                     (vDest is not checked separately)
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    //FIXME Optimize (just quickly written, not optimized)
    int x;
    int tap;

    /* luma plane */
    for (x = 0; x < dstW; x++) {
        int acc = 1 << 18;

        for (tap = 0; tap < lumFilterSize; tap++)
            acc += lumSrc[tap][x] * lumFilter[tap];

        acc >>= 19;
        dest[x] = FFMIN(FFMAX(acc, 0), 255);
    }

    if (uDest == NULL)
        return;

    /* chroma planes */
    for (x = 0; x < chrDstW; x++) {
        int accU = 1 << 18;
        int accV = 1 << 18;

        for (tap = 0; tap < chrFilterSize; tap++) {
            const int16_t *line = chrSrc[tap];
            const int coeff = chrFilter[tap];

            accU += line[x] * coeff;
            accV += line[x + 2048] * coeff;
        }

        accU >>= 19;
        accV >>= 19;
        uDest[x] = FFMIN(FFMAX(accU, 0), 255);
        vDest[x] = FFMIN(FFMAX(accV, 0), 255);
    }
}
/*
 * Vertical filter producing one luma line plus one interleaved chroma line
 * (plain C version).
 *
 * Luma and chroma samples are computed as a weighted sum of the source lines,
 * starting from a rounding bias of 1<<18, shifted right by 19 and clamped to
 * 0..255.
 *
 * lumFilter / lumSrc / lumFilterSize  luma coefficients and source lines
 * chrFilter / chrSrc / chrFilterSize  chroma coefficients and source lines;
 *                                     each chroma line holds U at index i and
 *                                     V at index i + 2048
 * dest       luma output, dstW samples
 * uDest      interleaved chroma output, 2*chrDstW bytes; NULL skips chroma
 * dstFormat  IMGFMT_NV12 stores each pair as U,V; any other value stores V,U
 */
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
    //FIXME Optimize (just quickly written, not optimized)
    int x;
    int tap;
    int uSlot;
    int vSlot;

    /* luma plane */
    for (x = 0; x < dstW; x++) {
        int acc = 1 << 18;

        for (tap = 0; tap < lumFilterSize; tap++)
            acc += lumSrc[tap][x] * lumFilter[tap];

        acc >>= 19;
        dest[x] = FFMIN(FFMAX(acc, 0), 255);
    }

    if (uDest == NULL)
        return;

    /* position of U and V inside each interleaved byte pair */
    uSlot = (dstFormat == IMGFMT_NV12) ? 0 : 1;
    vSlot = 1 - uSlot;

    for (x = 0; x < chrDstW; x++) {
        int accU = 1 << 18;
        int accV = 1 << 18;

        for (tap = 0; tap < chrFilterSize; tap++) {
            const int16_t *line = chrSrc[tap];
            const int coeff = chrFilter[tap];

            accU += line[x] * coeff;
            accV += line[x + 2048] * coeff;
        }

        accU >>= 19;
        accV >>= 19;
        uDest[2*x + uSlot] = FFMIN(FFMAX(accU, 0), 255);
        uDest[2*x + vSlot] = FFMIN(FFMAX(accV, 0), 255);
    }
}
/*
 * YSCALE_YUV_2_PACKEDX_C(type): loop prologue for multi-tap vertical scaling
 * to a packed output format.
 *
 * The expansion OPENS a for loop over dstW/2 pixel pairs and does not close
 * it: the code using the macro supplies the rest of the loop body and the
 * closing brace.
 *
 * Names that must be in scope at the expansion site: i, dstW, lumSrc,
 * lumFilter, lumFilterSize, chrSrc, chrFilter, chrFilterSize.
 *
 * Inside the loop it provides:
 *   Y1, Y2  filtered luma for pixels 2*i and 2*i+1
 *   U, V    filtered chroma for the pair (V is read at offset 2048 in each
 *           chroma line)
 *   r, g, b uninitialised pointers of type `type *`, for the caller to set
 *
 * Each sum starts from a rounding bias of 1<<18 and is shifted right by 19.
 * The clamp to 0..255 is entered only when bit 8 is set in Y1|Y2|U|V.
 * NOTE(review): a value such as 512 or -257 has bit 8 clear and would skip
 * the clamp; presumably the filter coefficients keep results within
 * -256..511 -- confirm.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
for(i=0; i<(dstW>>1); i++){\
int j;\
int Y1=1<<18;\
int Y2=1<<18;\
int U=1<<18;\
int V=1<<18;\
type *r, *b, *g;\
const int i2= 2*i;\
\
for(j=0; j<lumFilterSize; j++)\
{\
Y1 += lumSrc[j][i2] * lumFilter[j];\
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\
for(j=0; j<chrFilterSize; j++)\
{\
U += chrSrc[j][i] * chrFilter[j];\
V += chrSrc[j][i+2048] * chrFilter[j];\
}\
Y1>>=19;\
Y2>>=19;\
U >>=19;\
V >>=19;\
if((Y1|Y2|U|V)&256)\
{\
if(Y1>255) Y1=255;\
else if(Y1<0)Y1=0;\
if(Y2>255) Y2=255;\
else if(Y2<0)Y2=0;\
if(U>255) U=255;\
else if(U<0) U=0;\
if(V>255) V=255;\
else if(V<0) V=0;\
}
/*
 * Loop-prologue macros for the C packed-pixel output paths.
 *
 * Every macro in this group OPENS a for loop over dstW/2 pixel pairs and
 * leaves it open: the code using the macro supplies the rest of the loop body
 * and the closing brace. Inside the loop Y1/Y2 hold the luma of pixels 2*i
 * and 2*i+1, and U/V hold the chroma of the pair (V is read at offset 2048 of
 * the chroma buffer). The *_RGB* variants additionally declare r, g, b as
 * `type *` and point them into the context lookup tables c->table_rV,
 * c->table_gU (+ c->table_gV offset) and c->table_bU.
 *
 * The final line of each definition deliberately has NO trailing backslash:
 * a line continuation there would splice the following #define into the
 * macro body (and is a hard error for the function-like macros, because '#'
 * must be followed by a parameter name).
 */

/* Multi-tap vertical filter + table lookup; builds on YSCALE_YUV_2_PACKEDX_C, which already declares r, g, b. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/*
 * Two-line blend: mixes buf0/buf1 with weights yalpha1/yalpha and
 * uvbuf0/uvbuf1 with weights uvalpha1/uvalpha, then shifts right by 19.
 * Needs i, dstW, buf0, buf1, uvbuf0, uvbuf1 and the four weights in scope.
 * No clamping is applied.
 */
#define YSCALE_YUV_2_PACKED2_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

/* Two-line blend followed by the r/g/b table lookup. */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/*
 * Single-line variant: luma comes from buf0 and chroma from uvbuf1 only,
 * each shifted right by 7. Needs i, dstW, buf0 and uvbuf1 in scope.
 */
#define YSCALE_YUV_2_PACKED1_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;

/* Single-line variant followed by the r/g/b table lookup. */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = c->table_rV[V];\
    g = c->table_gU[U] + c->table_gV[V];\
    b = c->table_bU[U];

/*
 * Single luma line with chroma taken as the sum of uvbuf0 and uvbuf1 shifted
 * right by 8 (i.e. the average of the two lines, then >>7).
 * Needs i, dstW, buf0, uvbuf0 and uvbuf1 in scope.
 */
#define YSCALE_YUV_2_PACKED1B_C \
    for(i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

#define YSCALE_YUV_2_RGB1B_C(type) \
YSCALE_YUV_2_PACKED1B_C\
/* (source listing is truncated here; the code viewer's keyboard-shortcut help text that followed was page residue, not part of swscale.c) */