swscale.c

来自「从FFMPEG转换而来的H264解码程序,VC下编译..」· C语言代码 · 共 2,189 行 · 第 1/5 页
2,189 行
/*
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

/*
    Modified to support multi-thread related features
    by Haruhiko Yamagata <h.yamagata@nifty.com> in 2006.

    This modification is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
*/

/*
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
  {BGR,RGB}{1,4,8,15,16} support dithering

  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
  x -> x
  YUV9 -> YV12
  YUV9/YV12 -> Y800
  Y800 -> YUV9/YV12
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR15 -> BGR16
*/

/*
Tested special converters (most are actually tested, but I didn't write them down ...)
 YV12 -> BGR16
 YV12 -> YV12
 BGR15 -> BGR16
 BGR16 -> BGR16
 YVU9 -> YV12

untested special converters
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb code, so it should be OK)
  YV12/I420 -> YV12/I420
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
  BGR24 -> BGR32 & RGB24 -> RGB32
  BGR32 -> BGR24 & RGB32 -> RGB24
  BGR24 -> YV12
*/

//based on libswscale Rev 19211

#include <windows.h>
#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <stdio.h>
#include "config.h"
#include "../mangle.h"
#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "../ffmpeg/libavutil/x86_cpu.h"
#include "../ffmpeg/libavutil/bswap.h"
#include "ffImgfmt.h"
#include "rgb2rgb.h"
#include "../libvo/fastmemcpy.h"
#include "ffdebug.h"

/*
 * Build-time switches and small helper macros for this file.
 */

/* Drop any MOVNTQ / PAVGB definitions inherited from the headers above.
 * NOTE(review): presumably they are re-defined per CPU variant further down
 * the file -- not visible in this part of the source. */
#undef MOVNTQ
#undef PAVGB

/* Manual overrides for experimenting with CPU-feature / endianness paths.
 * All of them are disabled. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
/* NOTE(review): presumably enables dithering for low-bit-depth output;
 * its use is not visible in this part of the source. */
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/*
 * ASSERT(x) expands to assert(x); when MP_DEBUG is defined and to an empty
 * statement otherwise. Both expansions already contain the terminating ';',
 * so ASSERT(x) is a complete statement on its own; be careful when using it
 * as the unbraced body of an if/else.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: use the math.h constant when the toolchain provides M_PI, otherwise a
 * literal with the same value. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif

/*
 * Pixel-format classification predicates.
 *
 * Every macro takes an IMGFMT_* code and yields a non-zero value when the
 * code belongs to the named class. The argument is expanded several times,
 * so it must be free of side effects.
 *
 *   isPlanarYUV    - one of the planar / semi-planar YUV codes listed below
 *   isYUV          - packed 4:2:2 YUV codes or anything isPlanarYUV accepts
 *   isGray         - IMGFMT_Y800 only
 *   isRGB / isBGR  - code matches IMGFMT_RGB / IMGFMT_BGR under its mask
 *   isSupportedIn  - formats accepted as scaler input
 *   isSupportedOut - formats accepted as scaler output
 *   isPacked       - packed YUV codes or any RGB/BGR code
 */
//FIXME replace this with something faster
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY || isPlanarYUV(x))
#define isGray(x)      ((x)==IMGFMT_Y800)
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||(x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY \
			|| isRGB(x) || isBGR(x) \
			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY \
			|| (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
			|| isRGB(x) || isBGR(x)\
			|| (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21\
			|| (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY || (x)==IMGFMT_YVYU || (x)==IMGFMT_VYUY || isRGB(x) || isBGR(x))

/*
 * Fixed-point RGB -> YUV conversion coefficients.
 *
 * Each coefficient is a real-valued weight scaled by 2^RGB2YUV_SHIFT and
 * converted to int. The macro name is <source channel><target channel>,
 * e.g. BY is the weight of blue in Y and RV the weight of red in V.
 * The weights match the commonly used ITU-R BT.601 studio-range matrix.
 *
 * NOTE(review): the "+0.5 then (int)" idiom rounds to nearest only for
 * positive values; for the negative weights the truncation toward zero
 * leaves the result one step away from the nearest integer. Left as is
 * because changing it would alter the converted pixel values.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))

/* 8x4 table of 32-bit constants defined in another translation unit. */
extern const int32_t Inverse_Table_6_9[8][4];

/*
NOTES
Special versions: fast Y 1:1 scaling (no interpolation in y direction)

TODO
more intelligent misalignment avoidance for the horizontal scaler
write special vertical cubic upscale version
Optimize C code (yv12 / minmax)
add support for packed pixel yuv input & output
add support for Y8 output
optimize bgr24 & bgr32
add BGR4 output support
write special BGR->BGR scaler
*/

/*
 * Generic arithmetic helpers.
 *
 * All three are plain textual macros: each argument may be evaluated twice,
 * so never pass expressions with side effects (i++, function calls that
 * mutate state, ...). ABS negates its argument for values that are not
 * greater than zero, which overflows for the most negative value of a
 * signed type.
 */
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))

/*
 * 64-bit constants for the x86 / x86-64 build, each aligned to 8 bytes.
 * NOTE(review): presumably these are operands for the MMX inline-assembly
 * routines elsewhere in the file (not visible here); attribute_used keeps
 * the ones so marked from being discarded as unreferenced.
 */
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/* bXX: every byte holds 0xXX; wXX: every 16-bit word holds 0x00XX. */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
/* bmNNNNNNNN: byte masks; each digit of the name (most significant byte
 * first) tells whether that byte is 0xFF (1) or 0x00 (0). */
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* Per-channel dither values; no initializer, so they start out as zero.
 * NOTE(review): presumably updated per output line before the asm code
 * reads them -- the writers are not visible in this part of the source. */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

/* Two-entry dither pattern tables (one 64-bit row per entry). */
static uint64_t __attribute__((aligned(8))) dither4[2]={
	0x0103010301030103LL,
	0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
	0x0602060206020602LL,
	0x0004000400040004LL,};

/* Channel bit masks replicated in every 16-bit word: the *16 set splits a
 * word into 5/6/5 bits, the *15 set into 5/5/5 bits. */
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* Byte masks that each select every third byte of a 64-bit word, at three
 * different phases. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;

/* Packed 16-bit BGR -> Y/U/V weights. With FAST_BGR2YV12 the weights are
 * the reduced-precision ("7 bit") set, otherwise the "15 bit" set. */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
/* Offsets replicated in every byte (0x10 for Y, 0x80 for U/V) and a word
 * vector of ones. */
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif /* defined(ARCH_X86) || defined(ARCH_X86_64) */

// clipping helper table for C implementations:
// NOTE(review): 768 = 3 * 256 entries; presumably indexed with a +256 bias so
// that out-of-range values clamp to 0..255. The code that fills it is not
// visible in this part of the source.
static unsigned char clip_table[768];

// Forward declaration; the definition appears later in the file.
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);

// Dither matrices defined in another translation unit.
extern const uint8_t dither_2x2_4[2][8];
extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
extern const uint8_t dither_8x8_220[8][8];

#if !defined(HAVE_THREADS)
/*
 * Variant of sws_thread_init() compiled when HAVE_THREADS is not defined.
 *
 * Neither argument is examined; the function always reports failure.
 *
 * @return always -1
 */
int sws_thread_init(SwsContext *s, int thread_count)
{
    (void)s;
    (void)thread_count;

    return -1;
}
/* Variant of sws_thread_free() compiled when HAVE_THREADS is not defined:
 * it does nothing. */
void sws_thread_free(struct SwsContext *s)
{
    (void)s;
}

/*
 * Variant of GetCPUCount() compiled when HAVE_THREADS is not defined.
 *
 * @return always 1
 */
int GetCPUCount(void)
{
    const int cpu_count = 1;

    return cpu_count;
}
/*
 * Variant of isP4HT() compiled when HAVE_THREADS is not defined.
 *
 * @return always 0
 */
int isP4HT(void)
{
    const int detected = 0;

    return detected;
}
#else
#include "isP4HT.c"
#endif

/*
 * Default executor: runs func on each of the count contexts of the array c,
 * one after another in index order, on the calling thread.
 *
 * @param c     array of at least count contexts
 * @param func  callback invoked once per context
 * @param ret   optional array of count ints receiving each callback result;
 *              may be NULL, in which case the results are discarded
 * @param count number of contexts to process
 * @return always 0
 */
int sws_default_execute(SwsContext *c, int (*func)(SwsContext *c2), int *ret, int count){
    SwsContext *ctx = c;
    int idx = 0;

    while (idx < count) {
        const int status = func(ctx);

        if (ret)
            ret[idx] = status;

        ++ctx;
        ++idx;
    }

    return 0;
}

/**
 * Render an image-format code as "0x<hex> (<4 chars>)" for diagnostics.
 *
 * The four characters are the bytes of the code, most significant byte
 * first, emitted as-is (they are not filtered for printability).
 *
 * @param format image format code
 * @return pointer to a NUL-terminated string in static storage. Two 32-byte
 *         slots are used alternately, so the results of two consecutive
 *         calls can be used in the same expression; every further call
 *         overwrites the older of the two. Not safe for concurrent callers.
 */
char *sws_format_name(int format)
{
    static char fmt_name[64];
    static int buffer;
    /* Work on an unsigned copy: %x expects an unsigned argument and shifting
     * a negative int right is implementation-defined. */
    const unsigned int code = (unsigned int)format;
    char *res = fmt_name + buffer * 32;

    buffer = 1 - buffer;

#if defined(_MSC_VER) && _MSC_VER < 1900
    /* Old MSVC runtimes only offer _snprintf, which does not write the
     * terminator when the output fills the buffer; terminate explicitly. */
    _snprintf(res, 32, "0x%x (%c%c%c%c)", code,
              (int)((code >> 24) & 0xFF), (int)((code >> 16) & 0xFF),
              (int)((code >> 8) & 0xFF),
              (int)(code & 0xFF));
    res[31] = '\0';
#else
    snprintf(res, 32, "0x%x (%c%c%c%c)", code,
             (int)((code >> 24) & 0xFF), (int)((code >> 16) & 0xFF),
             (int)((code >> 8) & 0xFF),
             (int)(code & 0xFF));
#endif

    return res;
}

#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * Reads each of the file-level 64-bit constants once so that they count as
 * referenced from C code (as the function name says, to silence
 * unused-variable warnings). The sum itself is meaningless and is thrown
 * away.
 */
void in_asm_used_var_warning_killer()
{
 volatile int i;

 i = bF8 + bFC + w10 + w02
   + bm00001111 + bm00000111 + bm11111000 + bm01010101
   + b16Mask + g16Mask + r16Mask
   + b15Mask + g15Mask + r15Mask
   + M24A + M24B + M24C
   + b5Dither + g5Dither + r5Dither + g6Dither
   + dither4[0] + dither8[0];

 if (i != 0)
 {
  i = 0;
 }
}
#endif

/*
 * Vertical filter step producing one line of planar output (plain C version).
 *
 * Each output sample is a weighted sum of the same column taken from
 * several source lines: sum(src[j][x] * filter[j]) + (1<<18), shifted right
 * by 19 and clamped to 0..255.
 *
 * lumFilter / lumSrc / lumFilterSize : luma coefficients, source lines and
 *                                      their count
 * chrFilter / chrSrc / chrFilterSize : same for chroma; each chroma source
 *                                      line holds the U samples at [x] and
 *                                      the V samples at [x + 2048]
 * dest                               : luma output, dstW samples
 * uDest, vDest                       : chroma outputs, chrDstW samples each;
 *                                      chroma is skipped when uDest is NULL
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
			       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
			       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
	int x;
	int tap;

	/* luma plane */
	for (x = 0; x < dstW; x++) {
		int acc = 1 << 18;	/* rounding term for the >>19 below */

		for (tap = 0; tap < lumFilterSize; tap++)
			acc += lumSrc[tap][x] * lumFilter[tap];

		acc >>= 19;
		dest[x] = FFMIN(FFMAX(acc, 0), 255);
	}

	/* no chroma requested for this line */
	if (uDest == NULL)
		return;

	/* chroma planes: U and V share the coefficients and the source lines */
	for (x = 0; x < chrDstW; x++) {
		int accU = 1 << 18;
		int accV = 1 << 18;

		for (tap = 0; tap < chrFilterSize; tap++) {
			int16_t *line = chrSrc[tap];

			accU += line[x] * chrFilter[tap];
			accV += line[x + 2048] * chrFilter[tap];
		}

		accU >>= 19;
		accV >>= 19;
		uDest[x] = FFMIN(FFMAX(accU, 0), 255);
		vDest[x] = FFMIN(FFMAX(accV, 0), 255);
	}
}

/*
 * Vertical filter step producing one line of output with a separate luma
 * plane and a single interleaved chroma plane (plain C version).
 *
 * Luma and chroma samples are computed as
 * sum(src[j][x] * filter[j]) + (1<<18), shifted right by 19 and clamped to
 * 0..255. Each chroma source line holds the U samples at [x] and the V
 * samples at [x + 2048].
 *
 * dest      : luma output, dstW samples
 * uDest     : interleaved chroma output, 2 * chrDstW bytes; chroma is
 *             skipped when it is NULL
 * dstFormat : IMGFMT_NV12 stores each pair as U,V; any other value stores
 *             it as V,U
 */
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
	int x;
	int tap;
	int uPos;
	int vPos;

	/* luma plane */
	for (x = 0; x < dstW; x++) {
		int acc = 1 << 18;	/* rounding term for the >>19 below */

		for (tap = 0; tap < lumFilterSize; tap++)
			acc += lumSrc[tap][x] * lumFilter[tap];

		acc >>= 19;
		dest[x] = FFMIN(FFMAX(acc, 0), 255);
	}

	/* no chroma requested for this line */
	if (uDest == NULL)
		return;

	/* byte position of U and V inside each interleaved pair */
	uPos = (dstFormat == IMGFMT_NV12) ? 0 : 1;
	vPos = 1 - uPos;

	for (x = 0; x < chrDstW; x++) {
		int accU = 1 << 18;
		int accV = 1 << 18;

		for (tap = 0; tap < chrFilterSize; tap++) {
			int16_t *line = chrSrc[tap];

			accU += line[x] * chrFilter[tap];
			accV += line[x + 2048] * chrFilter[tap];
		}

		accU >>= 19;
		accV >>= 19;
		uDest[2*x + uPos] = FFMIN(FFMAX(accU, 0), 255);
		uDest[2*x + vPos] = FFMIN(FFMAX(accV, 0), 255);
	}
}

/*
 * Loop prologue for the C packed-output writers that use a multi-tap
 * vertical filter.
 *
 * Expands to the head of a "for" loop over dstW/2 pixel pairs and
 * deliberately leaves the loop body open: the code following the macro
 * supplies the per-pair store and the closing brace.
 *
 * Names that must be in scope at the expansion site: i, dstW, lumSrc,
 * lumFilter, lumFilterSize, chrSrc, chrFilter, chrFilterSize.
 *
 * Inside the loop it provides:
 *   Y1, Y2 - filtered luma for pixels 2*i and 2*i+1
 *   U, V   - filtered chroma for the pair (U read from chrSrc[j][i],
 *            V from chrSrc[j][i+2048])
 *   r, g, b - pointers of the given type, declared but not assigned here
 * Each value is sum(src * coeff) + (1<<18), shifted right by 19.
 *
 * Clamping to 0..255 is done only when bit 8 of (Y1|Y2|U|V) is set, a cheap
 * test that fires for 256..511 and for small negative values.
 * NOTE(review): a value of 512 or more with bit 8 clear would skip the
 * clamp; presumably the filter gain keeps results inside that window --
 * confirm.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
		for(i=0; i<(dstW>>1); i++){\
			int j;\
			int Y1=1<<18;\
			int Y2=1<<18;\
			int U=1<<18;\
			int V=1<<18;\
			type *r, *b, *g;\
			const int i2= 2*i;\
			\
			for(j=0; j<lumFilterSize; j++)\
			{\
				Y1 += lumSrc[j][i2] * lumFilter[j];\
				Y2 += lumSrc[j][i2+1] * lumFilter[j];\
			}\
			for(j=0; j<chrFilterSize; j++)\
			{\
				U += chrSrc[j][i] * chrFilter[j];\
				V += chrSrc[j][i+2048] * chrFilter[j];\
			}\
			Y1>>=19;\
			Y2>>=19;\
			U >>=19;\
			V >>=19;\
			if((Y1|Y2|U|V)&256)\
			{\
				if(Y1>255)   Y1=255;\
				else if(Y1<0)Y1=0;\
				if(Y2>255)   Y2=255;\
				else if(Y2<0)Y2=0;\
				if(U>255)    U=255;\
				else if(U<0) U=0;\
				if(V>255)    V=255;\
				else if(V<0) V=0;\
			}

/*
 * YSCALE_YUV_2_PACKEDX_C plus the colour-table lookups for RGB output:
 * assigns r, g and b from the tables in the context "c", indexed by the
 * chroma values V and U of the current pixel pair. The loop opened by the
 * inner macro is still open after this expansion.
 * NOTE(review): g is formed by adding c->table_gV[V] to the pointer
 * c->table_gU[U], so table_gV presumably holds integer offsets -- confirm
 * against the context definition.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
			YSCALE_YUV_2_PACKEDX_C(type)\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];\

/*
 * Loop prologue for the C packed-output writers that blend two source
 * lines.
 *
 * Opens a "for" loop over dstW/2 pixel pairs (left open for the code that
 * follows) and defines Y1, Y2, U, V as
 *   (line0 * alpha1 + line1 * alpha) >> 19
 * using buf0/buf1 with yalpha1/yalpha for luma and uvbuf0/uvbuf1 with
 * uvalpha1/uvalpha for chroma (V is read at offset +2048). No clamping is
 * applied here. i, dstW and the buffers/weights named above must be in
 * scope at the expansion site.
 */
#define YSCALE_YUV_2_PACKED2_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
			int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
			int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
			int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\

/*
 * YSCALE_YUV_2_PACKED2_C plus declaration and lookup of the r, g, b table
 * pointers from the context "c", indexed by V and U. The loop is still open
 * after this expansion.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
			YSCALE_YUV_2_PACKED2_C\
			type *r, *b, *g;\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];\

/*
 * Loop prologue for the C packed-output writers that read a single source
 * line.
 *
 * Opens a "for" loop over dstW/2 pixel pairs (left open for the code that
 * follows) and defines Y1, Y2 from buf0 and U, V from uvbuf1 (V at offset
 * +2048), each shifted right by 7. No clamping is applied here. i, dstW,
 * buf0 and uvbuf1 must be in scope at the expansion site.
 */
#define YSCALE_YUV_2_PACKED1_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= buf0[i2  ]>>7;\
			int Y2= buf0[i2+1]>>7;\
			int U= (uvbuf1[i     ])>>7;\
			int V= (uvbuf1[i+2048])>>7;\

/*
 * YSCALE_YUV_2_PACKED1_C plus declaration and lookup of the r, g, b table
 * pointers from the context "c", indexed by V and U. The loop is still open
 * after this expansion.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
			YSCALE_YUV_2_PACKED1_C\
			type *r, *b, *g;\
			r = c->table_rV[V];\
			g = c->table_gU[U] + c->table_gV[V];\
			b = c->table_bU[U];\

/*
 * Variant of the single-luma-line loop prologue that averages chroma from
 * two lines.
 *
 * Opens a "for" loop over dstW/2 pixel pairs (left open for the code that
 * follows). Y1, Y2 come from buf0 shifted right by 7; U and V are the sum
 * of the uvbuf0 and uvbuf1 samples shifted right by 8 (V at offset +2048).
 * No clamping is applied here. i, dstW, buf0, uvbuf0 and uvbuf1 must be in
 * scope at the expansion site.
 */
#define YSCALE_YUV_2_PACKED1B_C \
		for(i=0; i<(dstW>>1); i++){\
			const int i2= 2*i;\
			int Y1= buf0[i2  ]>>7;\
			int Y2= buf0[i2+1]>>7;\
			int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
			int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\

#define YSCALE_YUV_2_RGB1B_C(type) \
			YSCALE_YUV_2_PACKED1B_C\
swscale.c - 源码说明

本页面展示了「从FFMPEG转换而来的H264解码程序,VC下编译..」中的 swscale.c 源码文件，采用 C语言编程语言编写，共 2,189 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与FFMPEG相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?