dct.cpp

来自「MPEG4音频视频压缩编码(含G.711/ACC/H.261等)」· C++ 代码 · 共 2,018 行 · 第 1/5 页
CPP
2,018 行
 *
 * This routine does not take a quantization table, since the H.261
 * inverse quantizer is easily implemented via table lookup in the decoder.
 */
/*
 * rdct: two-pass 8x8 transform of the coefficient block `bp` into pixels
 * written through `p` (presumably the inverse/"reverse" DCT -- the start
 * of the original header comment is not visible here; confirm).
 *
 *   bp      64 coefficients, consumed 8 per row (bp[0..7], then bp += 8).
 *   m0, m1  bitmask with one byte per row; the low byte of m0 describes
 *           the current row and is shifted out after each row.  When
 *           INT_64 is defined the whole mask arrives in m0; otherwise it
 *           is split across m0 and m1 (assumes 32-bit u_int -- confirm).
 *           M(n) is defined elsewhere; presumably it tests bit n of m0.
 *   p       destination; advanced by `stride` after every output row.
 *           The actual stores are done by PSTORE (defined elsewhere).
 *   stride  distance between consecutive rows of `p` (and of `in`).
 *   in      when non-null, the DOPIXIN* macro variants are used and `in`
 *           is advanced by `stride` per row; presumably a reference block
 *           that is combined with the result -- confirm against the
 *           macro definitions.
 *
 * Every coefficient is multiplied by the matching entry of cross_stage
 * (defined elsewhere) as it is loaded in the first pass.
 */
void
#ifdef INT_64
rdct(register short *bp, INT_64 m0, u_char* p, int stride, const u_char* in)
#else
rdct(register short *bp, u_int m0, u_int m1, u_char* p, int stride, const u_char *in)
#endif
{
	int tmp[64];
	int* tp = tmp;
	const int* qt = cross_stage;
	/*
	 * First pass is 1D transform over the rows of the input array.
	 */
	int i;
	for (i = 8; --i >= 0; ) {
		/* 0xfe selects mask bits 1..7 of the current row's byte */
		if ((m0 & 0xfe) == 0) {
			/*
			 * All ac terms are zero.
			 */
			int v = 0;
			if (M(0))
				v = qt[0] * bp[0];
			/* the whole output row takes the same value */
			tp[0] = v;
			tp[1] = v;
			tp[2] = v;
			tp[3] = v;
			tp[4] = v;
			tp[5] = v;
			tp[6] = v;
			tp[7] = v;
		} else {
			int t4 = 0, t5 = 0, t6 = 0, t7 = 0;
			/* 0xaa = mask bits 1,3,5,7; skip the odd butterfly if none is set */
			if (m0 & 0xaa) {
				/* odd part */
				if (M(1))
					t4 = qt[1] * bp[1];
				if (M(3))
					t5 = qt[3] * bp[3];
				if (M(5))
					t6 = qt[5] * bp[5];
				if (M(7))
					t7 = qt[7] * bp[7];
				int x0 = t6 - t5;
				t6 += t5;
				int x1 = t4 - t7;
				t7 += t4;
				t5 = FP_MUL(t7 - t6, A3);
				t7 += t6;
				t4 = FP_MUL(x1 + x0, A5);
				t6 = FP_MUL(x1, A4) - t4;
				t4 += FP_MUL(x0, A2);
				t7 += t6;
				t6 += t5;
				t5 += t4;
			}
			int t0 = 0, t1 = 0, t2 = 0, t3 = 0;
			/* 0x55 = mask bits 0,2,4,6; skip the even butterfly if none is set */
			if (m0 & 0x55) {
				/* even part */
				if (M(0))
					t0 = qt[0] * bp[0];
				if (M(2))
					t1 = qt[2] * bp[2];
				if (M(4))
					t2 = qt[4] * bp[4];
				if (M(6))
					t3 = qt[6] * bp[6];
				int x0 = FP_MUL(t1 - t3, A1);
				t3 += t1;
				t1 = t0 - t2;
				t0 += t2;
				t2 = t3 + x0;
				t3 = t0 - t2;
				t0 += t2;
				t2 = t1 - x0;
				t1 += x0;
			}
			/* combine the even and odd halves into the 8 row outputs */
			tp[0] = t0 + t7;
			tp[1] = t1 + t6;
			tp[2] = t2 + t5;
			tp[3] = t3 + t4;
			tp[4] = t3 - t4;
			tp[5] = t2 - t5;
			tp[6] = t1 - t6;
			tp[7] = t0 - t7;
		}
		/* advance to the next row of the scale table, temp array and input */
		qt += 8;
		tp += 8;
		bp += 8;
		/* drop this row's mask byte */
		m0 >>= 8;
#ifndef INT_64
		/* refill the top byte of m0 from the low byte of m1 */
		m0 |= m1 << 24;
		m1 >>= 8;
#endif
	}
	/* rewind to the start of tmp */
	tp -= 64;
	/*
	 * Second pass is 1D transform over the columns of the temp array
	 * (elements are read with a stride of 8 and tp advances by 1).
	 */
	for (i = 8; --i >= 0; ) {
		int t4 = tp[8*1];
		int t5 = tp[8*3];
		int t6 = tp[8*5];
		int t7 = tp[8*7];
		/* nothing to do when every odd input is zero */
		if ((t4|t5|t6|t7) != 0) {
			/* odd part */
			int x0 = t6 - t5;
			t6 += t5;
			int x1 = t4 - t7;
			t7 += t4;
			t5 = FP_MUL(t7 - t6, A3);
			t7 += t6;
			t4 = FP_MUL(x1 + x0, A5);
			t6 = FP_MUL(x1, A4) - t4;
			t4 += FP_MUL(x0, A2);
			t7 += t6;
			t6 += t5;
			t5 += t4;
		}
		int t0 = tp[8*0];
		int t1 = tp[8*2];
		int t2 = tp[8*4];
		int t3 = tp[8*6];
		/* nothing to do when every even input is zero */
		if ((t0|t1|t2|t3) != 0) {
			/* even part */
			int x0 = FP_MUL(t1 - t3, A1);
			t3 += t1;
			t1 = t0 - t2;
			t0 += t2;
			t2 = t3 + x0;
			t3 = t0 - t2;
			t0 += t2;
			t2 = t1 - x0;
			t1 += x0;
		}
		/*
		 * Emit the 8 results of this pass.  PIXDEF, DOPIX*, DID4PIX and
		 * PSTORE are macros defined elsewhere; `oflo` and `pix` are
		 * presumably declared by PIXDEF -- confirm.
		 */
		if (in != 0) {
			PIXDEF;
			DOPIXIN(t0 + t7, 0);
			DOPIXIN(t1 + t6, 1);
			DOPIXIN(t2 + t5, 2);
			DOPIXIN(t3 + t4, 3);
			DID4PIX;
			DOPIXIN(t3 - t4, 4);
			DOPIXIN(t2 - t5, 5);
			DOPIXIN(t1 - t6, 6);
			DOPIXIN(t0 - t7, 7);
			/*
			 * Some bit outside 0..0xff is set in oflo: redo the row
			 * with the *LIMIT variants (presumably these clamp each
			 * value to the 8-bit range -- confirm).
			 */
			if (oflo & ~0xff) {
				int t;
				pix = 0;
				DOPIXINLIMIT(t0 + t7, 0);
				DOPIXINLIMIT(t1 + t6, 1);
				DOPIXINLIMIT(t2 + t5, 2);
				DOPIXINLIMIT(t3 + t4, 3);
				DID4PIX;
				DOPIXINLIMIT(t3 - t4, 4);
				DOPIXINLIMIT(t2 - t5, 5);
				DOPIXINLIMIT(t1 - t6, 6);
				DOPIXINLIMIT(t0 - t7, 7);
			}
			PSTORE;
			in += stride;
		} else {
			PIXDEF;
			DOPIX(t0 + t7, 0);
			DOPIX(t1 + t6, 1);
			DOPIX(t2 + t5, 2);
			DOPIX(t3 + t4, 3);
			DID4PIX;
			DOPIX(t3 - t4, 4);
			DOPIX(t2 - t5, 5);
			DOPIX(t1 - t6, 6);
			DOPIX(t0 - t7, 7);
			/* same overflow fallback as above, without `in` */
			if (oflo & ~0xff) {
				int t;
				pix = 0;
				DOPIXLIMIT(t0 + t7, 0);
				DOPIXLIMIT(t1 + t6, 1);
				DOPIXLIMIT(t2 + t5, 2);
				DOPIXLIMIT(t3 + t4, 3);
				DID4PIX;
				DOPIXLIMIT(t3 - t4, 4);
				DOPIXLIMIT(t2 - t5, 5);
				DOPIXLIMIT(t1 - t6, 6);
				DOPIXLIMIT(t0 - t7, 7);
			}
			PSTORE;
		}
		/* next column of tmp, next output row */
		tp += 1;
		p += stride;
	}
}

/*
 * This macro does the combined descale-and-quantize
 * multiply.  It truncates rather than rounds to give
 * the behavior required for the h.261 deadband quantizer.
*/#define FWD_DandQ(v, iq) short((v) * qt[iq])void dct_fdct(const u_char* in, int stride, short* out, const float* qt){	float tmp[64];	float* tp = tmp;	int i;	for (i = 8; --i >= 0; ) {		float x0, x1, x2, x3, t0, t1, t2, t3, t4, t5, t6, t7;		t0 = float(in[0] + in[7]);		t7 = float(in[0] - in[7]);		t1 = float(in[1] + in[6]);		t6 = float(in[1] - in[6]);		t2 = float(in[2] + in[5]);		t5 = float(in[2] - in[5]);		t3 = float(in[3] + in[4]);		t4 = float(in[3] - in[4]);		/* even part */		x0 = t0 + t3;		x2 = t1 + t2;		tp[8*0] = x0 + x2;		tp[8*4] = x0 - x2;		x1 = t0 - t3;		x3 = t1 - t2;		t0 = (x1 + x3) * FA1;		tp[8*2] = x1 + t0;		tp[8*6] = x1 - t0;		/* odd part */		x0 = t4 + t5;		x1 = t5 + t6;		x2 = t6 + t7;		t3 = x1 * FA1;		t4 = t7 - t3;		t0 = (x0 - x2) * FA5;		t1 = x0 * FA2 + t0;		tp[8*3] = t4 - t1;		tp[8*5] = t4 + t1;		t7 += t3;		t2 = x2 * FA4 + t0;		tp[8*1] = t7 + t2;		tp[8*7] = t7 - t2;		in += stride;		tp += 1;	}	tp -= 8;	for (i = 8; --i >= 0; ) {		float x0, x1, x2, x3, t0, t1, t2, t3, t4, t5, t6, t7;		t0 = tp[0] + tp[7];		t7 = tp[0] - tp[7];		t1 = tp[1] + tp[6];		t6 = tp[1] - tp[6];		t2 = tp[2] + tp[5];		t5 = tp[2] - tp[5];		t3 = tp[3] + tp[4];		t4 = tp[3] - tp[4];		/* even part */		x0 = t0 + t3;		x2 = t1 + t2;		out[0] = FWD_DandQ(x0 + x2, 0);		out[4] = FWD_DandQ(x0 - x2, 4);		x1 = t0 - t3;		x3 = t1 - t2;		t0 = (x1 + x3) * FA1;		out[2] = FWD_DandQ(x1 + t0, 2);		out[6] = FWD_DandQ(x1 - t0, 6);		/* odd part */		x0 = t4 + t5;		x1 = t5 + t6;		x2 = t6 + t7;		t3 = x1 * FA1;		t4 = t7 - t3;		t0 = (x0 - x2) * FA5;		t1 = x0 * FA2 + t0;		out[3] = FWD_DandQ(t4 - t1, 3);		out[5] = FWD_DandQ(t4 + t1, 5);		t7 += t3;		t2 = x2 * FA4 + t0;		out[1] = FWD_DandQ(t7 + t2, 1);		out[7] = FWD_DandQ(t7 - t2, 7);		out += 8;		tp += 8;		qt += 8;	}}/* * decimate the *rows* of the two input 8x8 DCT matrices into * a single output matrix.  we decimate rows rather than * columns even though we want column decimation because * the DCTs are stored in column order. 
*/#if 0voiddct_decimate(const short* in0, const short* in1, short* o){	for (int k = 0; k < 8; ++k) {		int x00 = in0[0];		int x01 = in0[1];		int x02 = in0[2];		int x03 = in0[3];		int x10 = in1[0];		int x11 = in1[1];		int x12 = in1[2];		int x13 = in1[3];#define X_N 4#define X_5(v)  ((v) << (X_N - 1))#define X_25(v)  ((v) << (X_N - 2))#define X_125(v)  ((v) << (X_N - 3))#define X_0625(v)  ((v) << (X_N - 4))#define X_375(v) (X_25(v) + X_125(v))#define X_625(v) (X_5(v) + X_125(v))#define X_75(v) (X_5(v) + X_25(v))#define X_6875(v) (X_5(v) + X_125(v) + X_0625(v))#define X_1875(v) (X_125(v) + X_0625(v))#define X_NORM(v) ((v) >> X_N)		/*		 * 0.50000000  0.09011998  0.00000000 0.10630376		 * 	0.50000000  0.09011998  0.00000000  0.10630376		 * 0.45306372  0.28832037  0.03732892 0.08667963		 * 	-0.45306372  0.11942621  0.10630376 -0.06764951		 * 0.00000000  0.49039264  0.17677670 0.00000000		 * 	0.00000000 -0.49039264 -0.17677670  0.00000000		 * -0.15909482  0.34009707  0.38408888 0.05735049		 *	0.15909482  0.43576792 -0.09011998 -0.13845632		 * 0.00000000 -0.03732892  0.46193977 0.25663998		 * 	0.00000000 -0.03732892  0.46193977  0.25663998		 * 0.10630376 -0.18235049  0.25663998 0.42361940		 *	-0.10630376 -0.16332037 -0.45306372 -0.01587282		 * 0.00000000  0.00000000 -0.07322330 0.41573481		 * 	0.00000000  0.00000000  0.07322330 -0.41573481		 * -0.09011998  0.13399123 -0.18766514 0.24442621		 *	0.09011998  0.13845632  0.15909482  0.47539609		 */		o[0] = X_NORM(X_5(x00 + x10) + X_0625(x01 + x11) +			      X_125(x03 + x13));		o[1] = X_NORM(X_5(x00 - x10) + X_25(x01) + X_0625(x03) +			      X_125(x11 + x12));		o[2] = X_NORM(X_5(x01 - x11) + X_1875(x02 + x12));		o[3] = X_NORM(X_1875(x10 - x00) + X_375(x01 + x02) +			      X_5(x11) - X_125(x13));		o[4] = X_NORM(X_5(x02 + x12) + X_25(x03 + x13));		o[5] = X_NORM(X_125(x00 - x10) - X_1875(x01 + x11) +			      X_25(x02) + X_5(x03 - x12));		o[6] = X_NORM(X_625(x12 - x02) + X_375(x03 + x13));		o[7] = X_NORM(X_125(x01 - x00 + x11 + x10 
+ x12) +			      X_1875(x02) + X_25(x03) + X_5(x13));		o += 8;		in0 += 8;		in1 += 8;	}}#endif#ifdef _MSC_VER// 'initializing' : truncation from 'const double to float#pragma warning(default: 4305)#endif/* What follows is the assembly language implementation of the mmx dct */ /*************************************************************************** * *      This program has been developed by Intel Corporation. *      You have Intel's permission to incorporate this code *      into your product, royalty free.  Intel has various *      intellectual property rights which it may assert under *      certain circumstances, such as if another manufacturer's *      processor mis-identifies itself as being "GenuineIntel" *      when the CPUID instruction is executed. * *      Intel specifically disclaims all warranties, express or *      implied, and all liability, including consequential and *      other indirect damages, for the use of this code, *      including liability for infringement of any proprietary *      rights, and including the warranties of merchantability *      and fitness for a particular purpose.  Intel does not *      assume any responsibility for any errors which may
dct.cpp - 源码说明

本页面展示了「MPEG4音频视频压缩编码(含G.711/ACC/H.261等)」中的 dct.cpp 源码文件，采用 C++ 编程语言编写，共 2,018 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与MPEG4相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?