📄 dct.cxx

📁 sloedgy open sip stack source code
💻 CXX
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
		t3 += t0;

		t2 = FP_MUL(A1, t2);
		t3 += t2;

		t0 = x0 + t3;
		t1 = x1 + t2;
		t2 = x1 - t2;
		t3 = x0 - t3;

		PIXDEF;
		DOJPIX(t0 + t7, 0);
		DOJPIX(t1 + t6, 1);
		DOJPIX(t2 + t5, 2);
		DOJPIX(t3 + t4, 3);
		DID4PIX;
		DOJPIX(t3 - t4, 4);
		DOJPIX(t2 - t5, 5);
		DOJPIX(t1 - t6, 6);
		DOJPIX(t0 - t7, 7);
		if (oflo & ~0xff) {
			int t;
			pix = 0;
			DOJPIXLIMIT(t0 + t7, 0);
			DOJPIXLIMIT(t1 + t6, 1);
			DOJPIXLIMIT(t2 + t5, 2);
			DOJPIXLIMIT(t3 + t4, 3);
			DID4PIX;
			DOJPIXLIMIT(t3 - t4, 4);
			DOJPIXLIMIT(t2 - t5, 5);
			DOJPIXLIMIT(t1 - t6, 6);
			DOJPIXLIMIT(t0 - t7, 7);
		}
		PSTORE;

		++tp;
		p += stride;
	}
}

/*
 * Inverse 2-D transform, similar to routine above (see comment above),
 * but more appropriate for H.261 instead of JPEG.  This routine does
 * not bias the output by 128, and has an additional argument which is
 * an input array which gets summed together with the inverse-transform.
 * For example, this allows motion-compensation to be folded in here,
 * saving an extra traversal of the block.  The input pointer can be
 * null, if motion-compensation is not needed.
 *
 * This routine does not take a quantization table, since the H.261
 * inverse quantizer is easily implemented via table lookup in the decoder.
 *
 * Arguments:
 *   bp      64 coefficients, consumed eight at a time (one group of
 *           eight per iteration of the first pass).
 *   m0, m1  bit mask, eight bits per group of eight coefficients.  The
 *           low byte of m0 always describes the group currently being
 *           processed; it is shifted down by eight after each group.
 *           Without INT_64 the mask is split over two words and m1
 *           refills the top byte of m0 as groups are consumed.
 *           NOTE(review): M(n) is defined outside this section; it
 *           presumably tests bit n of m0 -- confirm.
 *   p       output pointer, advanced by `stride' after every iteration
 *           of the second pass.  The stores themselves are performed by
 *           the PSTORE macro, which is not visible here.
 *   stride  amount added to p (and to `in' when it is non-null) per
 *           iteration of the second pass.
 *   in      optional second input.  When non-null the DOPIXIN variants
 *           of the pixel macros are used and `in' is advanced by
 *           `stride' per iteration; when null the DOPIX variants are
 *           used instead.
 */
void
#ifdef INT_64
rdct(register short *bp, INT_64 m0, u_char* p, int stride, const u_char* in)
#else
rdct(register short *bp, u_int m0, u_int m1, u_char* p, int stride, const u_char *in)
#endif
{
	/* Result of the first pass: eight groups of eight values. */
	int tmp[64];
	int* tp = tmp;
	/* Per-coefficient multipliers; 64 entries are consumed below. */
	const int* qt = cross_stage;
	/*
	 * First pass is 1D transform over the rows of the input array.
	 */
	int i;
	for (i = 8; --i >= 0; ) {
		/* Bits 1..7 of the low mask byte are all clear. */
		if ((m0 & 0xfe) == 0) {
			/*
			 * All ac terms are zero.
			 */
			int v = 0;
			if (M(0))
				v = qt[0] * bp[0];
			/* Only the first term contributes: replicate it. */
			tp[0] = v;
			tp[1] = v;
			tp[2] = v;
			tp[3] = v;
			tp[4] = v;
			tp[5] = v;
			tp[6] = v;
			tp[7] = v;
		} else {
			int t4 = 0, t5 = 0, t6 = 0, t7 = 0;
			/* 0xaa selects bits 1, 3, 5 and 7 of the low byte. */
			if (m0 & 0xaa) {
				/* odd part */
				if (M(1))
					t4 = qt[1] * bp[1];
				if (M(3))
					t5 = qt[3] * bp[3];
				if (M(5))
					t6 = qt[5] * bp[5];
				if (M(7))
					t7 = qt[7] * bp[7];

				int x0 = t6 - t5;
				t6 += t5;
				int x1 = t4 - t7;
				t7 += t4;

				t5 = FP_MUL(t7 - t6, A3);
				t7 += t6;

				t4 = FP_MUL(x1 + x0, A5);
				t6 = FP_MUL(x1, A4) - t4;
				t4 += FP_MUL(x0, A2);

				t7 += t6;
				t6 += t5;
				t5 += t4;
			}
			int t0 = 0, t1 = 0, t2 = 0, t3 = 0;
			/* 0x55 selects bits 0, 2, 4 and 6 of the low byte. */
			if (m0 & 0x55) {
				/* even part */
				if (M(0))
					t0 = qt[0] * bp[0];
				if (M(2))
					t1 = qt[2] * bp[2];
				if (M(4))
					t2 = qt[4] * bp[4];
				if (M(6))
					t3 = qt[6] * bp[6];

				int x0 = FP_MUL(t1 - t3, A1);
				t3 += t1;
				t1 = t0 - t2;
				t0 += t2;
				t2 = t3 + x0;
				t3 = t0 - t2;
				t0 += t2;
				t2 = t1 - x0;
				t1 += x0;
			}
			/* Final butterfly combining the even and odd halves. */
			tp[0] = t0 + t7;
			tp[1] = t1 + t6;
			tp[2] = t2 + t5;
			tp[3] = t3 + t4;
			tp[4] = t3 - t4;
			tp[5] = t2 - t5;
			tp[6] = t1 - t6;
			tp[7] = t0 - t7;
		}
		/* Advance to the next group of eight in every array. */
		qt += 8;
		tp += 8;
		bp += 8;
		/* Bring the next group's eight mask bits into the low byte. */
		m0 >>= 8;
#ifndef INT_64
		/* 32-bit mask words: refill the top byte of m0 from m1. */
		m0 |= m1 << 24;
		m1 >>= 8;
#endif
	}
	/* Rewind to the start of tmp for the second pass. */
	tp -= 64;
	/*
	 * Second pass is 1D transform over the columns of the temp array:
	 * elements are read with a stride of 8 and tp moves on by one
	 * element per iteration.
	 */
	for (i = 8; --i >= 0; ) {
		int t4 = tp[8*1];
		int t5 = tp[8*3];
		int t6 = tp[8*5];
		int t7 = tp[8*7];
		/* Skip the odd-part arithmetic when all four inputs are zero. */
		if ((t4|t5|t6|t7) != 0) {
			/* odd part */
			int x0 = t6 - t5;
			t6 += t5;
			int x1 = t4 - t7;
			t7 += t4;

			t5 = FP_MUL(t7 - t6, A3);
			t7 += t6;

			t4 = FP_MUL(x1 + x0, A5);
			t6 = FP_MUL(x1, A4) - t4;
			t4 += FP_MUL(x0, A2);

			t7 += t6;
			t6 += t5;
			t5 += t4;
		}
		int t0 = tp[8*0];
		int t1 = tp[8*2];
		int t2 = tp[8*4];
		int t3 = tp[8*6];
		/* Likewise skip the even-part arithmetic for all-zero inputs. */
		if ((t0|t1|t2|t3) != 0) {
			/* even part */
			int x0 = FP_MUL(t1 - t3, A1);
			t3 += t1;
			t1 = t0 - t2;
			t0 += t2;
			t2 = t3 + x0;
			t3 = t0 - t2;
			t0 += t2;
			t2 = t1 - x0;
			t1 += x0;
		}
		if (in != 0) {
			/*
			 * A second input was supplied: use the DOPIXIN forms.
			 * NOTE(review): `oflo' and `pix' are not declared in
			 * this function; presumably PIXDEF declares them and
			 * the DOPIX* macros accumulate into them -- confirm
			 * against the macro definitions.
			 */
			PIXDEF;
			DOPIXIN(t0 + t7, 0);
			DOPIXIN(t1 + t6, 1);
			DOPIXIN(t2 + t5, 2);
			DOPIXIN(t3 + t4, 3);
			DID4PIX;
			DOPIXIN(t3 - t4, 4);
			DOPIXIN(t2 - t5, 5);
			DOPIXIN(t1 - t6, 6);
			DOPIXIN(t0 - t7, 7);
			/*
			 * oflo has a bit set outside the low eight: redo the
			 * eight values with the *LIMIT forms of the macros.
			 */
			if (oflo & ~0xff) {
				int t;
				pix = 0;
				DOPIXINLIMIT(t0 + t7, 0);
				DOPIXINLIMIT(t1 + t6, 1);
				DOPIXINLIMIT(t2 + t5, 2);
				DOPIXINLIMIT(t3 + t4, 3);
				DID4PIX;
				DOPIXINLIMIT(t3 - t4, 4);
				DOPIXINLIMIT(t2 - t5, 5);
				DOPIXINLIMIT(t1 - t6, 6);
				DOPIXINLIMIT(t0 - t7, 7);
			}
			PSTORE;
			in += stride;
		} else {
			/* No second input: same sequence with the DOPIX forms. */
			PIXDEF;
			DOPIX(t0 + t7, 0);
			DOPIX(t1 + t6, 1);
			DOPIX(t2 + t5, 2);
			DOPIX(t3 + t4, 3);
			DID4PIX;
			DOPIX(t3 - t4, 4);
			DOPIX(t2 - t5, 5);
			DOPIX(t1 - t6, 6);
			DOPIX(t0 - t7, 7);
			/* Same overflow test and redo as in the branch above. */
			if (oflo & ~0xff) {
				int t;
				pix = 0;
				DOPIXLIMIT(t0 + t7, 0);
				DOPIXLIMIT(t1 + t6, 1);
				DOPIXLIMIT(t2 + t5, 2);
				DOPIXLIMIT(t3 + t4, 3);
				DID4PIX;
				DOPIXLIMIT(t3 - t4, 4);
				DOPIXLIMIT(t2 - t5, 5);
				DOPIXLIMIT(t1 - t6, 6);
				DOPIXLIMIT(t0 - t7, 7);
			}
			PSTORE;
		}
		/* Next column of tmp, next output line. */
		tp += 1;
		p += stride;
	}
}

/*
 * This macro does the combined descale-and-quantize
 * multiply.  It truncates rather than rounds to give
 * the behavior required for the h.261 deadband quantizer.
 *
 * The macro is not self-contained: it indexes an identifier named
 * `qt' that must be in scope wherever the macro is expanded, with
 * `iq' as the index.  The product is converted to short with a
 * functional cast, which is where the truncation happens.
 */
 
#define FWD_DandQ(v, iq) short((v) * qt[iq])

/*
 * Forward 2-D transform of an 8x8 block with the quantizer multiply
 * folded into the second pass.
 *
 *   in      first of eight input lines of eight bytes each; successive
 *           lines are `stride' bytes apart.
 *   stride  distance between input lines.
 *   out     receives 64 shorts, written eight at a time.
 *   qt      64 multipliers, indexed in the same order as `out'
 *           (consumed through FWD_DandQ).
 *
 * The first pass stores each transformed input line into a *column* of
 * the scratch array, and the second pass transforms the scratch *rows*,
 * so the output is transposed with respect to the input.
 */
void fdct(const u_char* in, int stride, short* out, const float* qt)
{
	float work[64];

	/* Pass 1: one input line per iteration, written down a column. */
	for (int line = 0; line < 8; ++line, in += stride) {
		float* col = work + line;

		/* Fold the line about its centre into sums and differences. */
		const float sum07 = float(in[0] + in[7]);
		const float dif07 = float(in[0] - in[7]);
		const float sum16 = float(in[1] + in[6]);
		const float dif16 = float(in[1] - in[6]);
		const float sum25 = float(in[2] + in[5]);
		const float dif25 = float(in[2] - in[5]);
		const float sum34 = float(in[3] + in[4]);
		const float dif34 = float(in[3] - in[4]);

		/* even part */
		const float ev_a = sum07 + sum34;
		const float ev_b = sum16 + sum25;
		col[8*0] = ev_a + ev_b;
		col[8*4] = ev_a - ev_b;

		const float ev_c = sum07 - sum34;
		const float ev_d = sum16 - sum25;
		const float ev_rot = (ev_c + ev_d) * FA1;
		col[8*2] = ev_c + ev_rot;
		col[8*6] = ev_c - ev_rot;

		/* odd part */
		const float od_a = dif34 + dif25;
		const float od_b = dif25 + dif16;
		const float od_c = dif16 + dif07;

		const float od_mid = od_b * FA1;
		const float od_lo = dif07 - od_mid;

		const float od_shared = (od_a - od_c) * FA5;
		const float od_p = od_a * FA2 + od_shared;
		col[8*3] = od_lo - od_p;
		col[8*5] = od_lo + od_p;

		const float od_hi = dif07 + od_mid;
		const float od_q = od_c * FA4 + od_shared;
		col[8*1] = od_hi + od_q;
		col[8*7] = od_hi - od_q;
	}

	/*
	 * Pass 2: one scratch row per iteration.  `qt' itself is advanced
	 * because FWD_DandQ indexes it by name.
	 */
	for (int n = 0; n < 8; ++n, out += 8, qt += 8) {
		const float* row = work + 8 * n;

		const float sum07 = row[0] + row[7];
		const float dif07 = row[0] - row[7];
		const float sum16 = row[1] + row[6];
		const float dif16 = row[1] - row[6];
		const float sum25 = row[2] + row[5];
		const float dif25 = row[2] - row[5];
		const float sum34 = row[3] + row[4];
		const float dif34 = row[3] - row[4];

		/* even part */
		const float ev_a = sum07 + sum34;
		const float ev_b = sum16 + sum25;
		out[0] = FWD_DandQ(ev_a + ev_b, 0);
		out[4] = FWD_DandQ(ev_a - ev_b, 4);

		const float ev_c = sum07 - sum34;
		const float ev_d = sum16 - sum25;
		const float ev_rot = (ev_c + ev_d) * FA1;
		out[2] = FWD_DandQ(ev_c + ev_rot, 2);
		out[6] = FWD_DandQ(ev_c - ev_rot, 6);

		/* odd part */
		const float od_a = dif34 + dif25;
		const float od_b = dif25 + dif16;
		const float od_c = dif16 + dif07;

		const float od_mid = od_b * FA1;
		const float od_lo = dif07 - od_mid;

		const float od_shared = (od_a - od_c) * FA5;
		const float od_p = od_a * FA2 + od_shared;
		out[3] = FWD_DandQ(od_lo - od_p, 3);
		out[5] = FWD_DandQ(od_lo + od_p, 5);

		const float od_hi = dif07 + od_mid;
		const float od_q = od_c * FA4 + od_shared;
		out[1] = FWD_DandQ(od_hi + od_q, 1);
		out[7] = FWD_DandQ(od_hi - od_q, 7);
	}
}

/*
 * Decimate the *rows* of the two input 8x8 DCT matrices into
 * a single output matrix.  We decimate rows rather than
 * columns even though we want column decimation because
 * the DCTs are stored in column order.
 *
 *   in0, in1  the two input matrices, 64 shorts each.  Only the first
 *             four entries of each group of eight are read.
 *   o         receives 64 shorts: eight outputs per group of eight.
 *
 * Each output is a weighted sum of the eight values read, with the
 * weights from the table in the body rounded to sums of 1/2, 1/4, 1/8
 * and 1/16 so they can be applied in fixed point.
 */
void
dct_decimate(const short* in0, const short* in1, short* o)
{
	for (int k = 0; k < 8; ++k) {
		int x00 = in0[0];
		int x01 = in0[1];
		int x02 = in0[2];
		int x03 = in0[3];
		int x10 = in1[0];
		int x11 = in1[1];
		int x12 = in1[2];
		int x13 = in1[3];
/*
 * Fixed-point weights scaled by 2^X_N; X_NORM removes the scale.
 * The scaling multiplies by a power of two instead of using `<<'
 * because the operands may be negative, and left-shifting a negative
 * value is undefined.  X_NORM keeps `>>' so that the rounding of
 * negative sums is unchanged on compilers with an arithmetic shift.
 */
#define X_N 4
#define X_5(v)  ((v) * (1 << (X_N - 1)))
#define X_25(v)  ((v) * (1 << (X_N - 2)))
#define X_125(v)  ((v) * (1 << (X_N - 3)))
#define X_0625(v)  ((v) * (1 << (X_N - 4)))
#define X_375(v) (X_25(v) + X_125(v))
#define X_625(v) (X_5(v) + X_125(v))
#define X_75(v) (X_5(v) + X_25(v))
#define X_6875(v) (X_5(v) + X_125(v) + X_0625(v))
#define X_1875(v) (X_125(v) + X_0625(v))
#define X_NORM(v) ((v) >> X_N)

		/*
		 * Exact weights.  Each output has two lines: the first
		 * holds the weights of x00..x03, the second those of
		 * x10..x13.
		 *
		 * 0.50000000  0.09011998  0.00000000 0.10630376 
		 * 	0.50000000  0.09011998  0.00000000  0.10630376
		 * 0.45306372  0.28832037  0.03732892 0.08667963
		 * 	-0.45306372  0.11942621  0.10630376 -0.06764951
		 * 0.00000000  0.49039264  0.17677670 0.00000000
		 * 	0.00000000 -0.49039264 -0.17677670  0.00000000 
		 * -0.15909482  0.34009707  0.38408888 0.05735049
		 *	0.15909482  0.43576792 -0.09011998 -0.13845632
		 * 0.00000000 -0.03732892  0.46193977 0.25663998
		 * 	0.00000000 -0.03732892  0.46193977  0.25663998
		 * 0.10630376 -0.18235049  0.25663998 0.42361940 
		 *	-0.10630376 -0.16332037 -0.45306372 -0.01587282
		 * 0.00000000  0.00000000 -0.07322330 0.41573481
		 * 	0.00000000  0.00000000  0.07322330 -0.41573481
		 * -0.09011998  0.13399123 -0.18766514 0.24442621
		 *	0.09011998  0.13845632  0.15909482  0.47539609
		 */

		o[0] = X_NORM(X_5(x00 + x10) + X_0625(x01 + x11) +
			      X_125(x03 + x13));
		o[1] = X_NORM(X_5(x00 - x10) + X_25(x01) + X_0625(x03) +
			      X_125(x11 + x12));
		/* x12 carries a negative weight (-0.1768) in the table. */
		o[2] = X_NORM(X_5(x01 - x11) + X_1875(x02 - x12));
		o[3] = X_NORM(X_1875(x10 - x00) + X_375(x01 + x02) + 
			      X_5(x11) - X_125(x13));
		o[4] = X_NORM(X_5(x02 + x12) + X_25(x03 + x13));
		o[5] = X_NORM(X_125(x00 - x10) - X_1875(x01 + x11) +
			      X_25(x02) + X_5(x03 - x12));
		/*
		 * The x02/x12 weight is 0.0732, i.e. 1/16 rather than 5/8,
		 * and x13 carries a negative weight (-0.4157).
		 */
		o[6] = X_NORM(X_0625(x12 - x02) + X_375(x03 - x13));
		/* x02 carries a negative weight (-0.1877) in the table. */
		o[7] = X_NORM(X_125(x01 - x00 + x11 + x10 + x12) -
			      X_1875(x02) + X_25(x03) + X_5(x13));

		o += 8;
		in0 += 8;
		in1 += 8;
	}
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -