📄 dct.cpp
字号:
* * This routine does not take a quantization table, since the H.261 * inverse quantizer is easily implemented via table lookup in the decoder. */void#ifdef INT_64rdct(register short *bp, INT_64 m0, u_char* p, int stride, const u_char* in)#elserdct(register short *bp, u_int m0, u_int m1, u_char* p, int stride, const u_char *in)#endif{ int tmp[64]; int* tp = tmp; const int* qt = cross_stage; /* * First pass is 1D transform over the rows of the input array. */ int i; for (i = 8; --i >= 0; ) { if ((m0 & 0xfe) == 0) { /* * All ac terms are zero. */ int v = 0; if (M(0)) v = qt[0] * bp[0]; tp[0] = v; tp[1] = v; tp[2] = v; tp[3] = v; tp[4] = v; tp[5] = v; tp[6] = v; tp[7] = v; } else { int t4 = 0, t5 = 0, t6 = 0, t7 = 0; if (m0 & 0xaa) { /* odd part */ if (M(1)) t4 = qt[1] * bp[1]; if (M(3)) t5 = qt[3] * bp[3]; if (M(5)) t6 = qt[5] * bp[5]; if (M(7)) t7 = qt[7] * bp[7]; int x0 = t6 - t5; t6 += t5; int x1 = t4 - t7; t7 += t4; t5 = FP_MUL(t7 - t6, A3); t7 += t6; t4 = FP_MUL(x1 + x0, A5); t6 = FP_MUL(x1, A4) - t4; t4 += FP_MUL(x0, A2); t7 += t6; t6 += t5; t5 += t4; } int t0 = 0, t1 = 0, t2 = 0, t3 = 0; if (m0 & 0x55) { /* even part */ if (M(0)) t0 = qt[0] * bp[0]; if (M(2)) t1 = qt[2] * bp[2]; if (M(4)) t2 = qt[4] * bp[4]; if (M(6)) t3 = qt[6] * bp[6]; int x0 = FP_MUL(t1 - t3, A1); t3 += t1; t1 = t0 - t2; t0 += t2; t2 = t3 + x0; t3 = t0 - t2; t0 += t2; t2 = t1 - x0; t1 += x0; } tp[0] = t0 + t7; tp[1] = t1 + t6; tp[2] = t2 + t5; tp[3] = t3 + t4; tp[4] = t3 - t4; tp[5] = t2 - t5; tp[6] = t1 - t6; tp[7] = t0 - t7; } qt += 8; tp += 8; bp += 8; m0 >>= 8;#ifndef INT_64 m0 |= m1 << 24; m1 >>= 8;#endif } tp -= 64; /* * Second pass is 1D transform over the rows of the temp array. */ for (i = 8; --i >= 0; ) { int t4 = tp[8*1]; int t5 = tp[8*3]; int t6 = tp[8*5]; int t7 = tp[8*7]; if ((t4|t5|t6|t7) != 0) { /* odd part */ int x0 = t6 - t5; t6 += t5; int x1 = t4 - t7; t7 += t4; t5 = FP_MUL(t7 - t6, A3); t7 += t6; t4 = FP_MUL(x1 + x0, A5); t6 = FP_MUL(x1, A4) - t4; t4 += FP_MUL(x0, A2); t7 += t6; t6 += t5; t5 += t4; } int t0 = tp[8*0]; int t1 = tp[8*2]; int t2 = tp[8*4]; int t3 = tp[8*6]; if ((t0|t1|t2|t3) != 0) { /* even part */ int x0 = FP_MUL(t1 - t3, A1); t3 += t1; t1 = t0 - t2; t0 += t2; t2 = t3 + x0; t3 = t0 - t2; t0 += t2; t2 = t1 - x0; t1 += x0; } if (in != 0) { PIXDEF; DOPIXIN(t0 + t7, 0); DOPIXIN(t1 + t6, 1); DOPIXIN(t2 + t5, 2); DOPIXIN(t3 + t4, 3); DID4PIX; DOPIXIN(t3 - t4, 4); DOPIXIN(t2 - t5, 5); DOPIXIN(t1 - t6, 6); DOPIXIN(t0 - t7, 7); if (oflo & ~0xff) { int t; pix = 0; DOPIXINLIMIT(t0 + t7, 0); DOPIXINLIMIT(t1 + t6, 1); DOPIXINLIMIT(t2 + t5, 2); DOPIXINLIMIT(t3 + t4, 3); DID4PIX; DOPIXINLIMIT(t3 - t4, 4); DOPIXINLIMIT(t2 - t5, 5); DOPIXINLIMIT(t1 - t6, 6); DOPIXINLIMIT(t0 - t7, 7); } PSTORE; in += stride; } else { PIXDEF; DOPIX(t0 + t7, 0); DOPIX(t1 + t6, 1); DOPIX(t2 + t5, 2); DOPIX(t3 + t4, 3); DID4PIX; DOPIX(t3 - t4, 4); DOPIX(t2 - t5, 5); DOPIX(t1 - t6, 6); DOPIX(t0 - t7, 7); if (oflo & ~0xff) { int t; pix = 0; DOPIXLIMIT(t0 + t7, 0); DOPIXLIMIT(t1 + t6, 1); DOPIXLIMIT(t2 + t5, 2); DOPIXLIMIT(t3 + t4, 3); DID4PIX; DOPIXLIMIT(t3 - t4, 4); DOPIXLIMIT(t2 - t5, 5); DOPIXLIMIT(t1 - t6, 6); DOPIXLIMIT(t0 - t7, 7); } PSTORE; } tp += 1; p += stride; }}/* * This macro does the combined descale-and-quantize * multiply. It truncates rather than rounds to give * the behavior required for the h.261 deadband quantizer. */#define FWD_DandQ(v, iq) short((v) * qt[iq])void dct_fdct(const u_char* in, int stride, short* out, const float* qt){ float tmp[64]; float* tp = tmp; int i; for (i = 8; --i >= 0; ) { float x0, x1, x2, x3, t0, t1, t2, t3, t4, t5, t6, t7; t0 = float(in[0] + in[7]); t7 = float(in[0] - in[7]); t1 = float(in[1] + in[6]); t6 = float(in[1] - in[6]); t2 = float(in[2] + in[5]); t5 = float(in[2] - in[5]); t3 = float(in[3] + in[4]); t4 = float(in[3] - in[4]); /* even part */ x0 = t0 + t3; x2 = t1 + t2; tp[8*0] = x0 + x2; tp[8*4] = x0 - x2; x1 = t0 - t3; x3 = t1 - t2; t0 = (x1 + x3) * FA1; tp[8*2] = x1 + t0; tp[8*6] = x1 - t0; /* odd part */ x0 = t4 + t5; x1 = t5 + t6; x2 = t6 + t7; t3 = x1 * FA1; t4 = t7 - t3; t0 = (x0 - x2) * FA5; t1 = x0 * FA2 + t0; tp[8*3] = t4 - t1; tp[8*5] = t4 + t1; t7 += t3; t2 = x2 * FA4 + t0; tp[8*1] = t7 + t2; tp[8*7] = t7 - t2; in += stride; tp += 1; } tp -= 8; for (i = 8; --i >= 0; ) { float x0, x1, x2, x3, t0, t1, t2, t3, t4, t5, t6, t7; t0 = tp[0] + tp[7]; t7 = tp[0] - tp[7]; t1 = tp[1] + tp[6]; t6 = tp[1] - tp[6]; t2 = tp[2] + tp[5]; t5 = tp[2] - tp[5]; t3 = tp[3] + tp[4]; t4 = tp[3] - tp[4]; /* even part */ x0 = t0 + t3; x2 = t1 + t2; out[0] = FWD_DandQ(x0 + x2, 0); out[4] = FWD_DandQ(x0 - x2, 4); x1 = t0 - t3; x3 = t1 - t2; t0 = (x1 + x3) * FA1; out[2] = FWD_DandQ(x1 + t0, 2); out[6] = FWD_DandQ(x1 - t0, 6); /* odd part */ x0 = t4 + t5; x1 = t5 + t6; x2 = t6 + t7; t3 = x1 * FA1; t4 = t7 - t3; t0 = (x0 - x2) * FA5; t1 = x0 * FA2 + t0; out[3] = FWD_DandQ(t4 - t1, 3); out[5] = FWD_DandQ(t4 + t1, 5); t7 += t3; t2 = x2 * FA4 + t0; out[1] = FWD_DandQ(t7 + t2, 1); out[7] = FWD_DandQ(t7 - t2, 7); out += 8; tp += 8; qt += 8; }}/* * decimate the *rows* of the two input 8x8 DCT matrices into * a single output matrix. we decimate rows rather than * columns even though we want column decimation because * the DCTs are stored in column order. */#if 0voiddct_decimate(const short* in0, const short* in1, short* o){ for (int k = 0; k < 8; ++k) { int x00 = in0[0]; int x01 = in0[1]; int x02 = in0[2]; int x03 = in0[3]; int x10 = in1[0]; int x11 = in1[1]; int x12 = in1[2]; int x13 = in1[3];#define X_N 4#define X_5(v) ((v) << (X_N - 1))#define X_25(v) ((v) << (X_N - 2))#define X_125(v) ((v) << (X_N - 3))#define X_0625(v) ((v) << (X_N - 4))#define X_375(v) (X_25(v) + X_125(v))#define X_625(v) (X_5(v) + X_125(v))#define X_75(v) (X_5(v) + X_25(v))#define X_6875(v) (X_5(v) + X_125(v) + X_0625(v))#define X_1875(v) (X_125(v) + X_0625(v))#define X_NORM(v) ((v) >> X_N) /* * 0.50000000 0.09011998 0.00000000 0.10630376 * 0.50000000 0.09011998 0.00000000 0.10630376 * 0.45306372 0.28832037 0.03732892 0.08667963 * -0.45306372 0.11942621 0.10630376 -0.06764951 * 0.00000000 0.49039264 0.17677670 0.00000000 * 0.00000000 -0.49039264 -0.17677670 0.00000000 * -0.15909482 0.34009707 0.38408888 0.05735049 * 0.15909482 0.43576792 -0.09011998 -0.13845632 * 0.00000000 -0.03732892 0.46193977 0.25663998 * 0.00000000 -0.03732892 0.46193977 0.25663998 * 0.10630376 -0.18235049 0.25663998 0.42361940 * -0.10630376 -0.16332037 -0.45306372 -0.01587282 * 0.00000000 0.00000000 -0.07322330 0.41573481 * 0.00000000 0.00000000 0.07322330 -0.41573481 * -0.09011998 0.13399123 -0.18766514 0.24442621 * 0.09011998 0.13845632 0.15909482 0.47539609 */ o[0] = X_NORM(X_5(x00 + x10) + X_0625(x01 + x11) + X_125(x03 + x13)); o[1] = X_NORM(X_5(x00 - x10) + X_25(x01) + X_0625(x03) + X_125(x11 + x12)); o[2] = X_NORM(X_5(x01 - x11) + X_1875(x02 + x12)); o[3] = X_NORM(X_1875(x10 - x00) + X_375(x01 + x02) + X_5(x11) - X_125(x13)); o[4] = X_NORM(X_5(x02 + x12) + X_25(x03 + x13)); o[5] = X_NORM(X_125(x00 - x10) - X_1875(x01 + x11) + X_25(x02) + X_5(x03 - x12)); o[6] = X_NORM(X_625(x12 - x02) + X_375(x03 + x13)); o[7] = X_NORM(X_125(x01 - x00 + x11 + x10 + x12) + X_1875(x02) + X_25(x03) + X_5(x13)); o += 8; in0 += 8; in1 += 8; }}#endif#ifdef _MSC_VER// 'initializing' : truncation from 'const double to float#pragma warning(default: 4305)#endif/* What follows is the assembly language implementation of the mmx dct */ /*************************************************************************** * * This program has been developed by Intel Corporation. * You have Intel's permission to incorporate this code * into your product, royalty free. Intel has various * intellectual property rights which it may assert under * certain circumstances, such as if another manufacturer's * processor mis-identifies itself as being "GenuineIntel" * when the CPUID instruction is executed. * * Intel specifically disclaims all warranties, express or * implied, and all liability, including consequential and * other indirect damages, for the use of this code, * including liability for infringement of any proprietary * rights, and including the warranties of merchantability * and fitness for a particular purpose. Intel does not * assume any responsibility for any errors which may
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -