📄 dct.cxx
字号:
t3 += t0;
t2 = FP_MUL(A1, t2);
t3 += t2;
t0 = x0 + t3;
t1 = x1 + t2;
t2 = x1 - t2;
t3 = x0 - t3;
PIXDEF;
DOJPIX(t0 + t7, 0);
DOJPIX(t1 + t6, 1);
DOJPIX(t2 + t5, 2);
DOJPIX(t3 + t4, 3);
DID4PIX;
DOJPIX(t3 - t4, 4);
DOJPIX(t2 - t5, 5);
DOJPIX(t1 - t6, 6);
DOJPIX(t0 - t7, 7);
if (oflo & ~0xff) {
int t;
pix = 0;
DOJPIXLIMIT(t0 + t7, 0);
DOJPIXLIMIT(t1 + t6, 1);
DOJPIXLIMIT(t2 + t5, 2);
DOJPIXLIMIT(t3 + t4, 3);
DID4PIX;
DOJPIXLIMIT(t3 - t4, 4);
DOJPIXLIMIT(t2 - t5, 5);
DOJPIXLIMIT(t1 - t6, 6);
DOJPIXLIMIT(t0 - t7, 7);
}
PSTORE;
++tp;
p += stride;
}
}
/*
 * Inverse 2-D transform, similar to routine above (see comment above),
 * but more appropriate for H.261 instead of JPEG.  This routine does
 * not bias the output by 128, and has an additional argument which is
 * an input array which gets summed together with the inverse-transform.
 * For example, this allows motion-compensation to be folded in here,
 * saving an extra traversal of the block.  The input pointer can be
 * null, if motion-compensation is not needed.
 *
 * This routine does not take a quantization table, since the H.261
 * inverse quantizer is easily implemented via table lookup in the decoder.
 *
 * Parameters:
 *   bp     - coefficient block; eight entries are consumed per
 *            first-pass iteration (64 in total).
 *   m0     - bit mask consulted through M() and tested directly against
 *            0xfe, 0xaa and 0x55.  Its low byte describes the row being
 *            processed and it is shifted right by 8 after each row.
 *            NOTE(review): M() is defined outside this excerpt;
 *            presumably M(n) tests bit n of m0 -- confirm.
 *   m1     - present only when INT_64 is not defined; its bits are fed
 *            into the top of m0 eight at a time as rows are consumed.
 *   p      - output position, advanced by stride after every
 *            second-pass iteration.
 *   stride - increment applied to p (and to in, when it is non-null)
 *            after each second-pass iteration.
 *   in     - optional block that is combined with the transform output
 *            through the DOPIXIN macros; may be null.
 */
void
#ifdef INT_64
rdct(register short *bp, INT_64 m0, u_char* p, int stride, const u_char* in)
#else
rdct(register short *bp, u_int m0, u_int m1, u_char* p, int stride, const u_char *in)
#endif
{
/* Intermediate 8x8 result of the first pass. */
int tmp[64];
int* tp = tmp;
/*
 * Per-coefficient multipliers applied while each row is loaded
 * (cross_stage is defined elsewhere in this file).
 */
const int* qt = cross_stage;
/*
 * First pass is 1D transform over the rows of the input array.
 */
int i;
for (i = 8; --i >= 0; ) {
if ((m0 & 0xfe) == 0) {
/*
 * All ac terms are zero.
 * Every output of this row equals the scaled first term
 * (or zero when that term is flagged absent as well).
 */
int v = 0;
if (M(0))
v = qt[0] * bp[0];
tp[0] = v;
tp[1] = v;
tp[2] = v;
tp[3] = v;
tp[4] = v;
tp[5] = v;
tp[6] = v;
tp[7] = v;
} else {
int t4 = 0, t5 = 0, t6 = 0, t7 = 0;
/*
 * 0xaa selects mask bits 1, 3, 5 and 7.  When none of them is
 * set the odd part is skipped and t4..t7 stay zero.
 */
if (m0 & 0xaa) {
/* odd part */
if (M(1))
t4 = qt[1] * bp[1];
if (M(3))
t5 = qt[3] * bp[3];
if (M(5))
t6 = qt[5] * bp[5];
if (M(7))
t7 = qt[7] * bp[7];
int x0 = t6 - t5;
t6 += t5;
int x1 = t4 - t7;
t7 += t4;
t5 = FP_MUL(t7 - t6, A3);
t7 += t6;
t4 = FP_MUL(x1 + x0, A5);
t6 = FP_MUL(x1, A4) - t4;
t4 += FP_MUL(x0, A2);
t7 += t6;
t6 += t5;
t5 += t4;
}
int t0 = 0, t1 = 0, t2 = 0, t3 = 0;
/*
 * 0x55 selects mask bits 0, 2, 4 and 6.  When none of them is
 * set the even part is skipped and t0..t3 stay zero.
 */
if (m0 & 0x55) {
/* even part */
if (M(0))
t0 = qt[0] * bp[0];
if (M(2))
t1 = qt[2] * bp[2];
if (M(4))
t2 = qt[4] * bp[4];
if (M(6))
t3 = qt[6] * bp[6];
int x0 = FP_MUL(t1 - t3, A1);
t3 += t1;
t1 = t0 - t2;
t0 += t2;
t2 = t3 + x0;
t3 = t0 - t2;
t0 += t2;
t2 = t1 - x0;
t1 += x0;
}
/* Combine the even and odd halves into the eight row outputs. */
tp[0] = t0 + t7;
tp[1] = t1 + t6;
tp[2] = t2 + t5;
tp[3] = t3 + t4;
tp[4] = t3 - t4;
tp[5] = t2 - t5;
tp[6] = t1 - t6;
tp[7] = t0 - t7;
}
/* Advance to the next row of multipliers, temporaries and input. */
qt += 8;
tp += 8;
bp += 8;
/* Bring the next row's mask byte into the low 8 bits of m0. */
m0 >>= 8;
#ifndef INT_64
/*
 * Without a 64-bit mask, refill the top of m0 from m1.
 * NOTE(review): the shift by 24 assumes u_int is 32 bits wide.
 */
m0 |= m1 << 24;
m1 >>= 8;
#endif
}
/* Rewind to the start of tmp for the second pass. */
tp -= 64;
/*
 * Second pass is 1D transform over the columns of the temp array:
 * elements are read with a stride of 8 and tp advances by one
 * element per iteration.
 */
for (i = 8; --i >= 0; ) {
int t4 = tp[8*1];
int t5 = tp[8*3];
int t6 = tp[8*5];
int t7 = tp[8*7];
/* Skip the odd part when all four of its inputs are zero. */
if ((t4|t5|t6|t7) != 0) {
/* odd part */
int x0 = t6 - t5;
t6 += t5;
int x1 = t4 - t7;
t7 += t4;
t5 = FP_MUL(t7 - t6, A3);
t7 += t6;
t4 = FP_MUL(x1 + x0, A5);
t6 = FP_MUL(x1, A4) - t4;
t4 += FP_MUL(x0, A2);
t7 += t6;
t6 += t5;
t5 += t4;
}
int t0 = tp[8*0];
int t1 = tp[8*2];
int t2 = tp[8*4];
int t3 = tp[8*6];
/* Skip the even part when all four of its inputs are zero. */
if ((t0|t1|t2|t3) != 0) {
/* even part */
int x0 = FP_MUL(t1 - t3, A1);
t3 += t1;
t1 = t0 - t2;
t0 += t2;
t2 = t3 + x0;
t3 = t0 - t2;
t0 += t2;
t2 = t1 - x0;
t1 += x0;
}
/*
 * Emit the eight results of this iteration.
 * NOTE(review): PIXDEF, DOPIXIN, DOPIX, DID4PIX, PSTORE and the
 * LIMIT variants are macros defined outside this excerpt.  They
 * appear to declare and fill the locals "pix" and "oflo" used
 * below and to store the finished pixels at p -- confirm against
 * their definitions.
 */
if (in != 0) {
/* Path that folds the "in" block into the output. */
PIXDEF;
DOPIXIN(t0 + t7, 0);
DOPIXIN(t1 + t6, 1);
DOPIXIN(t2 + t5, 2);
DOPIXIN(t3 + t4, 3);
DID4PIX;
DOPIXIN(t3 - t4, 4);
DOPIXIN(t2 - t5, 5);
DOPIXIN(t1 - t6, 6);
DOPIXIN(t0 - t7, 7);
/*
 * Some bit above the low 8 is set in oflo: redo the line
 * with the LIMIT variants (presumably these clamp each
 * value to the 0..255 range; "t" looks like their scratch
 * variable).
 */
if (oflo & ~0xff) {
int t;
pix = 0;
DOPIXINLIMIT(t0 + t7, 0);
DOPIXINLIMIT(t1 + t6, 1);
DOPIXINLIMIT(t2 + t5, 2);
DOPIXINLIMIT(t3 + t4, 3);
DID4PIX;
DOPIXINLIMIT(t3 - t4, 4);
DOPIXINLIMIT(t2 - t5, 5);
DOPIXINLIMIT(t1 - t6, 6);
DOPIXINLIMIT(t0 - t7, 7);
}
PSTORE;
in += stride;
} else {
/* Path without an "in" block: output the transform alone. */
PIXDEF;
DOPIX(t0 + t7, 0);
DOPIX(t1 + t6, 1);
DOPIX(t2 + t5, 2);
DOPIX(t3 + t4, 3);
DID4PIX;
DOPIX(t3 - t4, 4);
DOPIX(t2 - t5, 5);
DOPIX(t1 - t6, 6);
DOPIX(t0 - t7, 7);
/* Same overflow fallback as in the branch above. */
if (oflo & ~0xff) {
int t;
pix = 0;
DOPIXLIMIT(t0 + t7, 0);
DOPIXLIMIT(t1 + t6, 1);
DOPIXLIMIT(t2 + t5, 2);
DOPIXLIMIT(t3 + t4, 3);
DID4PIX;
DOPIXLIMIT(t3 - t4, 4);
DOPIXLIMIT(t2 - t5, 5);
DOPIXLIMIT(t1 - t6, 6);
DOPIXLIMIT(t0 - t7, 7);
}
PSTORE;
}
/* Next column of tmp, next output line. */
tp += 1;
p += stride;
}
}
/*
 * Combined descale-and-quantize step: multiply a transform result by
 * the matching entry of the caller's table and convert to short.  The
 * conversion truncates instead of rounding, which is the behavior
 * required for the h.261 deadband quantizer.
 */
#define FWD_DandQ(v, iq) short((v) * qt[iq])
/*
 * Forward 8x8 transform of a block of unsigned bytes.
 *
 *   in     - first of eight input lines, eight bytes are read per line
 *   stride - distance between consecutive input lines
 *   out    - receives 64 quantized coefficients, eight per output row
 *   qt     - 64 multipliers, one per output coefficient, applied by
 *            FWD_DandQ
 *
 * Pass 1 transforms every input line and stores the result transposed
 * in a scratch array; pass 2 transforms the lines of that scratch
 * array and quantizes on the way out.
 */
void fdct(const u_char* in, int stride, short* out, const float* qt)
{
    float work[64];

    /* Pass 1: one input line per iteration, written down a column. */
    float* col = work;
    for (int line = 0; line < 8; ++line) {
        const float sum07 = (float)(in[0] + in[7]);
        const float dif07 = (float)(in[0] - in[7]);
        const float sum16 = (float)(in[1] + in[6]);
        const float dif16 = (float)(in[1] - in[6]);
        const float sum25 = (float)(in[2] + in[5]);
        const float dif25 = (float)(in[2] - in[5]);
        const float sum34 = (float)(in[3] + in[4]);
        const float dif34 = (float)(in[3] - in[4]);

        /* even-indexed outputs come from the sums */
        const float ev0 = sum07 + sum34;
        const float ev2 = sum16 + sum25;
        col[8*0] = ev0 + ev2;
        col[8*4] = ev0 - ev2;
        const float ev1 = sum07 - sum34;
        const float ev3 = sum16 - sum25;
        const float evr = (ev1 + ev3) * FA1;
        col[8*2] = ev1 + evr;
        col[8*6] = ev1 - evr;

        /* odd-indexed outputs come from the differences */
        const float od0 = dif34 + dif25;
        const float od1 = dif25 + dif16;
        const float od2 = dif16 + dif07;
        const float odm = od1 * FA1;
        const float odlo = dif07 - odm;
        const float odz = (od0 - od2) * FA5;
        const float oda = od0 * FA2 + odz;
        col[8*3] = odlo - oda;
        col[8*5] = odlo + oda;
        const float odhi = dif07 + odm;
        const float odb = od2 * FA4 + odz;
        col[8*1] = odhi + odb;
        col[8*7] = odhi - odb;

        in += stride;
        col += 1;
    }

    /* Pass 2: one scratch line per iteration, quantized into out. */
    const float* src = work;
    for (int line = 0; line < 8; ++line) {
        const float sum07 = src[0] + src[7];
        const float dif07 = src[0] - src[7];
        const float sum16 = src[1] + src[6];
        const float dif16 = src[1] - src[6];
        const float sum25 = src[2] + src[5];
        const float dif25 = src[2] - src[5];
        const float sum34 = src[3] + src[4];
        const float dif34 = src[3] - src[4];

        /* even-indexed outputs */
        const float ev0 = sum07 + sum34;
        const float ev2 = sum16 + sum25;
        out[0] = FWD_DandQ(ev0 + ev2, 0);
        out[4] = FWD_DandQ(ev0 - ev2, 4);
        const float ev1 = sum07 - sum34;
        const float ev3 = sum16 - sum25;
        const float evr = (ev1 + ev3) * FA1;
        out[2] = FWD_DandQ(ev1 + evr, 2);
        out[6] = FWD_DandQ(ev1 - evr, 6);

        /* odd-indexed outputs */
        const float od0 = dif34 + dif25;
        const float od1 = dif25 + dif16;
        const float od2 = dif16 + dif07;
        const float odm = od1 * FA1;
        const float odlo = dif07 - odm;
        const float odz = (od0 - od2) * FA5;
        const float oda = od0 * FA2 + odz;
        out[3] = FWD_DandQ(odlo - oda, 3);
        out[5] = FWD_DandQ(odlo + oda, 5);
        const float odhi = dif07 + odm;
        const float odb = od2 * FA4 + odz;
        out[1] = FWD_DandQ(odhi + odb, 1);
        out[7] = FWD_DandQ(odhi - odb, 7);

        out += 8;
        src += 8;
        qt += 8;
    }
}
/*
 * Decimate the *rows* of the two input 8x8 DCT matrices into
 * a single output matrix.  We decimate rows rather than
 * columns even though we want column decimation because
 * the DCTs are stored in column order.
 *
 *   in0, in1 - the two 64-entry input blocks.  Only the first four
 *              entries of each 8-entry row are read.
 *   o        - 64-entry output block; all eight entries of every row
 *              are written.
 *
 * Each output is a weighted sum of the eight values read from the
 * matching row of in0 and in1.  The weights are multiples of 1/16,
 * applied as integer multiplies by a power of two followed by a
 * final shift right by X_N bits.
 */

/*
 * Fixed-point weight helpers (X_N fractional bits).
 *
 * The scaling is written as a multiplication rather than "<<" because
 * the operands are signed and frequently negative, and left-shifting a
 * negative value is undefined behavior.  For non-negative operands the
 * result is the same as the shift.
 */
#define X_N 4
#define X_5(v) ((v) * (1 << (X_N - 1)))
#define X_25(v) ((v) * (1 << (X_N - 2)))
#define X_125(v) ((v) * (1 << (X_N - 3)))
#define X_0625(v) ((v) * (1 << (X_N - 4)))
#define X_375(v) (X_25(v) + X_125(v))
#define X_625(v) (X_5(v) + X_125(v))
#define X_75(v) (X_5(v) + X_25(v))
#define X_6875(v) (X_5(v) + X_125(v) + X_0625(v))
#define X_1875(v) (X_125(v) + X_0625(v))
/*
 * Drop the fractional bits.  ">>" on a negative int is
 * implementation-defined (not undefined); it is kept so that the
 * rounding of negative sums is unchanged.
 */
#define X_NORM(v) ((v) >> X_N)

void
dct_decimate(const short* in0, const short* in1, short* o)
{
    for (int k = 0; k < 8; ++k) {
        int x00 = in0[0];
        int x01 = in0[1];
        int x02 = in0[2];
        int x03 = in0[3];
        int x10 = in1[0];
        int x11 = in1[1];
        int x12 = in1[2];
        int x13 = in1[3];

        /*
         * 0.50000000 0.09011998 0.00000000 0.10630376
         * 0.50000000 0.09011998 0.00000000 0.10630376
         * 0.45306372 0.28832037 0.03732892 0.08667963
         * -0.45306372 0.11942621 0.10630376 -0.06764951
         * 0.00000000 0.49039264 0.17677670 0.00000000
         * 0.00000000 -0.49039264 -0.17677670 0.00000000
         * -0.15909482 0.34009707 0.38408888 0.05735049
         * 0.15909482 0.43576792 -0.09011998 -0.13845632
         * 0.00000000 -0.03732892 0.46193977 0.25663998
         * 0.00000000 -0.03732892 0.46193977 0.25663998
         * 0.10630376 -0.18235049 0.25663998 0.42361940
         * -0.10630376 -0.16332037 -0.45306372 -0.01587282
         * 0.00000000 0.00000000 -0.07322330 0.41573481
         * 0.00000000 0.00000000 0.07322330 -0.41573481
         * -0.09011998 0.13399123 -0.18766514 0.24442621
         * 0.09011998 0.13845632 0.15909482 0.47539609
         *
         * NOTE(review): if the table is read as one (in0, in1) pair of
         * rows per output, the expressions for o[2] (sign of x12),
         * o[6] (0.625/0.375 weights, sign of x13) and o[7] (sign of
         * x02) do not match it.  The arithmetic is left exactly as it
         * was; confirm which of the two is authoritative before
         * changing either.
         */
        o[0] = X_NORM(X_5(x00 + x10) + X_0625(x01 + x11) +
                      X_125(x03 + x13));
        o[1] = X_NORM(X_5(x00 - x10) + X_25(x01) + X_0625(x03) +
                      X_125(x11 + x12));
        o[2] = X_NORM(X_5(x01 - x11) + X_1875(x02 + x12));
        o[3] = X_NORM(X_1875(x10 - x00) + X_375(x01 + x02) +
                      X_5(x11) - X_125(x13));
        o[4] = X_NORM(X_5(x02 + x12) + X_25(x03 + x13));
        o[5] = X_NORM(X_125(x00 - x10) - X_1875(x01 + x11) +
                      X_25(x02) + X_5(x03 - x12));
        o[6] = X_NORM(X_625(x12 - x02) + X_375(x03 + x13));
        o[7] = X_NORM(X_125(x01 - x00 + x11 + x10 + x12) +
                      X_1875(x02) + X_25(x03) + X_5(x13));

        o += 8;
        in0 += 8;
        in1 += 8;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -