📄 img_idct_8x8.h
字号:
/* The IDCTs cannot be performed completely in-place due to the */
/* transpose that each pass performs. In order to save data memory, */
/* the horizontal pass works from the end of the array towards the */
/* beginning, writing its result one IDCT block later in memory, */
/* thus performing the IDCT nearly-in-place. The vertical pass */
/* performs its IDCTs in the opposite direction, working from the */
/* start of the array towards the end, writing the results in-place. */
/* A nice side effect of this is that the pointer values at the */
/* end of the horizontal loop are a fixed offset relative to their */
/* required values for the vertical loop, regardless of the number */
/* of IDCTs performed. This makes the pointer reinitialization */
/* exceptionally cheap. */
/* */
/* Additional section-specific optimization notes are provided below. */
/* */
/* ASSUMPTIONS */
/* The input array must be aligned on a word boundary, and one */
/* extra block's worth of storage must be present after the list */
/* of IDCT input blocks. */
/* */
/* MEMORY NOTE */
/* No bank conflicts occur. The code requires 16 words of stack */
/* space to save Save-On-Entry (SOE) registers, CSR, IRP, and a */
/* spill value. For correct operation, the input array must be */
/* aligned to a word boundary. */
/* */
/* Bank usage on C6201: */
/* */
/* Horiz loop accesses: 1 of 4 banks for 80% of cycles */
/* 4 of 4 banks for 20% of cycles */
/* */
/* Vert loop accesses: 1 of 4 banks for 73% of cycles */
/* 4 of 4 banks for 18% of cycles */
/* 0 of 4 banks for 9% of cycles */
/* */
/* NOTES */
/* This is a LITTLE ENDIAN implementation. */
/* */
/* This code masks interrupts for nearly its entire duration. */
/* Interrupts are locked out for '53 + 168 * num_idcts' cycles. As */
/* a result, the code is interrupt-tolerant, but not interruptible. */
/* */
/* The cosine terms have all been scaled by sqrt(2), so that the */
/* "c4" term is an exact power of 2 and can be applied as a shift. */
/* */
/* The precision of the final results can be changed by modifying */
/* the constants at the top of the code and reassembling. Usually, */
/* modifying the final-shift constants in the "Symbolic Constants" */
/* section is sufficient. */
/* */
/* CYCLES */
/* cycles = 62 + 168 * num_idcts (or 35 cycles if num_idcts == 0) */
/* */
/* For num_idcts == 6, cycles = 1070. */
/* For num_idcts == 24, cycles = 4094. */
/* */
/* CODESIZE */
/* 1344 bytes */
/* */
/* SOURCE */
/* Even-Odd Decomposition IDCT. */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2002 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
/*#ifndef IMG_IDCT_8X8_H_
#define IMG_IDCT_8X8_H_ 1
void IMG_idct_8x8(short idct_data[], unsigned num_idcts);
#endif */
/* ======================================================================== */
/* End of file: img_idct_8x8.h */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2002 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
void IMG_idct_8x8(short *idct_data, unsigned num_dcts)
{
const short c1 = 0x0B19, c2 = 0x0A74, c3 = 0x0968;
const short c5 = 0x0649, c6 = 0x0454, c7 = 0x0235;
const int c4_shift = 11;
const int round1 = 256, round2 = 32768;
const int trunc1 = 9, trunc2 = 16;
short *i_ptr;
// short *o_ptr;
unsigned i, j;
short X0, X1, X2, X3, X4, X5, X6, X7; // Freq domain terms //
int P0, P1, p0, p1, r0, r1; // Even-half temp //
int g0, g1, h1, h0; // Even-half result //
int g2, g3, h3, h2; // Odd-half result //
int x0, x1, x2, x3, x4, x5, x6, x7; // Resulting samples //
int x0t,x1t,x2t,x3t,x4t,x5t,x6t,x7t; // Truncated result //
int x0s,x1s,x2s,x3s,x4s,x5s,x6s,x7s; // Saturated result //
// ---------------------------------------------------------- //
// Avoid running the code if we don't have any IDCTs to do. //
// ---------------------------------------------------------- //
if (!num_dcts) return;
// ---------------------------------------------------------- //
// Set up pointers. //
// ---------------------------------------------------------- //
i_ptr = idct_data + num_dcts * 64 - 8;
// o_ptr = idct_data + num_dcts * 64 + 7;
for (j = 0; j < num_dcts; j++)
{
// -------------------------------------------------------- //
// Perform Horizontal 1-D IDCT on each 8x8 block. Store //
// out the results transposed. //
// -------------------------------------------------------- //
for (i = 0; i < 8; i++)
{
// ---------------------------------------------------- //
// Load the freq-domain coefficients. //
// ---------------------------------------------------- //
X0 = i_ptr[0];
X1 = i_ptr[1];
X2 = i_ptr[2];
X3 = i_ptr[3];
X4 = i_ptr[4];
X5 = i_ptr[5];
X6 = i_ptr[6];
X7 = i_ptr[7];
//i_ptr -= 8; // decr pointer to next row //
// ---------------------------------------------------- //
// Even part of decomp. Add rounding to DC term. //
// ---------------------------------------------------- //
P0 = (((int)X0) << c4_shift) + round1;
P1 = (((int)X4) << c4_shift);
p0 = P0 + P1;
p1 = P0 - P1;
r1 = X2*c6 - X6*c2;
r0 = X2*c2 + X6*c6;
g0 = p0 + r0;
g1 = p1 + r1;
h1 = p1 - r1;
h0 = p0 - r0;
// ---------------------------------------------------- //
// Odd part of decomp. //
// ---------------------------------------------------- //
g2 = (X1*c7 - X3*c5) + (X5*c3 - X7*c1);
g3 = (X1*c5 - X3*c1) + (X5*c7 + X7*c3);
h3 = (X1*c3 - X3*c7) - (X5*c1 + X7*c5);
h2 = (X1*c1 + X3*c3) + (X5*c5 + X7*c7);
// ---------------------------------------------------- //
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -