/* File: img_fdct_8x8.h */
/* ======================================================================== */
/* */
/* TEXAS INSTRUMENTS, INC. */
/* */
/* NAME */
/* IMG_fdct_8x8 -- 8x8 Block FDCT With Rounding, Endian Neutral */
/* */
/* */
/* REVISION DATE */
/* 28-Oct-1999 */
/* */
/* USAGE */
/* This routine is C callable, and has the following C prototype: */
/* */
/* void IMG_fdct_8x8(short fdct_data[], unsigned num_fdcts) */
/* */
/* The fdct routine accepts a list of 8x8 pixel blocks and performs */
/* FDCTs on each. The array should be laid out identically to */
/* "fdct_data[num_fdcts][8][8]". All operations on this array are */
/* performed entirely in-place. */
/* */
/* Input values are stored in shorts, and may be in the range */
/* [-512,511]. Larger input values may result in overflow. */
/* */
/* This code requires '48 + 160 * num_fdcts' cycles to process */
/* 'num_fdcts' blocks, including function call overhead. When */
/* 'num_fdcts' is zero, an early exit is taken and the function */
/* runs for only 13 cycles (again, including call overhead). */
/* */
/* DESCRIPTION */
/* The IMG_fdct_8x8 function implements a Chen FDCT. Output values are */
/* rounded, providing improved accuracy. Input terms are expected */
/* to be signed 11Q0 values, producing signed 15Q0 results. (A */
/* smaller dynamic range may be used on the input, producing a */
/* correspondingly smaller output range. Typical applications */
/* include processing signed 9Q0 and unsigned 8Q0 pixel data, */
/* producing signed 13Q0 or 12Q0 outputs, respectively.) No */
/* saturation is performed. */
/* */
/* Note: This code guarantees correct operation, even in the case */
/* that 'num_fdcts == 0'. In this case, the function runs for only */
/* 13 cycles (counting 6 cycles of function-call overhead), due to */
/* early-exit code. The early-exit case performs no accesses to the */
/* fdct_data[] array and minimal access to the stack. */
/* */
/* TECHNIQUES */
/* The loop nest in the vertical pass has been collapsed into a */
/* single-level loop. Both vertical and horizontal loops have */
/* been software pipelined. */
/* */
/* For performance, portions of the code outside the loops have been */
/* inter-scheduled with the prolog and epilog code of the loops. */
/* Also, twin stack-pointers are used to accelerate stack accesses. */
/* Finally, pointer values and cosine term registers are reused */
/* between the horizontal and vertical loops to reduce the impact of */
/* pointer and constant reinitialization. */
/* */
/* To save codesize, prolog and epilog collapsing have been performed */
/* to the extent that it does not impact performance. Also, code */
/* outside the loops has been scheduled to pack as tightly into */
/* fetch packets as possible to avoid alignment padding NOPs. */
/* */
/* To reduce register pressure and save some code, the horizontal */
/* loop uses the same pair of pointer registers for both reading and */
/* writing. The pointer increments are on the LDs to permit prolog */
/* and epilog collapsing, since LDs can be speculated. */
/* */
/* Additional section-specific optimization notes are provided below. */
/* */
/* ASSUMPTIONS */
/* Stack is aligned to a word boundary. */
/* */
/* MEMORY NOTE */
/* No bank conflicts occur, regardless of fdct_data[]'s alignment. */
/* */
/* The code requires 16 words of stack space to save Save-On-Entry */
/* (SOE) registers, CSR, IRP, and a spill value. */
/* */
/* Bank usage on C6201: 1 of 4 banks for 40% of loop cycles */
/* 2 of 4 banks for 60% of loop cycles */
/* */
/* Nearly every cycle of this function performs at least one */
/* memory access. */
/* */
/* NOTES */
/* This code masks interrupts for nearly its entire duration. */
/* Interrupts are locked out for '40 + 160 * num_fdcts' cycles. As */
/* a result, the code is interrupt-tolerant, but not interruptible. */
/* */
/* The cosine terms have all been scaled by sqrt(2), so that the */
/* "c4" term is basically an even power of 2. */
/* */
/* The code is completely ENDIAN NEUTRAL. */
/* */
/* CYCLES */
/* cycles = 48 + 160 * num_fdcts */
/* */
/* For num_fdcts = 6, cycles = 1008. */
/* For num_fdcts = 24, cycles = 3888. */
/* */
/* CODESIZE */
/* 1216 bytes. */
/* */
/* SOURCE */
/* Chen FDCT. */
/* */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2002 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
/*#ifndef IMG_FDCT_8X8_H_
#define IMG_FDCT_8X8_H_ 1
void IMG_fdct_8x8(short fdct_data[], unsigned num_fdcts);
#endif */
/* ======================================================================== */
/* End of file: img_fdct_8x8.h */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2002 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
void IMG_fdct_8x8(short *dct_data, unsigned num_fdcts)
{
// -------------------------------------------------------- //
// Set up the cosine coefficients c0..c7. //
// -------------------------------------------------------- //
const unsigned short c1 = 0x2C62, c3 = 0x25A0;
const unsigned short c5 = 0x1924, c7 = 0x08D4;
const unsigned short c0 = 0xB505, c2 = 0x29CF;
const unsigned short c6 = 0x1151;
// -------------------------------------------------------- //
// Intermediate calculations. //
// -------------------------------------------------------- //
short f0, f1, f2, f3,
f4, f5, f6, f7; // Spatial domain samples. //
int g0, g1, h0, h1,
p0, p1; // Even-half intermediate. //
short r0, r1; // Even-half intermediate. //
int P0, P1, R0, R1; // Even-half intermediate. //
short g2, g3, h2, h3; // Odd-half intermediate. //
short q0a,s0a,q0, q1,
s0, s1; // Odd-half intermediate. //
short Q0, Q1, S0, S1; // Odd-half intermediate. //
int F0, F1, F2, F3,
F4, F5, F6, F7; // Freq. domain results. //
int F0r,F1r,F2r,F3r,
F4r,F5r,F6r,F7r; // Rounded, truncated results. //
// -------------------------------------------------------- //
// Input and output pointers, loop control. //
// -------------------------------------------------------- //
unsigned i, j;
short *dct_io_ptr;
// -------------------------------------------------------- //
// Outer vertical loop -- Process each 8x8 block. //
// -------------------------------------------------------- //
dct_io_ptr = dct_data;
for (i = 0; i < num_fdcts; i++)
{
// ---------------------------------------------------- //
// Perform Vert 1-D FDCT on columns within each block. //
// ---------------------------------------------------- //
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -