📄 fdct_8x8.h
字号:
/* f6 = dct_io_ptr[6]; */
/* f7 = dct_io_ptr[7]; */
/* */
/* // ---------------------------------------------------- // */
/* // Stage 1: Separate into even and odd halves. // */
/* // ---------------------------------------------------- // */
/* g0 = f0 + f7; h2 = f0 - f7; */
/* g1 = f1 + f6; h3 = f1 - f6; */
/* h1 = f2 + f5; g3 = f2 - f5; */
/* h0 = f3 + f4; g2 = f3 - f4; */
/* */
/* // ---------------------------------------------------- // */
/* // Stage 2 // */
/* // ---------------------------------------------------- // */
/* p0 = g0 + h0; r0 = g0 - h0; */
/* p1 = g1 + h1; r1 = g1 - h1; */
/* q1 = g2; s1 = h2; */
/* */
/* s0a= h3 + g3; q0a= h3 - g3; */
/* q0 = (q0a * c0 + 0x7FFF) >> 16; */
/* s0 = (s0a * c0 + 0x7FFF) >> 16; */
/* */
/* // ---------------------------------------------------- // */
/* // Stage 3 // */
/* // ---------------------------------------------------- // */
/* P0 = p0 + p1; P1 = p0 - p1; */
/* R1 = c6 * r1 + c2 * r0; R0 = c6 * r0 - c2 * r1; */
/* */
/* Q1 = q1 + q0; Q0 = q1 - q0; */
/* S1 = s1 + s0; S0 = s1 - s0; */
/* */
/* // ---------------------------------------------------- // */
/* // Stage 4 // */
/* // ---------------------------------------------------- // */
/* F0 = P0; F4 = P1; */
/* F2 = R1; F6 = R0; */
/* */
/* F1 = c7 * Q1 + c1 * S1; F7 = c7 * S1 - c1 * Q1; */
/* F5 = c3 * Q0 + c5 * S0; F3 = c3 * S0 - c5 * Q0; */
/* */
/* // ---------------------------------------------------- // */
/* // Round and truncate values. // */
/* // // */
/* // Note: F0 and F4 have different rounding since no // */
/* // MPYs have been applied to either term. Also, F0's // */
/* // rounding is slightly different to offset the // */
/* // truncation effects from the horizontal pass (which // */
/* // does not round). // */
/* // ---------------------------------------------------- // */
/* F0r = (F0 + 0x0006) >> 3; */
/* F1r = (F1 + 0x7FFF) >> 16; */
/* F2r = (F2 + 0x7FFF) >> 16; */
/* F3r = (F3 + 0x7FFF) >> 16; */
/* F4r = (F4 + 0x0004) >> 3; */
/* F5r = (F5 + 0x7FFF) >> 16; */
/* F6r = (F6 + 0x7FFF) >> 16; */
/* F7r = (F7 + 0x7FFF) >> 16; */
/* */
/* // ---------------------------------------------------- // */
/* // Store the results // */
/* // ---------------------------------------------------- // */
/* dct_io_ptr[0] = F0r; */
/* dct_io_ptr[1] = F1r; */
/* dct_io_ptr[2] = F2r; */
/* dct_io_ptr[3] = F3r; */
/* dct_io_ptr[4] = F4r; */
/* dct_io_ptr[5] = F5r; */
/* dct_io_ptr[6] = F6r; */
/* dct_io_ptr[7] = F7r; */
/* */
/* // ---------------------------------------------------- // */
/* // Update pointer to next FDCT row. // */
/* // ---------------------------------------------------- // */
/* dct_io_ptr += 8; */
/* } */
/* */
/* return; */
/* } */
/* */
/* */
/* Note: This code guarantees correct operation, even in the case */
/* that 'num_fdcts == 0'. In this case, the function runs for only */
/* 13 cycles (counting 6 cycles of function-call overhead), due to */
/* early-exit code. The early-exit case performs no accesses to the */
/* fdct_data[] array and minimal access to the stack. */
/* */
/* TECHNIQUES */
/* The loop nest in the vertical pass has been collapsed into a */
/* single-level loop. Both vertical and horizontal loops have */
/* been software pipelined. */
/* */
/* For performance, portions of the code outside the loops have been */
/* inter-scheduled with the prolog and epilog code of the loops. */
/* Also, twin stack-pointers are used to accelerate stack accesses. */
/* Finally, pointer values and cosine term registers are reused */
/* between the horizontal and vertical loops to reduce the impact of */
/* pointer and constant reinitialization. */
/* */
/* To save codesize, prolog and epilog collapsing have been performed */
/* to the extent that it does not impact performance. Also, code */
/* outside the loops has been scheduled to pack as tightly into */
/* fetch packets as possible to avoid alignment padding NOPs. */
/* */
/* To reduce register pressure and save some code, the horizontal */
/* loop uses the same pair of pointer registers for both reading and */
/* writing. The pointer increments are on the LDs to permit prolog */
/* and epilog collapsing, since LDs can be speculated. */
/* */
/* Additional section-specific optimization notes are provided below. */
/* */
/* ASSUMPTIONS */
/* Stack is aligned to a word boundary. */
/* */
/* MEMORY NOTE */
/* No bank conflicts occur, regardless of fdct_data[]'s alignment. */
/* */
/* The code requires 16 words of stack space to save Save-On-Entry */
/* (SOE) registers, CSR, IRP, and a spill value. */
/* */
/* Bank usage on C6201: 1 of 4 banks for 40% of loop cycles */
/* 2 of 4 banks for 60% of loop cycles */
/* */
/* Nearly every cycle of this function performs at least one */
/* memory access. */
/* */
/* NOTES */
/* This code masks interrupts for nearly its entire duration. */
/* Interrupts are locked out for '40 + 160 * num_fdcts' cycles. As */
/* a result, the code is interrupt-tolerant, but not interruptible. */
/* */
/* The cosine terms have all been scaled by sqrt(2), so that the */
/* "c4" term is basically an even power of 2. */
/* */
/* The code is completely ENDIAN NEUTRAL. */
/* */
/* CYCLES */
/* cycles = 48 + 160 * num_fdcts */
/* */
/* For num_fdcts = 6, cycles = 1008. */
/* For num_fdcts = 24, cycles = 3888. */
/* */
/* CODESIZE */
/* 1216 bytes. */
/* */
/* SOURCE */
/* Chen FDCT. */
/* */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 1999 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
/* NOTE(review): the guard name begins with an underscore followed by an    */
/* uppercase letter, which is a reserved identifier in ISO C.  It is kept   */
/* unchanged here in case other project files test it -- confirm before     */
/* renaming.                                                                */
#ifndef _FDCT_8X8_H
#define _FDCT_8X8_H

/* Give the routine C linkage when this header is included from C++, so the */
/* call resolves to the unmangled symbol.  (Presumably the implementation   */
/* is hand-scheduled assembly, as the notes in this header suggest --       */
/* confirm against the build.)                                              */
#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */
/*  fdct_8x8 -- forward DCT on a run of 8x8 blocks, computed in place.      */
/*                                                                          */
/*  fdct_data   Pointer to the 16-bit samples to transform.  Results are    */
/*              written back over the input.  Assumes each block occupies   */
/*              64 consecutive shorts and blocks are stored back to back    */
/*              -- TODO confirm against the implementation.                 */
/*  num_fdcts   Number of 8x8 blocks to process.  Zero is permitted: per    */
/*              the notes in this header, the routine exits early without   */
/*              accessing fdct_data[].                                      */
/*                                                                          */
/*  Per the notes in this header, interrupts are masked for nearly the      */
/*  whole call.                                                             */
/*                                                                          */
/*  Parameter names follow the fdct_data / num_fdcts naming used throughout */
/*  this header's documentation.                                            */
/* ------------------------------------------------------------------------ */
void fdct_8x8(short *fdct_data, unsigned num_fdcts);

#ifdef __cplusplus
}
#endif

#endif /* _FDCT_8X8_H */
/* ======================================================================== */
/* End of file: fdct_8x8.h */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 1999 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -