📄 img_fdct_8x8.h

📁 DM642的mpeg4编码
💻 H
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
/*              r0 = g0 - h0;                   //  Results in Q2.5 //      */
/*              r1 = g1 - h1;                   //  Results in Q2.5 //      */
/*                                                                          */
/*              q1a= (g2 * C4 + 0x8000) >> 16;  //  q1a now in Q2   //      */
/*              s1a= (h2 * C4 + 0x8000) >> 16;  //  s1a now in Q2   //      */
/*              q1 = q1a + q1a;                 //  Results in Q3   //      */
/*              s1 = s1a + s1a;                 //  Results in Q3   //      */
/*                                                                          */
/*              s0 = h3 + g3;                   //  Results in Q3   //      */
/*              q0 = h3 - g3;                   //  Results in Q3   //      */
/*                                                                          */
/*              // ------------------------------------------------ //      */
/*              //  Stage 3                                         //      */
/*              //                                                  //      */
/*              //  Now, the even-half becomes Q0.  This happens    //      */
/*              //  on P0 and P1 because the multiply-by-c4 was     //      */
/*              //  canceled with an upward scaling by sqrt(2),     //      */
/*              //  yielding Q3 intermediate values.  The final     //      */
/*              //  >> 3 leaves these at Q0.  On R0 and R1, this    //      */
/*              //  happens because c2 and c6 are at Q13.5,         //      */
/*              //  yielding Q16 intermediate values.  The final    //      */
/*              //  >> 16 then leaves those values at Q0.           //      */
/*              // ------------------------------------------------ //      */
/*              P0 = ((short)(p0 + p1)) >> 3;   //  Results in Q0   //      */
/*              P1 = ((short)(p0 - p1)) >> 3;   //  Results in Q0   //      */
/*              R1 = (c6 * r1 + c2 * r0 + 0x8000) >> 16; // .. Q0   //      */
/*              R0 = (c6 * r0 - c2 * r1 + 0x8000) >> 16; // .. Q0   //      */
/*                                                                          */
/*              Q1 = q1 + q0;                   //  Results in Q3   //      */
/*              Q0 = q1 - q0;                   //  Results in Q3   //      */
/*              S1 = s1 + s0;                   //  Results in Q3   //      */
/*              S0 = s1 - s0;                   //  Results in Q3   //      */
/*                                                                          */
/*              // ------------------------------------------------ //      */
/*              //  Stage 4                                         //      */
/*              //                                                  //      */
/*              //  Next, the odd-half ends up in Q0. This happens  //      */
/*              //  because our values are in Q3 and our cosine     //      */
/*              //  terms are in Q13, giving us Q16 intermediate    //      */
/*              //  values. The final >> 16 leaves us a Q0 result.  //      */
/*              // ------------------------------------------------ //      */
/*              F0 = P0;                        //  Results in Q0   //      */
/*              F4 = P1;                        //  Results in Q0   //      */
/*              F2 = R1;                        //  Results in Q0   //      */
/*              F6 = R0;                        //  Results in Q0   //      */
/*                                                                          */
/*              F1 = (c7 * Q1 + c1 * S1 + 0x8000) >> 16; // .. Q0   //      */
/*              F7 = (c7 * S1 - c1 * Q1 + 0x8000) >> 16; // .. Q0   //      */
/*              F5 = (c3 * Q0 + c5 * S0 + 0x8000) >> 16; // .. Q0   //      */
/*              F3 = (c3 * S0 - c5 * Q0 + 0x8000) >> 16; // .. Q0   //      */
/*                                                                          */
/*              // ------------------------------------------------ //      */
/*              //  Store the results                               //      */
/*              // ------------------------------------------------ //      */
/*              dct_io_ptr[0] = F0;                                         */
/*              dct_io_ptr[1] = F1;                                         */
/*              dct_io_ptr[2] = F2;                                         */
/*              dct_io_ptr[3] = F3;                                         */
/*              dct_io_ptr[4] = F4;                                         */
/*              dct_io_ptr[5] = F5;                                         */
/*              dct_io_ptr[6] = F6;                                         */
/*              dct_io_ptr[7] = F7;                                         */
/*                                                                          */
/*              // ------------------------------------------------ //      */
/*              //  Update pointer to next FDCT row.                //      */
/*              // ------------------------------------------------ //      */
/*              dct_io_ptr += 8;                                            */
/*          }                                                               */
/*                                                                          */
/*          return;                                                         */
/*      }                                                                   */
/*                                                                          */
/*                                                                          */
/*  TECHNIQUES                                                              */
/*      The loop nest in the vertical pass has been collapsed into a        */
/*      single-level loop.  Both vertical and horizontal loops have         */
/*      been software pipelined.                                            */
/*                                                                          */
/*      For performance, portions of the code outside the loops have been   */
/*      inter-scheduled with the prolog and epilog code of the loops.       */
/*      Also, twin stack-pointers are used to accelerate stack accesses.    */
/*      Finally, pointer values and cosine term registers are reused        */
/*      between the horizontal and vertical loops to reduce the impact of   */
/*      pointer and constant reinitialization.                              */
/*                                                                          */
/*      To save codesize, prolog and epilog collapsing have been performed  */
/*      to the extent that it does not impact performance.                  */
/*                                                                          */
/*      To reduce register pressure and save some code, the horizontal      */
/*      loop uses the same pair of pointer registers for both reading and   */
/*      writing.  The pointer increments are on the LDs to permit prolog    */
/*      and epilog collapsing, since LDs can be speculated.                 */
/*                                                                          */
/*  ASSUMPTIONS                                                             */
/*      Stack is aligned to a double-word boundary.                         */
/*                                                                          */
/*  MEMORY NOTE                                                             */
/*      No bank conflicts occur.                                            */
/*                                                                          */
/*      The "fdct_data[]" array must be aligned on a double-word (8 byte)   */
/*      boundary.                                                           */
/*                                                                          */
/*      The code requires 4 words of stack space to save Save-On-Entry      */
/*      (SOE) registers.                                                    */
/*                                                                          */
/*      Nearly every cycle of this function performs at least one           */
/*      memory access.                                                      */
/*                                                                          */
/*  NOTES                                                                   */
/*      This code is fully interruptible.  Interrupts are blocked only      */
/*      in branch delay slots.                                              */
/*                                                                          */
/*      The cosine terms have all been scaled by sqrt(2), so that the       */
/*      "c4" term is basically an even power of 2.                          */
/*                                                                          */
/*      The code is LITTLE ENDIAN.                                          */
/*                                                                          */
/*  CYCLES                                                                  */
/*      cycles = 50 + 76 * num_fdcts                                        */
/*                                                                          */
/*      For num_fdcts =  6, cycles = 506.                                   */
/*      For num_fdcts = 24, cycles = 1874.                                  */
/*                                                                          */
/*  CODESIZE                                                                */
/*      980 bytes.                                                          */
/*                                                                          */
/*  SOURCE                                                                  */
/*      Chen FDCT.                                                          */
/*                                                                          */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */
/* Include guard: keeps the prototype from being declared more than once.   */
#ifndef IMG_FDCT_8X8_H_
#define IMG_FDCT_8X8_H_ 1

/* ------------------------------------------------------------------------ */
/*  IMG_fdct_8x8 -- forward DCT routine described in the header comment.    */
/*                                                                          */
/*  fdct_data   Array of 16-bit values to transform.  Per the MEMORY NOTE   */
/*              it must be aligned on a double-word (8 byte) boundary.      */
/*              The reference C code stores each row of results through     */
/*              the same pointer it walks over the data, so the buffer is   */
/*              presumably overwritten with the coefficients (in place)     */
/*              -- confirm against the full listing.                        */
/*                                                                          */
/*  num_fdcts   Number of FDCTs to perform; the documented cost is          */
/*              50 + 76 * num_fdcts cycles.  Presumably each FDCT covers    */
/*              one 8x8 block of fdct_data -- TODO confirm.                 */
/*                                                                          */
/*  Returns nothing.  The header notes state the implementation is          */
/*  little-endian and expects a double-word aligned stack.                  */
/* ------------------------------------------------------------------------ */
void IMG_fdct_8x8(short fdct_data[], unsigned num_fdcts);

#endif /* IMG_FDCT_8X8_H_ */
/* ======================================================================== */
/*  End of file:  img_fdct_8x8.h                                            */
/* ------------------------------------------------------------------------ */
/*            Copyright (c) 2003 Texas Instruments, Incorporated.           */
/*                           All Rights Reserved.                           */
/* ======================================================================== */
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -