📄 r8x8invdct_ieee.asm

📁 ADI BF DSP的FFT汇编优化后的代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
上一页 12
        R7 = (A1 -= R7.H * R6.L), R6 = (A0 -= R7.L * R6.L) (IS) || I2 += M1;
    
/*
*  The following 8 instructions do the Stage 1 computation and write the data
*  to the buffer in a transposed way.
*  Meanwhile, the input data for the next iteration is read.
*  The following operations are performed.
*
*  Element 0 = Element 0 + Element 7.
*  Element 7 = Element 7 - Element 0.
*  Element 2 = Element 2 + Element 5.
*  Element 5 = Element 2 - Element 5.
*  Element 1 = Element 6 - Element 1.
*  Element 6 = Element 6 + Element 1.
*  Element 3 = Element 3 - Element 4.
*  Element 4 = Element 3 + Element 4.
*
*  NOTE(review): the instructions below form outputs 7 and 3 as R4 - R2 and
*  R4 - R6; confirm the operand order of the "Element 7" and "Element 3" lines
*  above against the register assignments made earlier in this loop.
*/
    
        R5.H = R4 + R2 (RND12);
                            // Compute 0
        R5.L = R4 - R2 (RND12)  ||  R4 = [SP + 36];
                            // Compute 7 
        R2.H = R1 + R7 (RND12)  || W[P0++P3] = R5.H;
                            // Compute 2, Write 0 
        R2.L = R1 - R7 (RND12)  || W[P1++P4] = R5.L || R7 = [I3++];
                            // Compute 5, Write 7 
        R5.H = R0 - R3 (RND12)  || W[P0++P3] = R2.H || R1.L = W[I1++];
                            // Compute 1, Write 2 
        R5.L = R0 + R3 (RND12)  || W[P1++P4] = R2.L || R0.H = W[I0++];
                            // Compute 6, Write 5 
        R3.H = R4 - R6 (RND12)  || W[P0++P3] = R5.H || R0.L = W[I2++];
                            // Compute 3, Write 1 
        R3.L = R4 + R6 (RND12)  || W[P1++P4] = R5.L || R1.H = W[I0++];
                            // Compute 4, Write 6 
    
/*
*  These two instructions repeat the two operations performed just before the
*  loop, preparing the next iteration.
*  Elements 3 and 4 are written at the same time.
*/
    
        A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || W[P0++P2] = R3.H 
        || R1.H = W[I0--];
ROW1_END:
        R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS) 
        || W[P1++P5] = R3.L || R7 = [I3++];
    
    // Set-up for the ROW2 pass: reload the input/coefficient/output pointers
    // from B2, B3 and B0, preload Elements 0, 4, 2 and 6 (16-bit W[] loads),
    // start the first butterfly and arm hardware loop 0.
    // B0, B2, B3, M1, M3 and P3-P5 are initialised outside this excerpt.
    I0 = B2;                // I0 points to Input Element (0, 0)
    I2 = B2;                // I2 points to Input Element (0, 0)
    I2 += M3 || R0.H = W[I0];
                            // Element 0 is read in R0.H 
                            // I2 is advanced by M3 (to Element (0, 4), per the
                            // notes below; M3 is set outside this excerpt)
    I1 = I2;                // I1 = I2; after the "+= 4" on the next line it
                            // points to input Element (0, 6)
    I1 += 4  || R0.L = W[I2++];
                            // I2 pointed to input Element (0, 4) for this
                            // read and is post-incremented afterwards
                            // Element 4 is read in R0.L
    P2 = 8 (X);             // Loop count, consumed by the LSETUP below
    I3 = B3;                // I3 points to Coefficients
    P0 = B0;                // P0 points to array Element (0, 0) for writing 
                            // output
    P1 = B0;                // Second output pointer; offset by 112 bytes below
    R7 = [I3++];            // R7.H = C4 and R7.L = C4
    NOP;                    // NOTE(review): reason for this NOP is not stated;
                            // presumably it separates the R7 load from its
                            // first use in the MAC below - confirm
    
/*
*   The following operation is done in 2 instructions.
*   A1 = Element 0 * cos(pi/4) 
*   A0 = Element 0 * cos(pi/4)
*   A1 = A1 + Element 4 * cos(pi/4)
*   A0 = A0 - Element 4 * cos(pi/4)
*   At the same time the values of Elements 2 and 6 are read into R1.H and
*   R1.L respectively.
*   The coefficients C2 and C6 are read into R7.H and R7.L.
*   At the end R3 holds the 0th and R2 holds the 4th Element.
*/
    
    A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || I0 += 4  || R1.L = W[I1++];
    R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS) || R1.H = W[I0--] 
    || R7 = [I3++];
    
    LSETUP (ROW2_START, ROW2_END) LC0 = P2;
                            //Loop for 8 rows 
                            // LC0 has now been loaded from P2, so P2 is
                            // reused below for address arithmetic
    P2 = 112 (X);           // Byte offset added to P1 on the next line
    P1 = P1 + P2;           // P1 = B0 + 112 (presumably the last row of an
                            // 8x8 array of 16-bit elements - confirm)
    P2 = -94(X);            // Post-modify used by the W[P0++P2] store at the
                            // end of the loop body
    
ROW2_START:
/*
*   Body of the ROW2 hardware loop (ROW2_START .. ROW2_END). LC0 is loaded
*   from P2 = 8 by the LSETUP that precedes this label.
*
*   The following two instructions do -
*   A1 = Element 2 * cos(3pi/8) 
*   A0 = Element 2 * cos(pi/8)
*   A1 = A1 - Element 6 * cos(pi/8)
*   A0 = A0 + Element 6 * cos(3pi/8)
*   Elements 1 and 7 are read into R5.H and R5.L.
*   Coefficients C1 and C7 are loaded into R7.H and R7.L respectively.
*   At the end R1 holds the 2nd and R0 holds the 6th element.
*/
    
        A1= R7.L * R1.H, A0 = R7.H * R1.H (IS)  || I0 += 4  || R5.H = W[I0];
        R1 = (A1 -= R7.H * R1.L) , R0 = (A0 += R7.L * R1.L) (IS)
        || R5.L = W[I1--] || R7 = [I3++];
    
/*
*   The following two instructions do -
*   Element 0 = Element 0 + Element 6.
*   Element 4 = Element 4 + Element 2.
*   Element 2 = Element 4 - Element 2.
*   Element 6 = Element 0 - Element 6.
*   On entry Element 0 is in R3, 4 is in R2, 2 is in R1 and 6 is in R0.
*   R3 (the new Element 0) is saved at [SP + 32] to free the register; it is
*   reloaded into R4 further down. Element 3 is read into R6.L.
*/
    
        R3 = R3 + R0, R0 = R3 - R0;     
        R2 = R2 + R1, R1 = R2 - R1 || [SP + 32] = R3 || R6.L = W[I0--];
    
/*
*  In the following 8 instructions the Stage 4, 3 and 2 computations of the
*  butterfly for elements 1, 5, 3 and 7 have been combined.
*  R5.H and R5.L hold data 1 and 7 respectively.
*  R6.H and R6.L hold data 5 and 3 respectively (R6.H is loaded by the first
*  instruction below).
*  For the first two instructions R7.H has C1 and R7.L has C7.
*  For the next four instructions R7.H has C3 and R7.L has C5.
*  For the last two instructions R7.H has C1 and R7.L has C7 again.
*  After combining stages 4, 3 and 2 the final four equations are
*  obtained. These give the output of Stage 2 directly.
*
*  Element 1 = C7 * Element1 - C1 * Element 7 + C3 * Element 5 - C5 * Element 3.
*  Element 7 = C1 * Element1 + C7 * Element 7 + C5 * Element 5 + C3 * Element 3.
*  Element 5 = C5 * Element1 + C3 * Element 7 + C7 * Element 5 - C1 * Element 3.
*  Element 3 = C3 * Element1 - C5 * Element 7 - C1 * Element 5 - C7 * Element 3.
*
*  The first 4 instructions implement the first two equations. The next four
*  instructions implement the last two equations.
*  At the end Element 1 is in R3, 7 in R2, 5 in R7 and 3 in R6.
*  Meanwhile R2 (Element 4) is saved at [SP + 36], the saved Element 0 is
*  reloaded from [SP + 32] into R4, and the input pointers I0, I1 and I2 are
*  modified (by -4 and by M1; M1 is set outside this excerpt).
*/
    
        A1  = R7.L * R5.H, A0  = R7.H * R5.H (IS) || [SP + 36] = R2
        || R6.H = W[I2--];
        A1 -= R7.H * R5.L, A0 += R7.L * R5.L (IS) || I0 -= 4 || R7 = [I3++];
        A1 += R7.H * R6.H, A0 += R7.L * R6.H (IS) || I0 += M1;
        R3 = (A1 -= R7.L * R6.L), R2 = (A0 += R7.H * R6.L) (IS);                
        A1  = R7.L * R5.H, A0  = R7.H * R5.H (IS)  || R4 = [SP + 32];
        A1 += R7.H * R5.L, A0 -= R7.L * R5.L (IS)   || I1 += M1 || R7 = [I3++];
        A1 += R7.L * R6.H, A0 -= R7.H * R6.H (IS);
        R7 = (A1 -= R7.H * R6.L), R6 = (A0 -= R7.L * R6.L) (IS) || I2 += M1;
    
/*
*  The following 8 instructions do the Stage 1 computation and write the data
*  to the buffer in a transposed way.
*  Meanwhile, the input data for the next iteration is read.
*  The following operations are performed (operand order as coded below:
*  R4 holds Element 0, later Element 4; R2 = 7, R1 = 2, R7 = 5, R0 = 6,
*  R3 = 1, R6 = 3).
*
*  Element 0 = Element 0 + Element 7.
*  Element 7 = Element 0 - Element 7.
*  Element 2 = Element 2 + Element 5.
*  Element 5 = Element 2 - Element 5.
*  Element 1 = Element 6 - Element 1.
*  Element 6 = Element 6 + Element 1.
*  Element 3 = Element 4 - Element 3.
*  Element 4 = Element 4 + Element 3.
*
*  Results 0, 2, 1 and 3 are stored through P0; results 7, 5, 6 and 4 are
*  stored through P1. The post-modify registers P3, P4 and P5 are set outside
*  this excerpt.
*  These add/subtracts use the (RND20) option, whereas the corresponding
*  instructions of the ROW1 loop use (RND12).
*/
    
        R5.H = R4 + R2 (RND20);
                            // Compute 0
        R5.L = R4 - R2 (RND20)  ||  R4 = [SP + 36];
                            // Compute 7; R4 is reloaded with the saved
                            // Element 4
        R2.H = R1 + R7 (RND20)  || W[P0++P3] = R5.H;
                            // Compute 2, Write 0 
        R2.L = R1 - R7 (RND20)  || W[P1++P4] = R5.L || R7 = [I3++];
                            // Compute 5, Write 7 
        R5.H = R0 - R3 (RND20)  || W[P0++P3] = R2.H || R1.L = W[I1++];
                            // Compute 1, Write 2 
        R5.L = R0 + R3 (RND20)  || W[P1++P4] = R2.L || R0.H = W[I0++];
                            // Compute 6, Write 5 
        R3.H = R4 - R6 (RND20)  || W[P0++P3] = R5.H || R0.L = W[I2++];
                            // Compute 3, Write 1 
        R3.L = R4 + R6 (RND20)  || W[P1++P4] = R5.L || R1.H = W[I0++];
                            // Compute 4, Write 6 
    
/*
*  These two instructions repeat the two operations performed just before the
*  loop (Element 0 / Element 4 products with R7.H), preparing the next
*  iteration.
*  Elements 3 and 4 are written at the same time, through P0 (post-modified
*  by P2) and P1 (post-modified by P5).
*  NOTE(review): I3 is advanced on every coefficient load and is never reset
*  inside the loop; presumably it is configured as a circular buffer outside
*  this excerpt - confirm.
*/
    
        A1 = R7.H * R0.H, A0 = R7.H * R0.H (IS) || W[P0++P2] = R3.H 
        || R1.H = W[I0--];
ROW2_END: 
        R3 = (A1 += R7.H * R0.L), R2 = ( A0 -= R7.H * R0.L) (IS) 
        || W[P1++P5] = R3.L || R7 = [I3++];
    
TERMINATE:
    // Epilogue: restore the registers R7-R4 and P5-P3 from the stack and
    // return to the caller. The matching push is outside this excerpt.
    (R7:4,P5:3)=[SP++];     //Pop the registers before returning.
    RTS;                    //Return.
    NOP;                    //Avoids one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in memory.
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -