⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cfftrad2x2non_scaled.asm

📁 ADI BF DSP的FFT汇编优化后的代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
                            // instant)
// MIDDLE_STAGE: per-stage setup for the middle FFT stages. Every pass halves
// P1, hands over to LAST_STAGE once P1 <= 2, and otherwise derives the index
// modifiers (M0-M3), the group base offset (B0), the twiddle stride (P0) and
// the group count (P2) consumed by the group and butterfly loops that follow.
// NOTE(review): B2 appears to hold 2^stage and [SP + 60] appears to hold N
// (inferred from the existing offset comments); both are initialised before
// the part of the file visible here - confirm against the prologue.
MIDDLE_STAGE:
    P1 = P1 >> 1;           // Halve P1 on every pass through the stage loop
    CC = P1 <= 2;
    IF CC JUMP LAST_STAGE;  // P1 <= 2: the remaining stage is done separately
    R0 = B2;
    R0 = R0 << 2 || R1 = [SP + 60];
                            // R0 = 4*B2; R1 = word stored at SP + 60
    M0 = R0;                // Offset for the I row Butterfly inputs (4*2^stage)
    R2 = R0.L * R1.L (IS) || [SP + 8] = P1; 
                            // Integer multiply of the two low halves; P1 is
                            // parked in [SP + 8] and reloaded at GROUP1_END
    M1 = R2;                // Offset for the II row Butterfly inputs 
                            // (4*N*2^stage)
    R2 <<= 1;
    B0 = R2;                // Offset in bytes for the first element of every 
                            // group
    R2 = R0 << 1;
    R1 = R1 << 2 || [SP + 4] = R2;
                            // 4*(2*2^stage) 
                            // ([SP + 4] is read back in REP_GROUP_2, where it
                            // is multiplied by the inner group index P3)
    R2 = R1 - R0;           // Offset to bring the butterfly pointer to the 
                            // start ...
    M2 = R2;                // of the next row 4*(N - 2^stage)
    R2 += -4;
    M3 = R2;                // M3 = M2 - 4
    P2 = P1 >> 1;           // group count (N/2*2^stage)
    P0 = P1 << 1;           // Twiddle factor offset  4*(N/2*2^stage)
    R2 = P2;                // group count
    R3 = R3 - R3 (NS) || [SP + 16] = R2;
                            // R3 = 0 (outer group index); the group count is
                            // kept in [SP + 16] and reloaded at GROUP2_END
    P1 = B2;                // P1 = B2, used as the LC0/LC1 loop count below
    
// REP_GROUP_1 / REP_GROUP_2: heads of the two nested group loops. R3 is the
// outer group index (runs while R3 < R2) and P3 the inner group index (runs
// while P3 < P2). For every (R3, P3) pair the start address
//     B3 + B0*R3 + [SP + 4]*P3      (16-bit x 16-bit products, low halves)
// is loaded into I0, I1 and I3, the outer hardware loop is armed, and the
// remaining index registers are primed for the butterfly loop.
// NOTE(review): B3 is presumably the base address of the data being
// transformed and [SP] the twiddle-table pointer - both are set outside the
// part of the file visible here; confirm.
REP_GROUP_1:
    CC = R2 <= R3;          // Outer group loop
    IF CC JUMP GROUP1_END;  // Leave once R3 has reached the group count in R2
    P3 = -1;                // Starts at -1 because REP_GROUP_2 increments first
    
REP_GROUP_2:
    P3 += 1;                // Inner group loop
    CC = P2 <= P3;
    IF CC JUMP GROUP2_END;  // Leave once P3 has reached P2
    R0 = B0;
    R0 = R0.L*R3.L (IS) || R1 = [SP + 4];
                            // R0 = B0 * R3; R1 = value stored by MIDDLE_STAGE
    R4 = P3;
    R1 = R1.L * R4.L (IS) || P4 = [SP];
                            // R1 = [SP + 4] * P3; P4 = pointer held in [SP]
    R0 = R0 + R1 (NS) || [SP + 12] = P3;
                            // P3 is overwritten inside the butterfly loop, so
                            // a copy is kept in [SP + 12]
    R1 = B3;
    R0 = R0 + R1;           // R0 = B3 + B0*R3 + [SP + 4]*P3
    I0 = R0;
    I1 = R0;
    I3 = R0;

    LSETUP(ST_BFLY_OUT, END_BFLY_OUT) LC0 = P1;
                            // Loop count - 2^stage 
    I2 = I0;
    R0 = [I1 ++ M1] || I2 -= M2;
                            // Load is only used to step I1 by M1 (R0 is
                            // reloaded before it is read); I2 = start - M2
    I3 += M1;           
    I3 -= M2 || NOP;        // I3 = start + M1 - M2
// ST_BFLY_OUT .. END_BFLY_OUT: middle-stage butterfly loop nest. Both the
// outer hardware loop (LC0) and the inner one (LC1) run P1 times.
//   * Each outer pass loads two inputs through I1 (R6, R7) and two through I0
//     (R4, R5), fetches twiddles through P4/P3, and multiplies R6 and R7 by
//     their twiddles (R0, R2) using the A0/A1 accumulators.
//   * Each inner pass combines R4..R7 with four packed 16-bit add/subtract
//     pairs, writes the four results through I3 and I2, and at the same time
//     fetches and twiddle-multiplies the inputs needed by the next pass.
// The multiply pattern (.L = L*L - H*H, .H = L*H + H*L) is a complex multiply
// if the real part sits in the low half and the imaginary part in the high
// half of each word - NOTE(review): confirm the data layout with the caller.
// The (ASR) option shifts every 16-bit add/subtract result right by one bit,
// which is where the middle stages scale their output.
ST_BFLY_OUT:
        P3 = P4;            // P3 starts from the current twiddle pointer P4
        R6 = [I1 ++ M0] || R0 = [P4 ++ P0];
        R5 = R5 << 0 || R7 = [I1++] || P5 = [SP];
                            // The shift by 0 leaves R5 unchanged (presumably
                            // a filler for the first issue slot - confirm);
                            // P5 is reloaded from the same [SP] slot as P4
        A1 = R6.L * R0.H,A0 = R6.L * R0.L || R4 = [I0 ++ M0] || R2 = [P3 ++ P0];
                            // Twiddle factor mult. for the ... 
        R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) || R5 = [I0++] 
        || I1 -= M0;
                            // second and third butterfly inputs 
        A1 = R7.L * R2.H, A0 = R7.L * R2.L || I0 -= M0 || R1 = [I3 ++ M2];
                            // Dummy fetch to increment output pointer 
        R7.H =(A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R1 = [P5 ++ P0]
        || R2 = [I2 ++ M2];
                            // Dummy fetch to increment the twiddle factor 
                            // pointer 
        LSETUP(ST_BFLY_IN, END_BFLY_IN) LC1 = P1;
                            // Loop count - 2^stage 
ST_BFLY_IN:
            R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR);
                            // Compute butterfly outputs 
            R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR) || NOP;
            R4 = R4 +|+ R6, R2 = R4 -|- R6 (ASR) || R6 = [I1 ++ M0] 
            || R1 = [P5 ++ P0]; 
                            // R4/R2 are final here; R6 and R1 already belong
                            // to the next pass
            R5 = R5 +|+ R7, R7 = R5 -|- R7 (ASR) || [I3 ++ M0] = R2 
            || R2 = [P3 ++ P0];
                            // Store R2, then reuse R2 for the next twiddle
            A1 = R6.L * R0.H, A0 = R6.L * R0.L || [I3++] = R7 || I2 += M0;

            R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) 
            || R7 = [I1++] || I0 += M0;
            A1 = R7.L * R2.H, A0 = R7.L * R2.L || [I2] = R5 || I2 -= M0;
            R7.H = (A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R5 = [I0]
            || I0 -= M0;
            A1 = R5.L * R1.H, A0 = R5.L * R1.L || [I2++] = R4 || I3 -= M0;
                            // Twiddle factor mult. for the ... 
END_BFLY_IN:
            R5.H = (A1 += R5.H * R1.L), R5.L = (A0 -= R5.H * R1.H) 
            || R4 = [I0++] || I1 -= M0;
                            // second butterfly input 
END_BFLY_OUT:
        I1 += M3 || R1 = [I0 ++ M3];
                            // Step I1 and I0 by M3; the value loaded into R1
                            // is overwritten before it is read
    
    // Tails of the group loops and of the stage loop. Registers that the
    // butterfly loop reuses (P3, R2) and the P1 value parked by MIDDLE_STAGE
    // are reloaded from their stack slots before looping back.
    P3 = [SP + 12];         // Inner group index saved in REP_GROUP_2
    JUMP REP_GROUP_2;
    
GROUP2_END:
    R2 = [SP + 16];         // Group count saved by MIDDLE_STAGE
    R3 += 1;                // Next outer group
    JUMP REP_GROUP_1;
    
GROUP1_END:
    R0 = B2;
    R0 = R0 << 1 || P1 = [SP + 8];
                            // R0 = 2*B2; P1 = value saved by MIDDLE_STAGE
    B2 = R0;                // B2 is doubled for the next stage
    JUMP MIDDLE_STAGE;
    
/*********************** LAST STAGE ********************************************
    This section performs the butterfly computation for the last stage of the
FFT. It is handled separately from the other stages because, unlike the middle
stages, its output must not be scaled.
*******************************************************************************/
    
// LAST_STAGE: setup for the final FFT stage. Unlike the middle stages there
// are no group loops here: all pointers start from B3, and the modifiers
// M0-M3 and the twiddle stride P0 are derived once before the loop nest.
// NOTE(review): as in MIDDLE_STAGE, B2 appears to hold 2^stage, [SP + 60]
// appears to hold N and [SP] the twiddle-table pointer; all are set outside
// the part of the file visible here - confirm.
LAST_STAGE: 
    I0 = B3;                // I0 and I1 are used for reading the input ...
    I1 = B3;                // to the butterfly
    I3 = B3;                // I2 and I3 are used for writing the output
    R0 = B2;                                        
    R0 = R0 << 2 || R1 = [SP + 60];
                            // R0 = 4*B2; R1 = word stored at SP + 60
    M0 = R0;                // 4*(2^stage)
    R2 = R0.L * R1.L (IS) || P4 = [SP];
                            // Integer multiply of the two low halves;
                            // P4 = pointer held in [SP]
    M1 = R2;                // 4*N*2^stage
    P0 = P1 << 1;           // Twiddle factor offset
    P1 = B2;                // Loop count
    R1 <<= 2;
    R2 = R1 - R0 (NS) || I1 += M1;
                            // I1 = B3 + M1
    M2 = R2;                // 4*(N-2^stage)
    R2 += -4;
    M3 = R2;                // M3 = M2 - 4
    I2 = I0;
    I2 -= M2 || R0 = [I3 ++ M1];
                            // I2 = B3 - M2; the load is only used to step I3
                            // by M1 (R0 is reloaded before it is read)
    I3 -= M2 || NOP;        // I3 = B3 + M1 - M2

    LSETUP(ST_LAST_STAGE_OUT, END_LAST_STAGE_OUT) LC0 = P1;
                            // Loop count - 2^stage 
// ST_LAST_STAGE_OUT .. END_LAST_STAGE_OUT: last-stage butterfly loop nest.
// The instruction sequence mirrors the middle-stage nest (outer loop LC0 and
// inner loop LC1, both run P1 times; inputs read through I0/I1, twiddles
// through P4/P3/P5, results written through I2/I3), with one difference: the
// packed add/subtract pairs use the (S) option, so each 16-bit result is
// saturated and not shifted - the last stage does not scale its output.
ST_LAST_STAGE_OUT:  
        P3 = P4;            // P3 starts from the current twiddle pointer P4
        R6 = [I1 ++ M0] || R0 = [P4 ++ P0];
        R5 = R5 << 0 || R7 = [I1++] || P5 = [SP];
                            // The shift by 0 leaves R5 unchanged (presumably
                            // a filler for the first issue slot - confirm)
        A1 = R6.L * R0.H, A0 = R6.L * R0.L || R4 = [I0 ++ M0] 
        || R2 = [P3 ++ P0]; // Twiddle factor mult. for the ... 
        R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) || R5 = [I0++] 
        || I1 -= M0;        // second and third butterfly inputs 
        A1 = R7.L * R2.H, A0 = R7.L * R2.L || I0 -= M0 || R1 = [I3 ++ M2];
                            // Dummy fetch to increment output pointer 
        R7.H =(A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R1 = [P5 ++ P0]
        || R2 = [I2 ++ M2]; // Dummy fetch to increment the twiddle factor 
                            // pointer 
        LSETUP(ST_LAST_STAGE_IN, END_LAST_STAGE_IN) LC1 = P1;
                            // Loop count - 2^stage 
ST_LAST_STAGE_IN:
            R4 = R4 +|+ R5, R5 = R4 -|- R5 (S);
                            // Compute butterfly outputs 
            R6 = R6 +|+ R7, R7 = R6 -|- R7 (S) || NOP;
            R4 = R4 +|+ R6, R2 = R4 -|- R6 (S) || R6 = [I1 ++ M0] 
            || R1 = [P5 ++ P0]; 
                            // R4/R2 are final here; R6 and R1 already belong
                            // to the next pass
            R5 = R5 +|+ R7, R7 = R5 -|- R7 (S) || [I3 ++ M0] = R2 
            || R2 = [P3 ++ P0];
                            // Store R2, then reuse R2 for the next twiddle
            A1 = R6.L * R0.H, A0 = R6.L * R0.L || [I3++] = R7 || I2 += M0; 
        
            R6.H = (A1 += R6.H * R0.L), R6.L = (A0 -= R6.H * R0.H) 
            || R7 = [I1++] || I0 += M0;
            A1 = R7.L * R2.H, A0 = R7.L * R2.L || [I2] = R5 || I2 -= M0;
            R7.H = (A1 += R7.H * R2.L), R7.L = (A0 -= R7.H * R2.H) || R5 = [I0] 
            || I0 -= M0;
            A1 = R5.L * R1.H, A0 = R5.L * R1.L || [I2++] = R4 || I3 -= M0;
                            // Twiddle factor mult. for the ... 
END_LAST_STAGE_IN:
            R5.H = (A1 += R5.H * R1.L), R5.L = (A0 -= R5.H * R1.H)
            || R4 = [I0++] || I1 -= M0;
                            // second butterfly input 
END_LAST_STAGE_OUT:
        I1 += M3 || R1 = [I0 ++ M3];
                            // Step I1 and I0 by M3; the value loaded into R1
                            // is not used afterwards in the visible code
    
// FINISH: routine epilogue, reached by falling through from the last-stage
// loop. Releases the 20 bytes of stack scratch space (the [SP] .. [SP + 16]
// slots used above), restores the saved registers and returns to the caller.
FINISH:
    SP += 20;               // Drop the five 4-byte scratch slots
    (R7:4,P5:3) = [SP++];   // Retrieve Call preserved registers
    RTS;
    NOP;                    //to avoid one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in the memory.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -