⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cr2fftn_outplace.asm

📁 ADI BF DSP的FFT汇编优化后的代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
    
    I0 += M0 (BREV) || R2 = [I0];
                            //R2 reads the data 
    I0 += M0 (BREV)|| R3 = [I0];
    I0 += M0 (BREV)|| R4 = [I0];       
    
    // Stage12 section: the label names suggest the first two stages handled
    // together. State relied on at this point: R2, R3 and R4 already hold the
    // first three input words (loaded just above through I0 with bit-reversed
    // stepping). I0/M0 (input walk), I2 (output pointer) and P0 (loop count)
    // are initialised by code outside this view.
    //
    // Each loop iteration issues eight loads through I0 and eight stores
    // through [I2++], i.e. two groups of four words. Loads for the following
    // group are interleaved with the arithmetic of the current one, so R2-R4
    // are already reloaded when the loop wraps. ESC_12 drains the last two
    // groups and does not reload R2-R4.
    //
    // Every add/subtract pair carries (ASR), so results are halved before
    // they are written (scaling).
    // NOTE(review): (CO) is the cross option on the second pair of each
    // group; presumably it supplies the half-word swap needed for the +/-j
    // rotation - confirm against the Blackfin Programming Reference.
    CC = P0 <= 0 (IU);      //Unsigned compare, so true only when P0 is 0.
    If CC Jump ESC_12;      //No loop passes: go straight to the drain.
    
    lsetup(Stage12_strt, Stage12_end) LC0 = P0; 
Stage12_strt:
        // Group in R2..R5. R5 is fetched by the first instruction and first
        // read by the second; R0, R1 and R6 are fetched for the next group.
        R2 = R2 +|+ R3,  R3 = R2 -|- R3 (ASR) || I0 += M0 (BREV) || R5 = [I0];
        R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO)|| I0 += M0 (BREV) || R0 = [I0];
        R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR) || I0 += M0 (BREV) || R1 = [I0];
        R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR) || I0 += M0 (BREV) || R6 = [I0];
    
        // Group in R0, R1, R6, R7. The results of the previous group are
        // stored in the order R2, R3, R4, R5 while this one is computed.
        R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR) || R7 = [I0]  || [I2++] = R2;
        R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO)|| I0 += M0 (BREV) 
        || [I2++] = R3;
        R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR) || R2 = [I0] || [I2++] = R4;
        R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR) || I0 += M0 (BREV) || [I2++] = R5;
    
        // Store this group in the order R0, R1, R6, R7 and finish reloading
        // R3 and R4 for the next pass.
        R3 = [I0] || [I2++] = R0;
        I0 += M0 (BREV) || [I2++] = R1;
        R4 = [I0] || [I2++] = R6;        
Stage12_end:
        I0 += M0 (BREV) || [I2++] = R7;
    
ESC_12: 
    // Drain: same arithmetic as one loop pass, but only R5, R0, R1, R6 and R7
    // are loaded; nothing is prefetched for a further pass.
    R2 = R2 +|+ R3,  R3 = R2 -|- R3 (ASR)       || I0 += M0 (BREV)|| R5 = [I0];
    R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO)    || I0 += M0 (BREV)|| R0 = [I0];
    R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR)  || I0 += M0 (BREV)|| R1 = [I0];
    R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR)  || I0 += M0 (BREV)|| R6 = [I0];
    
    R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR)  || [I2++] = R2    || R7 = [I0];
    R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO) || [I2++] = R3;
    R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR)  || [I2++] = R4;
    R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR)  || [I2++] = R5;
    
    // Write out the last group: R0, R1, R6, R7.
    [I2++] = R0;
    [I2++] = R1;
    [I2++] = R6;
    [I2++] = R7;
    
/*
 * Prepare the registers used by the middle stages. When the input size is 8
 * there are no middle stages, and control jumps directly to Esc_mid.
 * Otherwise R0 is loaded with the number of middle stages (m - 3).
 */
    
    R1 = P1;                //R1 = wst.
    R1 = R1 << 2;           //R1 = wst * 4
    P3 = 4;                 //P3 = number of lines in each butterfly at
                            // stage 3.
    R7 = P5;  
    R7.L = R1.L * R7.L (IS);//R7 = wst * 4 * twiddle offset. 16-bit signed
                            // integer multiply; only R7.L is written.
    R2 = P4;  
    R3 = 8;
    CC = R2 == R3;          //If the input array size is 8, go to the last
                            // stage, because
    If CC Jump Esc_mid;     //the middle stages do not occur.
    R0 = 0;                 //Counter for the number of middle stages.
    
Find_m:                     //Count the stages: halve R2 until it equals 8,
                            // adding 1 to R0 on every pass.
                            // NOTE(review): this only terminates when R2
                            // starts as 8 times a power of two; presumably
                            // the size is validated elsewhere - confirm.
    R2 >>= 1;
    R0 += 1;
    CC = R2 == R3;
    If !CC Jump Find_m (BP);//R0 ends as m-3. It is the middle-stage loop
                            // counter, so it must not be reused.
    
/************************* Implementation of Middle Part **********************/
    
/*
 * The outer loop (Loopfor_m) runs once per middle stage, i.e. (number of
 * stages - 3) times, counted down in R0. Inside it, hardware loop LC0 (Loop1)
 * runs P5 times after P5 has been halved for the stage; per the original
 * notes this is half the number of butterflies at the stage. The nested
 * hardware loop LC1 (Loop2) walks the lines of one butterfly.
 *
 * Each line reads one word through I2 and its counterpart through I3, and
 * writes their sum and difference through I0. Both results are halved by
 * (ASR) for scaling.
 *
 * Data is read starting at base B2 and written starting at base B0. The two
 * base registers are swapped at the end of every stage, so a stage consumes
 * what the previous stage produced.
 */
                            //B2 Input, B0 output.
Loopfor_m:
    I0 = B0;
    I2 = B2;
    I3 = B2;                //I3 also starts at the B2 base; M2 is added
                            // below so it addresses the counterpart lines.
    P0 = P3 << 2; 
    M2 = P0;                //M2 = P3 * 4: the offset of the counterpart line.
    P0 += -4;
    M0 = P0;                //M0 = M2 - 4, used to step I0 back after the
                            // counterpart store.
    
    P5 = P5 >> 1;           //Loop1 count for this stage.
    R7 = R7 >>> 1 || I3 += M2;
    M1 = R7;                //M1 = twiddle step used by [I1++M1].
    P3 += -1;               //Loop2 covers all lines but the last; the last
                            // line is finished after the loop.
    lsetup(Loop1_strt, Loop1_end) LC0 = P5;
                            //Loop over the butterflies.
Loop1_strt:                                     
        I1 = B3;            //Restart at the twiddle factor base address.
        R2 = [I2++];
        // The R3 loaded here is loaded again at Loop2_strt before R3 is ever
        // read, so the first line uses R4 without a twiddle multiply. The
        // visible effect of this load is to step I1 by M1.
        R3 = [I1++M1]  || R4 = [I3++];
    
        lsetup(Loop2_strt, Loop2_end) LC1 = P3;
                            //Loop over the lines of one butterfly.
Loop2_strt:
            // Sum and difference of the current pair (R2, R4); in parallel
            // fetch the next twiddle (R3) and the next counterpart word (R4).
            R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) || R3 = [I1++M1] 
            || R4 = [I3++];
            // Begin R3 * R4 for the next pair, store the sum, fetch the next
            // R2. After [I0++M2] the pointer is on the counterpart slot.
            A1 = R3.L * R4.H, A0 = R3.L * R4.L  || [I0++M2] = R5 || R2 = [I2++];
            // Complete the multiply:
            //   R4.L = R3.L*R4.L - R3.H*R4.H
            //   R4.H = R3.L*R4.H + R3.H*R4.L
            // which has the form of a complex product with the real parts in
            // the low half-words. The difference goes to the counterpart slot
            // and I0 steps back by M0, a net advance of one word per pass.
            // NOTE(review): this relies on the store using I0 as it was
            // before the parallel I0 -= M0 - confirm.
Loop2_end:  R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H)  
            || I0 -= M0 || [I0] = R6;
    
        // Last line of the butterfly: no further prefetch. I2 and I3 each
        // skip M2 bytes to reach the next butterfly.
        R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR)    || I2 += M2;
        I3 += M2  || [I0++M2] = R5;   
Loop1_end:
        [I0++] = R6;
    
    P3 += 1;                //Undo the decrement made before Loop1 ...
    P3 = P3 << 1;           // ... and double the lines for the next stage.
    R0 += -1;               //One middle stage done.
    B1 = B0;                //Swap B0 and B2, using B1 as the temporary.
    B0 = B2;
    B2 = B1;
    CC = R0 == 0;
    If !CC Jump Loopfor_m (BP);
                            //Repeat for each middle stage.
    
/************************* Implementation of Last Part *********************/
    
/*
 * Last stage of the transform. The label Esc_mid is also the jump target for
 * an input size of 8, in which case the middle stages are skipped. Per the
 * original notes, the last stage has a single butterfly and the twiddle
 * factor offset increment is 1.
 *
 * Data is read starting at base B2 and written starting at base B0, through
 * the same pointer arrangement as the middle stages, but without an outer
 * per-butterfly loop.
 *
 * NOTE(review): the original notes say the loop is set for n/4 with 4 data
 * handled at a time. The visible code instead loads LC1 from P3 after
 * decrementing it and handles one pair (R2, R4) per pass - confirm which
 * description is intended.
 */
                            //B2 Input , B0 Output.
Esc_mid:
    I0 = B0;
    I2 = B2;
    I3 = B2;                //I3 also starts at the B2 base; M2 is added
                            // below so it addresses the counterpart lines.
    P0 = P3 << 2; 
    M2 = P0;                //M2 = P3 * 4: the offset of the counterpart line.
    P0 += -4;
    M0 = P0;                //M0 = M2 - 4.
    R7 = R7 >>> 1 || I3 += M2; 
    M1 = R7;                //M1 = twiddle step used by [I1++M1].
    P3 += -1;               //The loop covers all lines but the last.
    I1 = B3;                //Address of twiddle factor.
    R2 = [I2++];
    // The R3 loaded here is loaded again at Last_strt before R3 is read, so
    // the first line uses R4 without a twiddle multiply.
    R3 = [I1++M1]  || R4 = [I3++];
    
    lsetup(Last_strt, Last_end) LC1 = P3;
                            //Loop over the lines of the butterfly.
Last_strt:
        // Sum and difference of the current pair, halved by (ASR); fetch the
        // next twiddle and counterpart word in parallel.
        R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) || R3 = [I1++M1]  || R4 = [I3++];
        // Begin R3 * R4 for the next pair, store the sum, fetch the next R2.
        A1 = R3.L * R4.H, A0 = R3.L * R4.L|| [I0++M2] = R5  || R2 = [I2++]; 
        // Complete the multiply (R4.L = R3.L*R4.L - R3.H*R4.H,
        // R4.H = R3.L*R4.H + R3.H*R4.L), store the difference on the
        // counterpart slot and step I0 back by M0.
Last_end:
        R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H) || I0 -= M0 
        || [I0] = R6;
    
    // Final line: compute and store the last sum and difference.
    R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR);
    [I0++M2] = R5;   
    [I0] = R6;
    
    // If the last stage wrote somewhere other than the address held in M3,
    // copy P4 words from the B0 base to that address; otherwise skip the
    // copy. NOTE(review): M3 is presumably the caller's output buffer,
    // stored there by code outside this view - confirm.
    R0 = B0;
    R1 = M3;
    CC = R0 == R1;
    If CC Jump Terminate;
    I0 = B0;
    I1 = M3;
    R0 = [I0++];            //Preload the first word for the one-line loop.
    lsetup(Copy_strt, Copy_strt) LC0 = P4;
Copy_strt:
        // Store the word fetched on the previous pass while fetching the
        // next. The load on the final pass reads one word beyond the P4
        // words that are copied; that value is not used.
        [I1++] = R0 || R0 = [I0++];
    
    Terminate:
    (R7:4, P5:3) = [SP++];  //Restore R7-R4 and P5-P3 before returning; the
                            // matching save is outside this view.
    RTS;                    //Return.
    NOP;                    // If link or unlink happens to be the next 
                            // instruction after RTS in memory, RTS takes one 
                            // extra cycle than expected. NOP is put to avoid 
                            // this.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -