📄 fftspx_h.asm

📁 TMS320bbs（源程序）的c67xfiles文件。用于在CCS2.0集成编译环境下实现TI的c67x系列DSP开发。是用DSP汇编语言
💻 ASM
📖 第 1 页 / 共 4 页
字号:
        ; Symbolic register names for the butterfly temporaries and outputs.
        ; Several aliases share one physical register in this list:
        ;   B4: B_h2_0/B_p0   B1: B_p00/B_p2   B3: B_p1/B_p20
        ;   A6: A_h2_0/A_p4   A0: A_p5/A_pa0   A3: A_p7/A_p8/A_pa
        ; NOTE(review): presumably their live ranges never overlap in the
        ; hand schedule -- re-check before moving any instruction.
        .asg    B4,         B_h2_0       ;store index step, set to 2*stride in the kernel
        .asg    A6,         A_h2_0       ;A-side copy of B_h2_0
        .asg    B4,         B_p0         ;product si20*yt0
        .asg    B3,         B_p1         ;product co20*xt0
        .asg    B1,         B_p00        ;p0+p1, 1st word stored via A_ptr_x1
        .asg    B1,         B_p2         ;product co20*yt0
        .asg    B6,         B_p3         ;product si20*xt0
        .asg    B3,         B_p20        ;p2-p3, 1st word stored via B_ptr_x1
        .asg    A6,         A_p4         ;product si10*yt1
        .asg    A0,         A_p5         ;product co10*xt1
        .asg    A11,        A_p40        ;p4+p5, 2nd word stored via A_ptr_x1
        .asg    A7,         A_p6         ;product co10*yt1
        .asg    A3,         A_p7         ;product si10*xt1
        .asg    A10,        A_p60        ;p6-p7, 2nd word stored via B_ptr_x1
        .asg    A3,         A_p8         ;product yt2*si30
        .asg    B12,        B_p9         ;product xt2*co30
        .asg    A14,        A_p80        ;p9+p8, 3rd word stored via A_ptr_x1 (original comment repeated A_pa0's "+1" index -- TODO confirm)
        .asg    A3,         A_pa         ;product co30*yt2
        .asg    B10,        B_pb         ;product si30*xt2
        .asg    A0,         A_pa0        ;pa-pb, 3rd word stored via B_ptr_x1
        .asg    B2,         B_return     ;early return decision variable (not used in the visible code)
* ============================ PIPE LOOP PROLOG ============================== *
* Function entry: spill registers and arguments, mask interrupts, start the
* first loads.
*
* Word slots written below the stack pointer (SP itself is not adjusted in
* the visible code):
*   [1] A15   [2] B14   [3] A14   [4] B13   [5] A13   [6] B12   [7] A12
*   [8] B11   [9] A11   [10] B_n_max   [11] A_offset   [12] B_radix
*   [13] A_brev   [14] B_ptr_y   [15] A_ptr_w   [16] B_ptr_x   [17] A_n
*   [18] original CSR   [19] B3   [20] tw_offset
*
* The [c,i] tags in the comments come from the original schedule; they
* look like (cycle, iteration) -- TODO confirm.
*
* NOTE(review): slots 1..17 are stored before the MVC that writes the
* GIE-cleared value to CSR. An interrupt taken in that window whose handler
* uses the stack could presumably overwrite them -- confirm against the SP
* convention used by the interrupt handlers.
        MV    .L1X  B_SP,      A_SP                  ;A-side copy of the stack pointer
||      STW   .D2T1 A15,       *-B_SP[1]             ;save A15

        STW   .D2T2 B14,       *-B_SP[2]             ;save B14
||      STW   .D1T1 A14,       *-A_SP[3]             ;save A14

        STW   .D2T2 B13,       *-B_SP[4]             ;save B13
||      STW   .D1T1 A13,       *-A_SP[5]             ;save A13

        STW   .D2T2 B12,       *-B_SP[6]             ;save B12
||      STW   .D1T1 A12,       *-A_SP[7]             ;save A12

        STW   .D2T2 B11,       *-B_SP[8]             ;save B11
||      STW   .D1T1 A11,       *-A_SP[9]             ;save A11

        STW   .D2T2 B_n_max,   *-B_SP[10]            ;save B_n_max (B10 per original comment)
||      STW   .D1T1 A_offset,  *-A_SP[11]            ;save A_offset (A10 per original comment)

        STW   .D2T2 B_radix,   *-B_SP[12]            ;save B_radix (B8 per original comment)
||      STW   .D1T1 A_brev,    *-A_SP[13]            ;save A_brev (A8 per original comment)
||      MVC   .S2   CSR,       B_csr                 ;read control status register

        STW   .D2T2 B_ptr_y,   *-B_SP[14]            ;save B_ptr_y (B6 per original comment)
||      STW   .D1T1 A_ptr_w,   *-A_SP[15]            ;save A_ptr_w (A6 per original comment)
||      AND   .L1X  B_csr,     -2,    A_csr_no_gie   ;CSR copy with bit 0 (GIE) cleared

        STW   .D2T2 B_ptr_x,   *-B_SP[16]            ;save B_ptr_x (B4 per original comment)
||      STW   .D1T1 A_n,       *-A_SP[17]            ;save A_n (A4 per original comment)
||      MVC   .S2X  A_csr_no_gie, CSR                ;disable interrupts
||      MV    .L1X  B_csr,     A_csr                 ;A-side copy of the original CSR

        STW   .D2T2 B3,        *-B_SP[19]            ;save B3
||      STW   .D1T1 A_csr,     *-A_SP[18]            ;save original CSR
||      ZERO  .S1   A_tw_offset                      ;[2,0]tw_offset=0
||      MV    .S2X  A_n,       B_stride              ;[2,0]stride=n
||      MV    .L2   B_ptr_x,   B_x_                  ;[6,0]x_ = ptr_x

        LDDW  .D2T2 *B_ptr_x[0],B_xp1:B_xp0          ;[3,1]xp1:xp0 = first doubleword at ptr_x
||      MV    .L2   B_ptr_x,   B_x                   ;[3,1]x = ptr_x
||      SHRU  .S2   B_stride,  2,          B_stride  ;[6,0]stride>>=2

        LDDW  .D2T1 *++B_x_[B_stride],A_xh2p1:A_xh2p0;[4,1]advance x_ by stride, load X[i+N/4]

        LDDW  .D2T2 *++B_x_[B_stride],B_xl1p1:B_xl1p0;[5,1]advance x_ by stride, load X[i+N/2]
||      MPYSU .M1X  12,        B_stride,   A_fft_jmp ;[4,0]A_fft_jmp = 12*stride (original comment said 3*stride)

        LDDW  .D2T1 *++B_x_[B_stride],A_xl2p1:A_xl2p0;[6,1]advance x_ by stride, load X[3N/4+i]
||      ADDAH .D1   A_ptr_w,   A_tw_offset,A_w0      ;[5,0]w0=ptr_w+tw_offset (ADDAH; tw_offset is still 0)

        MPYSU .M2   12,        B_stride,   B_fft_jmp ;[7,1]B_fft_jmp = 12*stride (original comment said 3*stride)
||      SUB   .S1   A_n,       0,          A_i       ;[6,0]i = n; the kernel counts i down by 4 to 0
||      ADD   .L1   A_tw_offset,A_fft_jmp,A_tw_offset;[6,0]tw_offset+=fft_jmp

        STW   .D2T1 A_tw_offset, *-B_SP[20]          ;save updated tw_offset in slot 20
* LOOP_WHILE: pipeline fill for the first butterfly. Recomputes
* A_fft_jmp = 3*stride, clears j, and runs the early adds, subtracts and
* twiddle loads.
*
* The run of branches ("prolog collapse") then enters the kernel packets
* PREF8..PREF12 one at a time before falling into LOOP_FOR_A. The "+ 8" and
* "+ 4" byte offsets presumably skip the leading STW instruction(s) of the
* target packet (assumes 4-byte opcodes), since no results exist to store
* yet -- TODO confirm against the ISA guide.
LOOP_WHILE:
        MPYSU .M1X  3,         B_stride,   A_fft_jmp ;[1,1]fft_jmp=3*stride

        ADDSP .L2   B_xp1,     B_xl1p1,    B_xh1     ;[10,1]xh1=x[1]+x[l1+1]
||      ZERO  .D1   A_j                              ;[6,0]j=0

        ADDSP .L1   A_xh2p1,   A_xl2p1,    A_xh21    ;[11,1]xh21=x[h2+1]+x[l2+1]
||      ADDSP .L2   B_xp0,     B_xl1p0,    B_xh0     ;[11,1]xh0=x[0]+x[l1]

        SUB   .S1   A_fft_jmp, A_j,        A_prj_    ;[12,1]prj_ = fft_jmp - j
||      SUBSP .L1   A_xh2p0,   A_xl2p0,    A_xl20    ;[12,1]xl20=x[h2]-x[l2]
||      SUBSP .L2   B_xp1,     B_xl1p1,    B_xl1     ;[12,1]xl1=x[1]-x[l1+1]

        SUB   .D1   A_prj_,    3,          A_prj     ;[13,1]prj = prj_ - 3 (zero when j == fft_jmp-3)
||      SUBSP .L1   A_xh2p1,   A_xl2p1,    A_xl21    ;[13,1]xl21=x[h2+1]-x[l2+1]
||      SUBSP .L2   B_xp0,     B_xl1p0,    B_xl0     ;[13,1]xl0=x[0]-x[l1]

        ADDSP .L1   A_xh2p0,   A_xl2p0,    A_xh20    ;[14,1]xh20=x[h2]+x[l2]
||      MV    .S2   B_x,       B_ptr_x0              ;[14,1]ptr_x0 = x (output base for this butterfly)
||      LDDW  .D1T1 *A_w0[A_j],A_si10:A_co10         ;[14,1]si1=w[j+1]co1=w[j]
||      ADD   .S1   A_w0,      8,          A_w0      ;[14,1]w0 += 8 bytes (next twiddle pair)
||[!A_prj]ADDAH.D2  B_x,       B_fft_jmp,  B_x       ;[14,1]if(!prj) x+=fft_jmp (ADDAH)
||      MPYSU .M1X  3,         B_stride,   A_fft_jmp ;[1,2]fft_jmp = 3*stride

        ADD   .S2   B_x,       8,          B_x       ;[15,1]x += 8 bytes
||      LDDW  .D1T2 *A_w0[A_j],B_si20:B_co20         ;[15,1]si2=w[j+3]co2=w[j+2]
||      B     .S1   PREF8 + 8                        ;prolog collapse: PREF8 minus its 2 leading STWs

        LDDW  .D2T2 *B_x[0],   B_xp1:B_xp0           ;[3,2]X[i]
||      MV    .S2   B_x,       B_x_                  ;[3,2]x_ = x
||      B     .S1   PREF9 + 4                        ;prolog collapse: PREF9 minus its leading STW

        SUBSP .L2X  B_xh1,     A_xh21,     B_yt0     ;[17,1]yt0=xh1-xh21
||      ADDSP .L1X  A_xl21,    B_xl0,      A_xt1     ;[17,1]xt1=xl0+xl21
||      LDDW  .D2T1 *++B_x_[B_stride],A_xh2p1:A_xh2p0;[4,2]X[N/4 + i]
||      B     .S1   PREF10                           ;prolog collapse: whole PREF10 packet

        ADDSP .L1X  A_xl20,    B_xl1,      A_yt2     ;[18,1]yt2=xl1+xl20
||      SUBSP .L2X  B_xl1,     A_xl20,     B_yt1     ;[18,1]yt1=xl1-xl20
||      LDDW  .D2T2 *++B_x_[B_stride],B_xl1p1:B_xl1p0;[ 5,2]x[N/2+i]
||      B     .S1   PREF11 + 4                       ;prolog collapse: PREF11 minus its leading STW

        SUBSP .L2X  B_xh0,     A_xh20,     B_xt0     ;[19,1]xt0=xh0-xh20
||      LDDW  .D2T1 *++B_x_[B_stride],A_xl2p1:A_xl2p0;[ 6,2]X[3N/4+i]
||      B     .S1   PREF12 + 4                       ;prolog collapse: PREF12 minus its leading STW

        SUBSP .L2X  B_xl0,     A_xl21,     B_xt2     ;[20,1]xt2=xl0-xl21
||      MPYSU .M2   12,        B_stride,   B_fft_jmp ;[ 7,2]B_fft_jmp = 12*stride (original comment said 3*stride)
||      B     .S2   LOOP_FOR_A                       ;prolog collapse: then enter the kernel top
* ============================ PIPE LOOP KERNEL ============================== *
* Software-pipelined butterfly kernel: 13 execute packets per pass, with
* two butterflies in flight.
*
* Loop control: A_i drops by 4 per pass and the branch is taken while it is
* non-zero. Instructions predicated on [A_i] start the next butterfly, so
* they are suppressed on the last pass; the unpredicated STWs drain the
* current one.
*
* Outputs: y0 and y1 go to ptr_x0[0] and ptr_x0[1]. The other six words go
* through two pointer chains, each pre-incremented by h2_0 = 2*stride:
*   A_ptr_x1 (= B_ptr_x1 - 4 bytes): p00, p40, p80
*   B_ptr_x1 (= ptr_x0 + 4 bytes):   p20, p60, pa0
*
* Twiddles: w0 gets +8, +8 and -16 bytes per pass (net zero); progress
* through the table comes from j += 3, with j reset when prj == 0.
*
* PREF8..PREF12 are entry points used by the prolog-collapse branches.
* NOTE(review): instruction order inside each packet matters because those
* branches enter at byte offsets; do not reorder.
LOOP_FOR_A:
        ADD   .S1   A_w0,      -16,        A_w0      ;[26,1]w0 -= 16 bytes (undoes the two +8 bumps)
||      STW   .D2T2 B_y0,      *B_ptr_x0[0]          ;[26,1]write x[i]
||      SUB   .D1   A_prj_,    3,          A_prj     ;[13,2]prj = prj_ - 3
||      SUBSP .L1   A_xh2p1,   A_xl2p1,    A_xl21    ;[13,2]xl21=x[h2+1]-x[l2+1]
||      SUBSP .L2   B_xp0,     B_xl1p0,    B_xl0     ;[13,2]xl0=x[0]-x[l1]

        MPYSU .M2   2,         B_stride,   B_h2_0    ;[27,1]h2_0 = 2*stride
||      SUBSP .L2   B_p2,      B_p3,       B_p20     ;[27,1]co20*yt0-si20*xt0
||      ADDSP .L1   A_xh2p0,   A_xl2p0,    A_xh20    ;[14,2]xh20=x[h2]+x[l2]
||      MV    .S2   B_x,       B_ptr_x0              ;[14,2]ptr_x0 = x
||      LDDW  .D1T1 *A_w0[A_j],A_si10:A_co10         ;[14,2]si1=w[j+1]co1=w[j]
||      ADD   .S1   A_w0,      8,          A_w0      ;[14,2]w0 += 8 bytes
||[!A_prj]ADDAH.D2  B_x,       B_fft_jmp,  B_x       ;[14,2]if(!prj) x+=fft_jmp (ADDAH)
||      MPYSU .M1X  3,         B_stride,   A_fft_jmp ;[ 1,3]fft_jmp = 3*stride

        ADD   .S1X  B_ptr_x1,  -4,         A_ptr_x1  ;[28,1]A_ptr_x1 = B_ptr_x1 - 4 bytes
||      MPYSP .M1   A_co30,    A_yt2,      A_pa      ;[28,1]pa = co30*yt2
||      MPYSP .M2X  A_si30,    B_xt2,      B_pb      ;[28,1]pb = si30*xt2
||      SUBSP .L1   A_p6,      A_p7,       A_p60     ;[28,1]co10*yt1-si10*xt1
||      ADDSP .L2   B_p0,      B_p1,       B_p00     ;[28,1]si20*yt0+co20*xt0
||      ADD   .S2   B_x,       8,          B_x       ;[15,2]x += 8 bytes
||      LDDW  .D1T2 *A_w0[A_j],B_si20:B_co20         ;[15,2]si2=w[j+3]co2=w[j+2]

        MPYSP .M2X  B_xt2,     A_co30,     B_p9      ;[29,1]p9=xt2*co30
||      MPYSP .M1   A_yt2,     A_si30,     A_p8      ;[29,1]p8=yt2*si30
||      ADDSP .L1   A_p4,      A_p5,       A_p40     ;[29,1](si10*yt1+co10*xt1)
||      MV    .S1X  B_h2_0,    A_h2_0                ;[29,1]A-side copy of h2_0
||      LDDW  .D2T2 *B_x[0],   B_xp1:B_xp0           ;[ 3,3]X[i]
||      MV    .S2   B_x,       B_x_                  ;[ 3,3]x_ = x

        SUBSP .L2X  B_xh1,     A_xh21,     B_yt0     ;[17,2]yt0=xh1-xh21
||      ADDSP .L1X  A_xl21,    B_xl0,      A_xt1     ;[17,2]xt1=xl0+xl21
||      LDDW  .D2T1 *++B_x_[B_stride],A_xh2p1:A_xh2p0;[ 4,3]X[N/4+i]

        ADDSP .L1X  A_xl20,    B_xl1,      A_yt2     ;[18,2]yt2=xl1+xl20
||      SUBSP .L2X  B_xl1,     A_xl20,     B_yt1     ;[18,2]yt1=xl1-xl20
||      LDDW  .D2T2 *++B_x_[B_stride],B_xl1p1:B_xl1p0;[ 5,3]x[N/2+i]

        SUB   .D1   A_i,       4,          A_i       ;[32,1]i -= 4 (down-counter)
||      SUBSP .L1X  A_pa,      B_pb,       A_pa0     ;[32,1]co30*yt2-si30*xt2
||      SUBSP .L2X  B_xh0,     A_xh20,     B_xt0     ;[19,2]xt0=xh0-xh20
||      LDDW  .D2T1 *++B_x_[B_stride],A_xl2p1:A_xl2p0;[ 6,3]x[3N/4+i]

  [ A_i]B     .S2   LOOP_FOR_A                       ;[33,1]loop while i != 0
||      ADDSP .L1X  B_p9,      A_p8,       A_p80     ;[33,1]si30*yt2+co30*xt2
||      STW   .D1T2 B_p00,     *++A_ptr_x1[A_h2_0]   ;[33,1]1st store via A_ptr_x1 (original: x[N/4+i])
||[ A_i]SUBSP .L2X  B_xl0,     A_xl21,     B_xt2     ;[20,2]xt2=xl0-xl21
||[ A_i]MPYSU .M2   12,        B_stride,   B_fft_jmp ;[ 7,3]B_fft_jmp = 12*stride (original comment said 3*stride)
PREF8:
        STW   .D2T2 B_p20,     *++B_ptr_x1[B_h2_0]   ;[34,1]1st store via B_ptr_x1 (original: x[N/4+i+1])
||      STW   .D1T1 A_p40,     *++A_ptr_x1[A_h2_0]   ;[34,1]2nd store via A_ptr_x1 (original: x[N/2+i])
||[ A_i]MPYSP .M2   B_co20,    B_yt0,      B_p2      ;[21,2]p2=co20*yt0
||[ A_i]ADDSP .L2X  B_xh1,     A_xh21,     B_y1      ;[21,2]y1=xh1+xh21
||[ A_i]MPYSP .M1   A_co10,    A_xt1,      A_p5      ;[21,2]p5 = co10*xt1
PREF9:
        STW   .D2T1 A_p60,     *++B_ptr_x1[B_h2_0]   ;[35,1]2nd store via B_ptr_x1 (original said x[3*N/4+i]; chain position suggests x[N/2+i+1] -- TODO confirm)
||[ A_i]ADD   .D1   A_w0,      8,          A_w0      ;[22,2]w0 += 8 bytes
||[ A_i]ADDSP .L2X  B_xh0,     A_xh20,     B_y0      ;[22,2]y0=xh0+xh20
||[ A_i]MPYSP .M2   B_si20,    B_yt0,      B_p0      ;[22,2]p0=si20*yt0
||[ A_i]MPYSP .M1X  A_si10,    B_yt1,      A_p4      ;[22,2]p4=si10*yt1
PREF10:
  [ A_i]MPYSP .M2   B_si20,    B_xt0,      B_p3      ;[23,2]p3 = si20*xt0
||[ A_i]ADD   .S1   A_j,       3,          A_j       ;[23,2]j += 3
||[ A_i]LDDW  .D1T1 *A_w0[A_j],A_si30:A_co30         ;[23,2]si3=w[j+5]co3=w[j+4]
||[ A_i]MPYSP .M1X  A_co10,    B_yt1,      A_p6      ;[23,2]p6 = co10*yt1
||[ A_i]ADDSP .L2   B_xp1,     B_xl1p1,    B_xh1     ;[10,3]xh1=x[1]+x[l1+1]
PREF11:
        STW   .D2T1 A_pa0,     *++B_ptr_x1[B_h2_0]   ;[37,1]3rd store via B_ptr_x1 (original: x[3*N/4+i+1])
||[ A_i]MPYSP .M2   B_co20,    B_xt0,      B_p1      ;[24,2]p1=co20*xt0
||[!A_prj]ZERO.S1   A_j                              ;[24,2]if(!prj) j = 0
||[ A_i]MPYSP .M1   A_si10,    A_xt1,      A_p7      ;[24,2]p7=si10*xt1
||[ A_i]ADDSP .L1   A_xh2p1,   A_xl2p1,    A_xh21    ;[11,3]xh21=x[h2p1]+x[l2p1]
||[ A_i]ADDSP .L2   B_xp0,     B_xl1p0,    B_xh0     ;[11,3]xh0=x[0]+x[l1]
PREF12:
        STW   .D1T1 A_p80,     *++A_ptr_x1[A_h2_0]   ;[38,1]3rd store via A_ptr_x1 (original: x[3*N/4+i])
||[ A_i]ADD   .S2   B_ptr_x0,  4,          B_ptr_x1  ;[25,2]ptr_x1 = ptr_x0 + 4 bytes
||[ A_i]STW   .D2T2 B_y1,      *B_ptr_x0[1]          ;[25,2]save x[i+1]
||[ A_i]SUB   .S1   A_fft_jmp, A_j,        A_prj_    ;[12,3]prj_ = fft_jmp - j
||[ A_i]SUBSP .L1   A_xh2p0,   A_xl2p0,    A_xl20    ;[12,3]xl20=x[h2]-x[l2]
||[ A_i]SUBSP .L2   B_xp1,     B_xl1p1,    B_xl1     ;[12,3]xl1=x[1]-x[l1+1]
* ============================ PIPE LOOP EPILOG ============================== *
* After the kernel: reload values that the prolog spilled below the stack
* pointer. NOTE(review): they are presumably reloaded because the kernel
* reuses their registers (e.g. B4, A6) -- confirm against the alias
* definitions. This section continues beyond the visible part of the file.
        MV    .L1X  B_SP,      A_SP                  ;A-side copy of the stack pointer
||      LDW   .D2T2 *-B_SP[12],B_radix               ;reload B_radix from slot 12 (original comment said B10)

        LDW   .D2T2 *-B_SP[16],B_ptr_x               ;reload ptr_x from slot 16

        LDW   .D1T1 *-A_SP[17],A_n                   ;reload n from slot 17

        LDW   .D1T1 *-A_SP[20],A_tw_offset           ;reload tw_offset from slot 20

        LDW   .D1T1 *-A_SP[15],A_ptr_w               ;reload ptr_w from slot 15
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -