📄 fftspx_h.asm

📁 TMS320bbs（源程序）的c67xfiles文件。用于在CCS2.0集成编译环境下实现TI的c67x系列DSP开发。是用DSP汇编语言
💻 ASM
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
        CMPGTU.L2   B_stride,  B_radix,    B_while   ;test for last pass
||      SHRU  .S2   B_stride,  2,          B_stride  ;[6,0]stride=stride>>2

  [B_while]B  .S1   LOOP_WHILE                       ;}/* end while */
||[B_while]LDDW.D2T2 *B_ptr_x[0],B_xp1:B_xp0         ;[3,1] X[i]
||[B_while]MV .L2   B_ptr_x,   B_x                   ;[3,1]x = ptr_x
||      MV    .S2   B_ptr_x,   B_x_                  ;[6,0]x_ = ptr_x

 [B_while]LDDW.D2T1 *++B_x_[B_stride],A_xh2p1:A_xh2p0;[4,1]x[N/4+i]

 [B_while]LDDW.D2T2 *++B_x_[B_stride],B_xl1p1:B_xl1p0;[5,1]x[N/2+i]
||[B_while]MPYSU.M1X 12,       B_stride,   A_fft_jmp ;[4,0]fft_jmp=3*stride

 [B_while]LDDW.D2T1 *++B_x_[B_stride],A_xl2p1:A_xl2p0;[6,1]x[3N/4+i]
||[B_while]ADDAH.D1 A_ptr_w,   A_tw_offset,A_w0      ;[5,0]w=ptr_w+tw_offset

 [B_while]MPYSU.M2  12,        B_stride,   B_fft_jmp ;[7,1]fft_jmp=stride*3 
||[B_while]SUB.S1   A_n,       0,          A_i       ;[6,0]for(i=0; i < n){
||[B_while]ADD.L1   A_tw_offset,A_fft_jmp,A_tw_offset;[6,0]tw_offset+=fft_jmp

  [B_while]STW.D2T1 A_tw_offset,*-B_SP[20]           ;save tw_offset 
        ;BRANCH OCCURS

        ; Early-exit test followed by reload of n_max, offset, ptr_y and brev
        ; from the stack frame.  B_return = (radix > 4), unsigned compare; when
        ; set, control transfers to EARLY_EXIT.
        ; NOTE(review): the two load packets and the NOP 3 after the branch
        ; appear to occupy its five delay slots, so the loads issue whether or
        ; not the branch is taken -- confirm against the C6000 CPU guide.
        CMPGTU.L2   B_radix,   4,          B_return  ;B_return = (radix > 4)

 [B_return]B  .S2   EARLY_EXIT                       ;early exit for cache

        LDW   .D2T2 *-B_SP[10],            B_n_max   ;restore n_max
||      LDW   .D1T1 *-A_SP[11],            A_offset  ;restore offset

        LDW   .D2T2 *-B_SP[14],            B_ptr_y   ;restore ptr_y
||      LDW   .D1T1 *-A_SP[13],            A_brev    ;restore brev table

        NOP             3                            ;pipeline latency

        NOP             1                            ;pipeline latency
* ====================== SYMBOLIC REGISTER ASSIGNMENTS ======================= *
* Each .asg below binds a symbolic name to a physical register for the code
* that follows.  Many registers carry more than one name (for example B8 is
* both B_radix and B_x, A10 is both A_offset and A_k1, and B4 is B_ptr_x,
* B_k0_, B_k_, B_xl1_0 and B_yt3).  Names that share a register are aliases:
* writing one overwrites the others, so their uses must not overlap in time.
        .asg            A4,         A_n       ;number of points in transform
        .asg            B4,         B_ptr_x   ;pntr to in data partial transform
        .asg            B6,         B_ptr_y   ;pntr to final output data
        .asg            A8,         A_brev    ;pointer to bit reverse table
        .asg            B8,         B_radix   ;smallest butterfly size radix=2,4
        .asg            A10,        A_offset  ;index into main fft array
        .asg            B10,        B_n_max   ;maximum size of all ffts
        .asg            A2,         A_r2      ;radix-2 condition (predicate reg)
        .asg            B8,         B_x       ;pointer to data (shares B8 with B_radix)
        .asg            A7,         A_x       ;pointer to data
        .asg            A11,        A_y0      ;even output data pointer
        .asg            B5,         B_y0      ;odd output data pointer
        .asg            B12,        B_n2      ;n/4 per original note; prolog sets n_max>>1 -- TODO confirm
        .asg            A6,         A_n2      ;copy of n2
        .asg            B2,         B_i       ;loop counter
        .asg            B9,         B_l0      ;shift for index bit reverse
        .asg            A1,         A_pro     ;prolog counter
        .asg            A9,         A_j       ;index into data
        .asg            A12,        A_cx3f    ;mask for bit reverse calc.
        .asg            A3,         A_j0      ;lower 6 bits of j
        .asg            A5,         A_j1      ;upper 6 bits of j
        .asg            A0,         A_k0      ;reversed j0
        .asg            A10,        A_k1      ;reversed j1 (shares A10 with A_offset)
        .asg            B4,         B_k0_     ;k0 << 6
        .asg            B4,         B_k_      ;k0_ + k1
        .asg            B6,         B_k       ;k_ >> l0
        .asg            A3,         A_k       ;copy of k
        .asg            A5,         A_ptr_y0  ;copy of A_y0
        .asg            B7,         B_ptr_y1  ;copy of B_y0
        .asg            B11,        B_x1      ;partial sums
        .asg            B10,        B_x0      ;partial sums
        .asg            A15,        A_x3      ;partial sums
        .asg            A14,        A_x2      ;partial sums
        .asg            B1,         B_x5      ;partial sums
        .asg            B0,         B_x4      ;partial sums
        .asg            A13,        A_x7      ;partial sums
        .asg            A12,        A_x6      ;partial sums
        .asg            B1,         B_xh0_0   ;partial sums
        .asg            B7,         B_xh1_0   ;partial sums
        .asg            A4,         A_xh0_1   ;partial sums
        .asg            A13,        A_xh1_1   ;partial sums
        .asg            A12,        A_yt0     ;partial sums
        .asg            A5,         A_yt1     ;partial sums
        .asg            B3,         B_yt4     ;partial sums
        .asg            B6,         B_yt5     ;partial sums
        .asg            B3,         B_xl0_0   ;partial sums
        .asg            B4,         B_xl1_0   ;partial sums
        .asg            A5,         A_xl0_1   ;partial sums
        .asg            A14,        A_xl1_1   ;partial sums
        .asg            A3,         A_yt2     ;partial sums
        .asg            B4,         B_yt3     ;partial sums
        .asg            B5,         B_yt6     ;partial sums
        .asg            A4,         A_yt7     ;partial sums
* ============================ PIPE LOOP PROLOG ============================== *
* Initialises the state consumed by the loop kernel: the bit-reverse index
* pieces (j, j0, j1, k0, k1 and shift l0), the input pointers B_x / A_x, the
* output base A_y0, the counters B_i / A_pro, n2 and the radix flag A_r2.
* NOTE(review): the [a,b] tags in the comments look like software-pipeline
* schedule labels shared with the kernel -- confirm their exact meaning.
        SHRU  .S1   A_offset,   8,          A_j1    ;[ 1,1]j1 =offset >> 8 
||      NORM  .L2   B_n_max,    B_l0                ;[ 2,0]l0=norm(n_max) 

        LDBU  .D1T1 *A_brev[A_j1],          A_k1    ;[ 2,1]k1=brev[j1] 
||      SHRU  .S1   A_offset,   2,          A_j     ;[ 4,0]j=offset>>2 

        MVK   .S1   03Fh,       A_cx3f              ;[ 2,1]const = 0x3f 

        AND   .S1   A_j,        A_cx3f,     A_j0    ;[ 3,1]j0 = lo6bit of j 

        LDBU  .D1T1 *A_brev[A_j0],          A_k0    ;[ 5,1]k0 = brev[j0]

        ADD   .L2   B_l0,       -16,        B_l0    ;[ 3,0]l0 -=16
||      MV    .S1X  B_ptr_y,    A_y0                ;[ 3,0]y0 = ptr_y

        ADD   .S2   B_l0,       -1,         B_l0    ;[ 4,0]l0 -= 1, i.e. norm(n_max)-17
||      ADD   .L1X  B_ptr_x,    8,          A_x     ;[ 4,0]A_x = ptr_x + 8 bytes

        MVK   .S1   1,          A_pro               ;[ 5,0]pro = 1 (gates [!A_pro] stores off)
||      ADD   .L2X  A_n,        4,          B_i     ;[ 5,0]i = n + 4 (kernel subtracts 4/pass)
||      SHRU  .S2   B_n_max,    1,          B_n2    ;[ 5,0]n2 = n_max >> 1
||      MV    .D2   B_ptr_x,    B_x                 ;[ 5,0]x=ptr_x 
||      SUB   .L1X  B_radix,    2,          A_r2    ;[ 5,0]r2 = radix - 2 (0 when radix==2)
* ============================ PIPE LOOP KERNEL ============================== *
* One pass of the loop is the 12 execute packets below.  Per pass the code:
*   - loads four doublewords (x1:x0 and x5:x4 via B_x, x3:x2 and x7:x6 via
*     A_x), each pointer post-incremented by 2 doublewords,
*   - forms sums and differences with ADDSP/SUBSP into yt0..yt7,
*   - builds a bit-reversed index k from j: j0 = j & 0x3f, j1 = j >> 6,
*     k0 = brev[j0], k1 = brev[j1], k = ((k0 << 6) + k1) >> l0,
*   - stores yt0..yt7 through A_ptr_y0 / B_ptr_y1, post-incrementing by n2.
* Predicates:
*   [!A_pro]  stores are suppressed until A_pro is cleared at the end of a pass
*   [ B_i ]   next-iteration work is disabled once the counter reaches 0
*   [!A_r2]   executes only when A_r2 == 0 (the prolog sets A_r2 = radix - 2)
* The loop branch sits in the 7th packet; the 5 packets after it complete the
* pass before the branch takes effect.
* NOTE(review): instruction order, unit assignment and register reuse here are
* hand-scheduled -- do not reorder without re-deriving the schedule.
LOOP_FOR_B:
        MV    .S1X  B_n2,       A_n2                ;[21,1]copy n2
||[!A_r2]MV   .S2   B_x1,       B_xh1_0             ;[21,1]if(!r2)xh1_0=x1
||[!A_r2]MV   .D1   A_x7,       A_xl0_1             ;[21,1]if(!r2)xl0_1=x7
||      LDDW  .D2T2 *B_x++[2],  B_x1:B_x0           ;[ 9,2]load x1:x0

        ADDSP .L1X  B_xl1_0,    A_xl0_1,    A_yt7   ;[22,1]yt7=xl1_0+xl0_1
||[!A_r2]MV   .S2   B_x0,       B_xh0_0             ;[22,1]if(!r2)xh0_0=x0
||      SUBSP .L2X  B_xl1_0,    A_xl0_1,    B_yt3   ;[22,1]yt3=xl1_0-xl0_1
||[!A_r2]MV   .S1   A_x3,       A_xh1_1             ;[22,1]if(!r2)xh1_1=x3
||      LDDW  .D1T1 *A_x++[2],  A_x3:A_x2           ;[10,2]load x3:x2

  [!A_r2]ADDSP.L2X  B_xl1_0,    A_xl0_1,    B_yt3   ;[23,1](!r2)yt3=xl1_0+xl0_1
||      ADDSP .L1X  B_xh1_0,    A_xh1_1,    A_yt1   ;[23,1]yt1=xh1_0+xh1_1
||[!A_r2]MV   .S1   A_x2,       A_xh0_1             ;[23,1]if(!r2)xh0_1=x2
||      ADD   .D1   A_j,        1,          A_j     ;[11,2] j+=1
||      LDDW  .D2T2 *B_x++[2],  B_x5:B_x4           ;[11,2]load x5:x4

  [!A_r2]MV   .S1   A_x6,       A_xl1_1             ;[24,1]if(!r2)xl1_1=x6
||[!A_r2]MV   .D2   B_x4,       B_xl0_0             ;[24,1]if(!r2)xl0_0=x4
||      ADDSP .L1X  B_xh0_0,    A_xh0_1,    A_yt0   ;[24,1]yt0=xh0_0+xh0_1
||      SUBSP .L2X  B_xh0_0,    A_xh0_1,    B_yt4   ;[24,1]yt4=xh0_0-xh0_1
||      LDDW  .D1T1 *A_x++[2],  A_x7:A_x6           ;[12,2]load x7:x6

        SUBSP .L2X  B_xl0_0,    A_xl1_1,    B_yt6   ;[25,1]yt6=xl0_0-xl1_1
||[!A_r2]SUBSP.L1X  B_xl1_0,    A_xl0_1,    A_yt7   ;[25,1](!r2)yt7=xl1_0-xl0_1
||      SHRU  .S1   A_j,        6,          A_j1    ;[ 1,3]j1 = j >> 6 

        SUB   .S2   B_i,        4,          B_i     ;[26,1]i -= 4 (down-counter)
||      ADDAW .D2   B_y0,       B_k,        B_ptr_y1;[26,1]ptr_y1 = y0+k 
||      ADDSP .L1X  B_xl0_0,    A_xl1_1,    A_yt2   ;[26,1]yt2=xl0_0+xl1_1
||      SUBSP .L2X  B_xh1_0,    A_xh1_1,    B_yt5   ;[26,1]yt5=xh1_0-xh1_1
||      LDBU  .D1T1 *A_brev[A_j1],          A_k1    ;[ 2,3]k1 = brev[j1] 
||      MVK   .S1   03Fh,       A_cx3f              ;[ 2,3]const = 0x3f 

        ADDAW .D1   A_y0,       A_k,        A_ptr_y0;[27,1]ptr_y0 = y0 + k 
||[!A_pro]STW .D2T1 A_yt1,      *B_ptr_y1++[B_n2]   ;[27,1]store x[1] 
||[ B_i]B     .S2   LOOP_FOR_B                      ;[27,1]} /* end for */
||[ B_i]AND   .S1   A_j,        A_cx3f,     A_j0    ;[ 3,3]j0 = j & 0x3f 

  [!A_pro]STW .D2T2 B_yt3,      *B_ptr_y1++[B_n2]   ;[28,1]store x[3]
||[!A_pro]STW .D1T1 A_yt0,      *A_ptr_y0++[A_n2]   ;[28,1]store x[0] 
||[ B_i]SHL   .S2X  A_k0,       6,          B_k0_   ;[16,2]k0_ = k0 << 6
||[ B_i]SUBSP .L2   B_x1,       B_x5,       B_xl1_0 ;[16,2]xl1_0=x[1]-x[5]

  [ B_i]ADD   .S2X  B_k0_,      A_k1,       B_k_    ;[17,2]k_=k0_+k1 
||[ B_i]SUBSP .L1   A_x2,       A_x6,       A_xl0_1 ;[17,2]xl0_1=x[2]-x[6]
||[ B_i]ADDSP .L2   B_x1,       B_x5,       B_xh1_0 ;[17,2]xh1_0=x[1]+x[5]
||[ B_i]LDBU  .D1T1 *A_brev[A_j0],          A_k0    ;[ 5,3]k0=brev[j0] 

  [!A_pro]STW .D1T1 A_yt2,      *A_ptr_y0++[A_n2]   ;[30,1]store x[2]
||[!A_pro]STW .D2T2 B_yt5,      *B_ptr_y1++[B_n2]   ;[30,1]store x[5]
||[ B_i]ADDSP .L1   A_x3,       A_x7,       A_xh1_1 ;[18,2]xh1_1=x[3]+x[7]
||[ B_i]ADDSP .L2   B_x0,       B_x4,       B_xh0_0 ;[18,2]xh0_0=x[0]+x[4]
||[ B_i]SHRU  .S2   B_k_,       B_l0,       B_k     ;[18,2]k=k_ >> l0 

  [!A_pro]STW .D2T1 A_yt7,      *B_ptr_y1[0]        ;[31,1]store x[7]
||[!A_pro]STW .D1T2 B_yt4,      *A_ptr_y0++[A_n2]   ;[31,1]store x[4]
||[ B_i]MV    .S1X  B_k,        A_k                 ;[19,2]duplicate k
||[ B_i]ADDSP .L1   A_x2,       A_x6,       A_xh0_1 ;[19,2]xh0_1=x[2]+x[6]

  [ B_i]ZERO  .S1   A_pro                           ;[32,1]pro = 0 (enables stores)
||[!A_pro]STW .D1T2 B_yt6,      *A_ptr_y0[0]        ;[32,1]store x[6]
||[ B_i]ADD   .S2X  A_y0,       4,          B_y0    ;[20,2]B_y0 = A_y0 + 4 bytes
||[ B_i]SUBSP .L2   B_x0,       B_x4,       B_xl0_0 ;[20,2]xl0_0=x[0]-x[4]
||[ B_i]SUBSP .L1   A_x3,       A_x7,       A_xl1_1 ;[20,2]xl1_1=x[3]-x[7]
||[!A_r2]MV   .D2   B_x5,       B_xl1_0             ;[20,2]if(!r2)xl1_0=x5
* ============================ PIPE LOOP EPILOG ============================== *
* Common exit path: reached through a branch to EARLY_EXIT and by falling out
* of the loop kernel directly above.  Reloads B3 (slot 19), A10-A15 and
* B10-B14 (slots 1-11) and the saved CSR (slot 18) from the stack frame at
* negative word offsets from SP, then branches to the address in B3.
* The packets after the branch (B10/A10 reload, MVC to CSR, NOP 3) fill the
* five cycles before the branch takes effect.
* NOTE(review): no stack-pointer adjustment is visible in this section --
* confirm whether the frame is released elsewhere or never allocated.
EARLY_EXIT:
        MV    .L1X  B_SP,       A_SP                ;copy stack pointer
||      LDW   .D2T2 *-B_SP[19], B3                  ;restore B3

        LDW   .D2T1 *-B_SP[1],  A15                 ;restore A15
||      LDW   .D1T2 *-A_SP[18], B_csr               ;load original CSR

        LDW   .D2T2 *-B_SP[2],  B14                 ;restore B14
||      LDW   .D1T1 *-A_SP[3],  A14                 ;restore A14

        LDW   .D2T2 *-B_SP[4],  B13                 ;restore B13
||      LDW   .D1T1 *-A_SP[5],  A13                 ;restore A13

        LDW   .D2T2 *-B_SP[6],  B12                 ;restore B12
||      LDW   .D1T1 *-A_SP[7],  A12                 ;restore A12

        LDW   .D2T2 *-B_SP[8],  B11                 ;restore B11
||      LDW   .D1T1 *-A_SP[9],  A11                 ;restore A11
||      B     .S2   B3                              ;return to caller
 
        LDW   .D2T2 *-B_SP[10], B10                 ;restore B10    
||      LDW   .D1T1 *-A_SP[11], A10                 ;restore A10

        MVC   .S2   B_csr,      CSR                 ;write back saved CSR (interruptibility restored)

        NOP         3                               ;wait for branch delay
        ;BRANCH OCCURS HERE
* ============================================================================ *
* End of fftSPxSP assembly code                                                *
*==============================================================================*
*      Copyright (C) 1997-2000 Texas Instruments Incorporated.                 *
*                            All Rights Reserved                               *
*==============================================================================*
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -