⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dsp_fft32x32_sa.sa

📁 TI c64x的FFT程序
💻 SA
📖 第 1 页 / 共 4 页
字号:
          ; the sub-table                                               ;          ;-------------------------------------------------------------;        MV      .2     B_n,              B_stride                         ZERO    .1     A_tw_offset                            LOOP_WHILE:                       ;-------------------------------------------------------------;          ; "j" is used as an index into the sub-table of twiddle fact- ;          ; ors. Since the pointer to the sub-table of twiddle factors  ;          ; resets with every iteration of the outer loop, the index    ;          ; within the sub-table is also reset to zero. Copies are made ;          ; so that it exists in both data paths.                       ;          ;-------------------------------------------------------------;                                                      ZERO    .1     A_j                                            ZERO    .2     B_j                                              ;-------------------------------------------------------------;          ; "fft_jmp" is a variable that relates the offset between     ;          ; data elements that use the same twiddle factor. It is       ;          ; always 6*stride halfwords, 1.5 * stride double words. It    ;          ; quarters as does stride with every iteration of the outer   ;          ; loop.                                                       ;          ;-------------------------------------------------------------;        MPYSU   .2     6,                B_stride,         B_fft_jmp          SHRU    .1     B_fft_jmp,        3,                A_fft_jmp          ;-------------------------------------------------------------;          ; Determine offsets N/4, N/2, 3N/4 and make copies to both    ;          ; data paths. Also copy input pointer as output pointer.      
;          ;-------------------------------------------------------------;        SHRU    .2     B_stride,         2,                B_h2        MV      .1     B_h2,             A_h2        ADD     .1     A_ptr_x,           -16,              A_x                          ADD     .1     A_ptr_x,           -16,              A_y                          ADDAH   .1     A_ptr_w,           A_tw_offset,      A_w0             SHRU    .1     B_fft_jmp,         1,                A_fft_jmp_1        ADD     .1     A_tw_offset,       A_fft_jmp_1,      A_tw_offset         SHRU    .2     B_stride,          2,                B_stride             ;-------------------------------------------------------------;          ;  Adjustments for BDEC, as it iterates till 0. Deduct 1      ;          ;  from loop trip count of N/4.                               ;          ;-------------------------------------------------------------;        SHRU    .2     B_n,               3,                B_i                SUB     .2     B_i,               1,                B_i          ;-------------------------------------------------------------;          ; Since the stride amount across iterations is variable,      ;          ; it is tough to put an exact stride. However for this        ;          ; loop stride is guaranteed to be greater than or equal       ;          ; to 16 complex samples, 32 half words. Since this str-       ;          ; ide is wider than the bank width, of all the banks,         ;          ; stride is specified as zero.                                
;          ;-------------------------------------------------------------;LOOP_Y: .trip 8          ;-------------------------------------------------------------;        ; si10 = w[0] co10 = w[1]  si11 = w[2]  co11 = w[3]           ;        ; si20 = w[4] co20 = w[5]  si21 = w[6]  co21 = w[7]           ;        ; si30 = w[8] co30 = w[9]  si31 = w[a]  co31 = w[b]           ;        ;-------------------------------------------------------------;        ADDAD   .1     A_w0,               A_j,              A_w        MV      .2     A_w,                B_w        LDDW    .D1T2  *A_w[0],      B_co10:B_si10        LDDW    .D2T1  *B_w[1],      A_co20:A_si20        LDDW    .D1T2  *A_w[2],      B_co30:B_si30         LDDW    .D2T1  *B_w[3],      A_co11:A_si11         LDDW    .D1T2  *A_w[4],      B_co21:B_si21         LDDW    .D2T1  *B_w[5],      A_co31:A_si31         ;-------------------------------------------------------------;        ;  x[0]       x[1]       x[2]       x[3]                      ;        ;  x[h2+0]    x[h2+1]    x[h2+2]    x[h2+3]                   ;        ;  x[l1+0]    x[l1+1]    x[l1+2]    x[l1+3]                   ;        ;  x[l2+0]    x[l2+1]    x[l2+2]    x[l2+3]                   ;        ;-------------------------------------------------------------;        MV      .2     A_x,            B_x        LDDW    .D1T2  *++A_x[2],      B_x_1:B_x_0                 LDDW    .D2T1  *++B_x[3],      A_x_3:A_x_2        LDDW    .D1T2  *++A_x[A_h2],   B_xh2_1i:B_xh2_0i         LDDW    .D2T1  *++B_x[B_h2],   A_xh2_3i:A_xh2_2i        LDDW    .D1T2  *++A_x[A_h2],   B_xl1_1i:B_xl1_0i         LDDW    .D2T1  *++B_x[B_h2],   A_xl1_3i:A_xl1_2i        LDDW    .D1T2  *A_x[A_h2],     B_xl2_1i:B_xl2_0i         LDDW    .D2T1  *B_x[B_h2],     A_xl2_3i:A_xl2_2i        SHL     .2     B_h2,  4,       B_2h2        SUB     .1     A_x,   B_2h2,   A_x        ;-------------------------------------------------------------;        ;  if (!(j - fft_jmp))                                     
   ;        ;  {                                                          ;        ;    j += 12 shorts;                                          ;        ;    x += fft_jmp;                                            ;        ;    j = 0;                                                   ;        ;    x += 4                                                   ;        ;  }                                                          ;        ;-------------------------------------------------------------;        ADD     .1     6,                A_j,              A_j           SUB     .1     A_j,              A_fft_jmp,        A_ifj              [!A_ifj]ADD    .1     A_x,              B_fft_jmp,        A_x   [!A_ifj]ZERO   .1     A_j                                      ;-------------------------------------------------------------;        ; xh0_0 = x[0] + x[l1];    xh1_0 = x[1] + x[l1+1]             ;        ; xh0_1 = x[2] + x[l1+2];  xh1_1 = x[3] + x[l1+3]             ;        ; xl0_0 = x[0] - x[l1];    xl1_0 = x[1] - x[l1+1]             ;        ; xl0_1 = x[2] - x[l1+2];  xl1_1 = x[3] - x[l1+3]             ;        ;-------------------------------------------------------------;        ADDSUB  .2     B_x_0,            B_xl1_0i,            B_xh0_0:B_xl0_0         ADDSUB  .2     B_x_1,            B_xl1_1i,            B_xh1_0:B_xl1_0         ADDSUB  .1     A_x_2,            A_xl1_2i,            A_xh0_1:A_xl0_1         ADDSUB  .1     A_x_3,            A_xl1_3i,            A_xh1_1:A_xl1_1         ;------------------------------------------------------------;        ; xh20_0 = x[h2  ] + x[l2  ]; xh21_0 = x[h2+1] + x[l2+1]     ;        ; xh20_1 = x[h2+2] + x[l2+2]; xh21_1 = x[h2+3] + x[l2+3]     ;        ; xl20_0 = x[h2  ] - x[l2  ]; xl21_0 = x[h2+1] - x[l2+1]     ;        ; xl20_1 = x[h2+2] - x[l2+2]; xl21_1 = x[h2+3] - x[l2+3]     ;        ;------------------------------------------------------------;        ADDSUB  .2     B_xh2_0i,            B_xl2_0i,         
B_xh20_0:B_xl20_0         ADDSUB  .2     B_xh2_1i,            B_xl2_1i,         B_xh21_0:B_xl21_0        ADDSUB  .1     A_xh2_2i,            A_xl2_2i,         A_xh20_1:A_xl20_1         ADDSUB  .1     A_xh2_3i,            A_xl2_3i,         A_xh21_1:A_xl21_1        ;-------------------------------------------------------------;        ;  x0[0]  =  xh0_0  +  xh20_0  x0[1]  =  xh1_0  +  xh21_0     ;        ;  x0[2]  =  xh0_1  +  xh20_1  x0[3]  =  xh1_1  +  xh21_1     ;        ;-------------------------------------------------------------;        ADD     .2     B_xh0_0,             B_xh20_0,         B_x_0o        ADD     .2     B_xh1_0,             B_xh21_0,         B_x_1o        ADD     .1     A_xh0_1,             A_xh20_1,         A_x_2o        ADD     .1     A_xh1_1,             A_xh21_1,         A_x_3o        ;-------------------------------------------------------------;        ;   xt0_0 = xh0_0 - xh20_0     yt0_0 = xh1_0 - xh21_0         ;        ;   xt0_1 = xh0_1 - xh20_1     yt0_1 = xh1_1 - xh21_1         ;        ;-------------------------------------------------------------;        SUB     .2     B_xh0_0,             B_xh20_0,         B_xt0_0         SUB     .2     B_xh1_0,             B_xh21_0,         B_yt0_0         SUB     .1     A_xh1_1,             A_xh21_1,         A_yt0_1        SUB     .1     A_xh0_1,             A_xh20_1,         A_xt0_1         ;-------------------------------------------------------------;        ;  xt1_0  =  xl0_0  +  xl21_0   yt2_0  =  xl1_0  +  xl20_0    ;        ;  xt1_1  =  xl0_1  +  xl21_1   yt2_1  =  xl1_1  +  xl20_1    ;        ;  xt2_0  =  xl0_0  -  xl21_0   yt1_0  =  xl1_0  -  xl20_0    ;        ;  xt2_1  =  xl0_1  -  xl21_1   yt1_1  =  xl1_1  -  xl20_1    ;        ;-------------------------------------------------------------;        ADDSUB  .2     B_xl0_0,            B_xl21_0,         B_xt1_0:B_xt2_0          ADDSUB  .2     B_xl1_0,            B_xl20_0,         B_yt2_0:B_yt1_0         ADDSUB  .1     A_xl0_1,            
A_xl21_1,         A_xt1_1:A_xt2_1         ADDSUB  .1     A_xl1_1,            A_xl20_1,         A_yt2_1:A_yt1_1         ;-------------------------------------------------------------;        ;   x2[h2  ] = (si10 * yt1_0 + co10 * xt1_0) >> 15            ;        ;   x2[h2+1] = (co10 * yt1_0 - si10 * xt1_0) >> 15            ;        ;   x2[h2+2] = (si11 * yt1_1 + co11 * xt1_1) >> 15            ;        ;   x2[h2+3] = (co11 * yt1_1 - si11 * xt1_1) >> 15            ;        ;-------------------------------------------------------------;        SMPY32  .2     B_si10,        B_yt1_0,          B_p0         SMPY32  .2     B_co10,        B_xt1_0,          B_p1         ADD     .2     B_p0,          B_p1,             B_xh2_0o        SMPY32  .2     B_co10,        B_yt1_0,          B_p2         SMPY32  .2     B_si10,        B_xt1_0,          B_p3         SUB     .2     B_p2,          B_p3,             B_xh2_1o        SMPY32  .1     A_si11,        A_yt1_1,          A_p4         SMPY32  .1     A_co11,        A_xt1_1,          A_p5         ADD     .1     A_p4,          A_p5,             A_xh2_2o        SMPY32  .1     A_co11,        A_yt1_1,          A_p6         SMPY32  .1     A_si11,        A_xt1_1,          A_p7         SUB     .1     A_p6,          A_p7,             A_xh2_3o        ;-------------------------------------------------------------;        ;   x2[l1  ] = (si20 * yt0_0 + co20 * xt0_0) >> 15            ;        ;   x2[l1+1] = (co20 * yt0_0 - si20 * xt0_0) >> 15            ;        ;   x2[l1+2] = (si21 * yt0_1 + co21 * xt0_1) >> 15            ;        ;   x2[l1+3] = (co21 * yt0_1 - si21 * xt0_1) >> 15            ;        ;-------------------------------------------------------------;        SMPY32  .1     A_si20,        B_yt0_0,          A_p8         SMPY32  .2     A_co20,        B_xt0_0,          B_p9         ADD     .2     A_p8,          B_p9,             B_xl1_0o        SMPY32  .2     A_co20,        B_yt0_0,          B_pa         SMPY32  .1     A_si20,        
B_xt0_0,          A_pb         SUB     .2     B_pa,          A_pb,             B_xl1_1o        SMPY32  .1     B_si21,        A_yt0_1,          A_pc         SMPY32  .2     B_co21,        A_xt0_1,          B_pd         ADD     .1     A_pc,          B_pd,             A_xl1_2o        SMPY32  .1     B_co21,        A_yt0_1,          A_pe         SMPY32  .2     B_si21,        A_xt0_1,          B_pf         SUB     .1     A_pe,          B_pf,             A_xl1_3o        ;-------------------------------------------------------------;        ;   x2[l2  ] = (si30 * yt2_0 + co30 * xt2_0) >> 15            ;        ;   x2[l2+1] = (co30 * yt2_0 - si30 * xt2_0) >> 15            ;        ;   x2[l2+2] = (si31 * yt2_1 + co31 * xt2_1) >> 15            ;        ;   x2[l2+3] = (co31 * yt2_1 - si31 * xt2_1) >> 15            ;        ;-------------------------------------------------------------;        SMPY32  .2     B_si30,        B_yt2_0,          B_p10         SMPY32  .2     B_co30,        B_xt2_0,          B_p11         ADD     .2     B_p10,         B_p11,            B_xl2_0o        SMPY32  .2     B_co30,        B_yt2_0,          B_p12        SMPY32  .2     B_si30,        B_xt2_0,          B_p13        SUB     .2     B_p12,         B_p13,            B_xl2_1o        SMPY32  .1     A_si31,        A_yt2_1,          A_p14        SMPY32  .1     A_co31,        A_xt2_1,          A_p15        ADD     .1     A_p14,         A_p15,            A_xl2_2o        SMPY32  .1     A_co31,        A_yt2_1,          A_p16        SMPY32  .1     A_si31,        A_xt2_1,          A_p17        SUB     .1     A_p16,         A_p17,            A_xl2_3o        ;-------------------------------------------------------------;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -