📄 dsp_fft32x32_sa.sa
字号:
; Store four outputs for all the four legs of butterfly       ;
;-------------------------------------------------------------;
            MV      .2      A_y, B_y

            STDW    .D1T2   B_x_1o:B_x_0o, *++A_y[2]
            STDW    .D2T1   A_x_3o:A_x_2o, *++B_y[3]
            STDW    .D1T2   B_xh2_1o:B_xh2_0o, *++A_y[A_h2]
            STDW    .D2T1   A_xh2_3o:A_xh2_2o, *++B_y[B_h2]
            STDW    .D1T2   B_xl1_1o:B_xl1_0o, *++A_y[A_h2]
            STDW    .D2T1   A_xl1_3o:A_xl1_2o, *++B_y[B_h2]
            STDW    .D1T2   B_xl2_1o:B_xl2_0o, *A_y[A_h2]
            STDW    .D2T1   A_xl2_3o:A_xl2_2o, *B_y[B_h2]

            SHL     .1      A_h2, 4, A_2h2
            SUB     .1      A_y, A_2h2, A_y

            ADD     .2      6, B_j, B_j
            SUB     .2      B_j, A_fft_jmp, B_ifj
  [!B_ifj]  ADD     .1      A_y, B_fft_jmp, A_y
  [!B_ifj]  ZERO    .2      B_j

  [B_i]     BDEC    .2      LOOP_Y, B_i

*=========================================================================== *

            CMPGTU  .2      B_stride, A_radix, B_while
  [B_while] B       .2      LOOP_WHILE

;-----------------------------------------------------------;
; The following code performs either a standard radix4 pass ;
; or a radix2 pass. Two pointers are used to access the     ;
; input. The input data is read "N/4" complex samples apart ;
; or "N/2" words apart using pointers "x0" and "x2". This   ;
; produces outputs that are 0, N/4, N/2, 3N/4 for a radix4  ;
; FFT, and 0, N/8, N/2, 5N/8 for radix 2.                   ;
; The following pointers are therefore set up. They are set ;
; up as twin pointers so that data accesses can parallelize ;
;                                                           ;
;     y0 = ptr_y                                            ;
;     y2 = ptr_y + (int) npoints                            ;
;     x0 = ptr_x                                            ;
;     x2 = ptr_x + (int) (npoints>>1)                       ;
;                                                           ;
; NOTE(review): the code below actually sets the second     ;
; input pointer to ptr_x + 8 bytes and both pointers step   ;
; by two doublewords per load, so the x2 line above does    ;
; not match the visible instructions.                       ;
;-----------------------------------------------------------;
            SUB     .1      A_radix, 2, A_r2

            MV      .1      A_ptr_x, A_p_x0
            ADD     .2      A_ptr_x, 8, B_p_x0

            MV      .2      B_ptr_y, B_p_y0
            ADDAW   .2      B_p_y0, B_n, B_p_y2
            ADDAH   .2      B_p_y0, B_n, B_p_y1
            ADDAH   .2      B_p_y2, B_n, B_p_y3

;-----------------------------------------------------------;
; Prepare shift amount for digit reversed index and the     ;
; increment amount assuming that the radix is 4.            ;
;-----------------------------------------------------------;
            NORM    .2      B_n, B_l1
            ADD     .2      B_l1, 2, B_l1
            MVK     .2      4, B_j0

;-----------------------------------------------------------;
; Change pointers as required, and the increment amount if  ;
; radix 2 is required.                                      ;
;                                                           ;
;     if (radix == 2)                                       ;
;     {                                                     ;
;         y1 = y0 + (int) (npoints >> 2)                    ;
;         y3 = y2 + (int) (npoints >> 2)                    ;
;         l1 = _norm(npoints) + 1                           ;
;         j0 = 8                                            ;
;         n0 = npoints >> 1                                 ;
;     }                                                     ;
;                                                           ;
; NOTE(review): no instruction in this section computes n0. ;
;-----------------------------------------------------------;
  [!A_r2]   ADD     .2      B_p_y0, B_n, B_p_y1
  [!A_r2]   ADD     .2      B_p_y2, B_n, B_p_y3
  [!A_r2]   NORM    .2      B_n, B_l1
  [!A_r2]   ADD     .2      B_l1, 1, B_l1
  [!A_r2]   MVK     .2      8, B_j0

;-----------------------------------------------------------;
; Loop counter for the following loop is npoints >> 2. In   ;
; addition deduct 2 from the loop trip counter to account   ;
; for BDEC.                                                 ;
;-----------------------------------------------------------;
            SHRU    .1      B_n, 2, A_i
            SUB     .1      A_i, 2, A_i

            ZERO    .2      B_j

            .mptr   A_p_x0, A_x+0, 16
            .mptr   B_p_x0, A_x+2, 16
            .mptr   B_p_y0, A_x+0, 0
            .mptr   B_p_y1, A_x+2, 0
            .mptr   B_p_y2, A_x+0, 0
            .mptr   B_p_y3, A_x+2, 0

LOOP_Z:     .trip   8

;-----------------------------------------------------------;
; Digit reverse the index starting from 0. The increment    ;
; "j" is either by 4, or 8.                                 ;
;                                                           ;
;     h2 = _deal(j)                                         ;
;     h2 = _bitr(h2)                                        ;
;     h2 = _rotl(h2, 16)                                    ;
;     h2 = _shfl(h2)                                        ;
;     h2 >>= l1                                             ;
;-----------------------------------------------------------;
            DEAL    .2      B_j, B_h0
            BITR    .2      B_h0, B_h1
            ROTL    .2      B_h1, 16, B_h2
            SHFL    .2      B_h2, B_h3
            SHRU    .2      B_h3, B_l1, B_h4

            ADD     .2      B_j, B_j0, B_j

;-----------------------------------------------------------;
; Read in the input data, from the first eight locations.   ;
; These are transformed either as a radix4 or as radix 2.   ;
;-----------------------------------------------------------;
            LDDW    .D1T1   *A_p_x0++[2], A_x1:A_x0
            LDDW    .D2T2   *B_p_x0++[2], B_x3:B_x2
            LDDW    .D1T1   *A_p_x0++[2], A_x5:A_x4
            LDDW    .D2T2   *B_p_x0++[2], B_x7:B_x6

;-----------------------------------------------------------;
; xh0_0 = x_0 + x_4    xh1_0 = x_1 + x_5                    ;
; xl0_0 = x_0 - x_4    xl1_0 = x_1 - x_5                    ;
; xh0_1 = x_2 + x_6    xh1_1 = x_3 + x_7                    ;
; xl0_1 = x_2 - x_6    xl1_1 = x_3 - x_7                    ;
;-----------------------------------------------------------;
            ADDSUB  .1      A_x0, A_x4, A_xh0_0:A_xl0_0
            ADDSUB  .1      A_x1, A_x5, A_xh1_0:A_xl1_0
            ADDSUB  .2      B_x2, B_x6, B_xh0_1:B_xl0_1
            ADDSUB  .2      B_x3, B_x7, B_xh1_1:B_xl1_1

            ; Working copies: the radix 2 path below overwrites the
            ; copies (xl1_1c, xh1_0c) rather than the ADDSUB results.
            MV      .2      B_xl1_1, B_xl1_1c
            MV      .1      A_xh1_0, A_xh1_0c

;-----------------------------------------------------------;
; Replace results conditionally if it is determined that    ;
; it is a radix 2 pass as follows:                          ;
;                                                           ;
; xh0_0 = x0    xh1_0 = x1                                  ;
; xh0_1 = x2    xh1_1 = x3                                  ;
; xl0_0 = x4    xl1_0 = x5                                  ;
; xl0_1 = x7    xl1_1 = x6                                  ;
;-----------------------------------------------------------;
  [!A_r2]   ROTL    .1      A_x0, 0, A_xh0_0
  [!A_r2]   ROTL    .1      A_x1, 0, A_xh1_0c
  [!A_r2]   ROTL    .2      B_x2, 0, B_xh0_1
  [!A_r2]   ROTL    .2      B_x3, 0, B_xh1_1
  [!A_r2]   ROTL    .1      A_x4, 0, A_xl0_0
  [!A_r2]   ROTL    .1      A_x5, 0, A_xl1_0
  [!A_r2]   MV      .2      B_x6, B_xl1_1c
  [!A_r2]   MV      .2      B_x7, B_xl0_1

;-----------------------------------------------------------;
; radix4: y0 = xh0_0 + xh0_1    radix2: y0 = x0 + x2        ;
; radix4: y1 = xh1_0 + xh1_1    radix2: y1 = x1 + x3        ;
; radix4: y4 = xh0_0 - xh0_1    radix2: y4 = x0 - x2        ;
; radix4: y5 = xh1_0 - xh1_1    radix2: y5 = x1 - x3        ;
;-----------------------------------------------------------;
            ADD     .2      A_xh0_0, B_xh0_1, B_y0
            ADD     .2      A_xh1_0c, B_xh1_1, B_y1
            SUB     .2      A_xh0_0, B_xh0_1, B_y4
            SUB     .2      A_xh1_0c, B_xh1_1, B_y5

;-----------------------------------------------------------;
; radix4: y2 = xl0_0 + xl1_1    radix2: y2 = x4 + x6        ;
; radix4: y3 = xl1_0 - xl0_1    radix2: y3 = x5 - x7        ;
; radix4: y6 = xl0_0 - xl1_1    radix2: y6 = x4 - x6        ;
; radix4: y7 = xl1_0 + xl0_1    radix2: y7 = x5 + x7        ;
; (radix2 y3/y7 are shown before the swap below)            ;
;-----------------------------------------------------------;
            ADD     .1      A_xl0_0, B_xl1_1c, A_y2
            SUB     .1      A_xl1_0, B_xl0_1, A_y3
            SUB     .1      A_xl0_0, B_xl1_1c, A_y6
            ADD     .1      A_xl1_0, B_xl0_1, A_y7

;-----------------------------------------------------------;
; Swap y3, y7 if radix2, giving:                            ;
;     y2 = x4 + x6,  y3 = x5 + x7,                          ;
;     y6 = x4 - x6,  y7 = x5 - x7                           ;
;-----------------------------------------------------------;
            MV      .1      A_y3, A_temp
  [!A_r2]   MV      .1      A_y7, A_y3
  [!A_r2]   MV      .1      A_temp, A_y7

;-----------------------------------------------------------;
; Store using digit reversed index, bit reversed index      ;
; and pointers p_y0,...p_y3                                 ;
;-----------------------------------------------------------;
            STDW    .D2T2   B_y1:B_y0, *B_p_y0[B_h4]
            STDW    .D2T1   A_y3:A_y2, *B_p_y1[B_h4]
            STDW    .D2T2   B_y5:B_y4, *B_p_y2[B_h4]
            STDW    .D2T1   A_y7:A_y6, *B_p_y3[B_h4]

;-----------------------------------------------------------;
; Decrement and branch back to LOOP_Z                       ;
;-----------------------------------------------------------;
            BDEC    .1      LOOP_Z, A_i

            .return
            .endproc

* ======================================================================== *
*  End of file: DSP_fft32x32_p.sa                                          *
* ------------------------------------------------------------------------ *
*          Copyright (C) 2007 Texas Instruments, Incorporated.             *
*                          All Rights Reserved.                            *
* ======================================================================== *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -