📄 fft.asm
字号:
ADDSUB2 .L1 A_xh2_1_xh2_0, A_xl2_1_xl2_0, A_xh21_0_xh20_0:A_xl21_0_xl20_0 ;[9,1]
PACKLH2 .S1 A_xl21_0_xl20_0, A_xl21_0_xl20_0, A_xl20_0_xl21_0 ;[10,1]
|| ADDSUB2 .L1 A_xh2_3_xh2_2, A_xl2_3_xl2_2, A_xh21_1_xh20_1:A_xl21_1_xl20_1 ;[10,1]
|| ADDSUB2 .L2 B_x_3_x_2, B_xl1_3_xl1_2, B_xh1_1_xh0_1:B_xl1_1_xl0_1 ;[10,1]
PACKLH2 .S1 A_xl21_1_xl20_1, A_xl21_1_xl20_1, A_xl20_1_xl21_1 ;[11,1]
|| ADDSUB2 .L2 B_x_1_x_0, B_xl1_1_xl1_0, B_xh1_0_xh0_0:B_xl1_0_xl0_0 ;[11,1]
* ================================ STAGE III ============================== *
ADDSUB2 .L2X B_xl1_0_xl0_0, A_xl20_0_xl21_0, B_yt2_0_xt1_0:B_yt1_0_xt2_0 ;[12,1]
|| SUB2 .L1X A_xh21_1_xh20_1, B_xh1_1_xh0_1, A_yt0_1_xt0_1 ;[12,1]
DPACKX2 .L2 B_yt1_0_xt2_0, B_yt2_0_xt1_0, B_xt1_0_yt1_0:B_xt2_0_yt2_0 ;[13,1]
|| SUB2 .L1X A_xh21_0_xh20_0, B_xh1_0_xh0_0, A_yt0_0_xt0_0 ;[13,1]
|| CMPYR1 .M1 A_co21_si21, A_yt0_1_xt0_1, A_xl1_3_2 ;[13,1]
ADDSUB2 .L2X B_xl1_1_xl0_1, A_xl20_1_xl21_1, B_yt2_1_xt1_1:B_yt1_1_xt2_1 ;[14,1]
|| MVD .M1X B_x_, A_x ;[14,1]
DPACKX2 .L2 B_yt1_1_xt2_1, B_yt2_1_xt1_1, B_xt1_1_yt1_1:B_xt2_1_yt2_1 ;[15,1]
|| ADD2 .S2X B_xh1_0_xh0_0, A_xh21_0_xh20_0, B_x_1o_x_0o ;[15,1]
|| CMPYR1 .M2 B_co30_si30, B_xt2_0_yt2_0, B_x_l2_0_1 ;[15,1] si = -si
|| CMPYR1 .M1X A_co10_si10, B_xt1_0_yt1_0, A_x_h2_0_1 ;[15,1] si = -si
ADD2 .S2X B_xh1_1_xh0_1, A_xh21_1_xh20_1, B_x_3o_x_2o ;[16,1]
|| CMPYR1 .M2 B_co31_si31, B_xt2_1_yt2_1, B_x_l2_2_3 ;[16,1]
|| CMPYR1 .M1 A_co20_si20, A_yt0_0_xt0_0, A_xl1_1_0 ;[16,1] si = -si
CMPYR1 .M1X A_co11_si11, B_xt1_1_yt1_1, A_x_h2_2_3 ;[17,1]
* ============================== STAGE IV ================================= *
NOP 1 ;[18,1]
ROTL .M2 B_x_l2_0_1, 16, B_xl2_1_0 ;[19,1]
STDW .D1T1 A_xl1_3_2:A_xl1_1_0, *A_x[A_h2] ;[20,1]
|| PACKLH2 .S1 A_x_h2_0_1, A_x_h2_0_1, A_xh2_1_0 ;[20,1]
|| ROTL .M2 B_x_l2_2_3, 16, B_xl2_3_2 ;[20,1]
STDW .D1T2 B_x_3o_x_2o:B_x_1o_x_0o, *A_x[0] ;[21,1]
|| PACKLH2 .S1 A_x_h2_2_3, A_x_h2_2_3, A_xh2_3_2 ;[21,1]
STDW .D1T2 B_xl2_3_2:B_xl2_1_0, *A_x[A_l2] ;[22,1]
STDW .D1T1 A_xh2_3_2:A_xh2_1_0, *A_x[A_l1] ;[23,1]
* ============================== STAGE V ================================== *
NOP 5 ;extend schedule to allow +1 prolog stage
SPKERNELR ;delay 0 cycles before reloading
* ========================================================================= *
* ========================================================================= *
; LOOP_WHILE: per-pass parameter update that follows the SPKERNELR above.
; Each pass shifts the stride/offset registers right by 2 (B_l1, B_h2,
; A_fft_jmp1, B_l2_, B_fft_jmp, A_l1), rescales A_fft_jmp, A_l2 and A_h2,
; advances the coefficient pointers A_w0/A_w1/B_w2 (the loaded pairs are
; named co*/si*), and issues the first loads of the next pass through
; SPMASK'd (^) instructions. The counter B_wh then selects between a branch
; to EARLY and a branch back to LOOP_WHILE.
; NOTE(review): the tags paM/pbM/pcM/pdM and "[cycle,iter] bump" in the
; trailing comments are the original author's schedule annotations; their
; exact meaning is not defined in the visible code. Per those comments the
; ^-marked instructions stand in for the kernel instruction scheduled at the
; given slot while the loop buffer reloads -- confirm against the SPLOOP
; documentation before moving any packet.
LOOP_WHILE:
; B_l1 >>= 2;  A_fft_jmp_3 = A_fft_jmp1 - 3
SHRU .S2 B_l1, 2, B_l1 ;paM
|| SUB .S1 A_fft_jmp1, 3, A_fft_jmp_3 ;paM
; Reset the B-side data pointer to the start of the buffer: B_x = A_ptr_x
SPMASK
||^ MV .S2X A_ptr_x, B_x ;[1,1]bump ADD B_x, 8, B_x
; B_h2 >>= 2
SPMASK
|| SHRU .S2 B_h2, 2, B_h2 ;paM
NOP 1
; First data load of the pass: doubleword at B_x indexed by B_l2_
SPMASK
||^ LDDW .D2T1 *B_x[B_l2_], A_xl2_3_xl2_2:A_xl2_1_xl2_0 ;[4,1]bump B_l2
; A_ifj = A_fft_jmp_3;  B_j = 0 (product with the constant 0);  B_l2 = B_l2_
SPMASK
||^ MV .L1 A_fft_jmp_3, A_ifj ;[5,1]bump SUB A_fft_jmp_3, A_j, A_ifj
||^ MPY .M2X 0, A_j, B_j ;[5,1]bump ROTL A_j, 0, B_j
|| MV .S2 B_l2_, B_l2 ;paM
* ========================================================================= *
; Pre-increment A_w1 by A_fft_jmp and load a coefficient pair from it.
; In the same packet: B_w2 += B_fft_jmp, A_fft_jmp is shifted right by A_c2
; (SSHVR; the value of A_c2 is set outside this view), A_fft_jmp1 >>= 2.
SPMASK
||^ LDDW .D1T1 *++A_w1[A_fft_jmp], A_co21_si21:A_co20_si20 ;[ 6,1]bump
|| ADD .S2 B_w2, B_fft_jmp, B_w2 ;pbM
|| SSHVR .M1 A_fft_jmp, A_c2, A_fft_jmp ;pbM
|| SHRU .S1 A_fft_jmp1, 2, A_fft_jmp1 ;pbM
; A_j = 3; load coefficient pairs from *B_w2[0] and from A_w0 after
; pre-incrementing A_w0 by A_fft_jmp
SPMASK
||^ MVK .S1 3, A_j ;[ 7,1] bumped A_j = 3+ A_j
||^ LDDW .D2T2 *B_w2[0], B_co31_si31:B_co30_si30 ;[ 7,1] bumped LDDW *B_w2[B_j]
||^ LDDW .D1T1 *++A_w0[A_fft_jmp], A_co11_si11:A_co10_si10 ;[ 7,1] bumped
; B_l2_ >>= 2
SHRU .S2 B_l2_, 2, B_l2_ ;pbM
NOP 2
; B_fft_jmp >>= 2
SHRU .S2 B_fft_jmp, 2, B_fft_jmp ;pbM
* ========================================================================= *
; A_h2 = SMPY32(A_h2, A_shr2) -- presumably a fractional scale of A_h2;
; A_shr2 is defined outside this view (TODO confirm its value).
; If B_wh == 0, branch to EARLY; the BNOP supplies its own 5 delay cycles.
SMPY32 .M1 A_h2, A_shr2, A_h2 ;pcM
||[!B_wh]BNOP .S2 EARLY, 5 ;pcM skip early
* ========================================================================= *
; If B_wh != 0: branch back to LOOP_WHILE and decrement B_wh (BDEC).
; In the same packet: A_l2 is shifted right by A_c2, A_l1 >>= 2.
[B_wh]BDEC .S2 LOOP_WHILE, B_wh ;pdM
|| SSHVR .M1 A_l2, A_c2, A_l2 ;pdM
|| SHRU .S1 A_l1, 2, A_l1 ;pdM
; Fill the delay slots of the BDEC branch
NOP 5
;BRANCH OCCURS HERE
EARLY:
; ======================================================================
; Symbolic register names used by loop 2 (the SPLOOPD that follows).
; Several names deliberately share one physical register; this relies on
; the cycle-exact schedule of the loop, so do not re-assign a register
; without re-checking the lifetimes of every name that maps to it.
; ======================================================================
;
; ---------------- A-side register file ----------------
; Input pointers
.asg A6, A_x0
.asg A17, A_x1
; Scalars
.asg A16, A_2n
.asg A19, A_zero
; Input doublewords (A_x7x6 / A_xfxe are A-side copies of B-side loads)
.asg A28, A_x1x0
.asg A29, A_x3x2
.asg A18, A_x7x6
.asg A30, A_x9x8
.asg A31, A_xbxa
.asg A24, A_xfxe
; Packed differences and their half-swapped / negated forms
.asg A25, A_xl1_0_xl0_0
.asg A29, A_xl1_1_xl0_1
.asg A27, A_xl1_2_xl0_2
.asg A28, A_xl1_3_xl0_3
.asg A31, A_xl0_1_xl1_1
.asg A26, A_xl0_3_xl1_3
.asg A29, A_mx7mx6
.asg A31, A_mxfmxe
; ADDSUB2 result pairs
.asg A25, A_yt7_yt2
.asg A24, A_yt3_yt6
.asg A27, A_ytf_yta
.asg A26, A_ytb_yte
; Repacked outputs stored from the A side
.asg A22, A_y3y2
.asg A26, A_y7y6
.asg A23, A_ybya
.asg A27, A_yfye
;
; ---------------- B-side register file ----------------
; Input pointers
.asg B20, B_x0
.asg B21, B_x1
; Output pointers
.asg B6, B_y0
.asg B23, B_y1
.asg B24, B_y2
.asg B25, B_y3
; Input doublewords
.asg B26, B_x5x4
.asg B27, B_x7x6
.asg B30, B_xdxc
.asg B31, B_xfxe
; Packed sums
.asg B26, B_xh1_0_xh0_0
.asg B27, B_xh1_1_xh0_1
.asg B19, B_xh1_2_xh0_2
.asg B18, B_xh1_3_xh0_3
; Outputs stored from the B side
.asg B28, B_y1y0
.asg B28, B_y5y4
.asg B29, B_y9y8
.asg B29, B_ydyc
* =========================== PIPE LOOP PROLOG ============================ *
; Loop 2: a software-pipelined SPLOOPD loop (II = 6, three stages; the trip
; count is loaded into ILC from B_i).
; Each iteration loads four doublewords of packed 16-bit data (through
; A_x0, A_x1, B_x0 and B_x1), combines them with packed add/subtract and
; half-word repacking only (no multiplies appear in this loop), and stores
; four doublewords through B_y0..B_y3 at index B_l2 = BITR(A_j) >> B_l1.
; Instructions marked ^ under SPMASK are labelled "prolog" by the original
; comments: one-time setup issued alongside the first loop cycles.
; The [c,i] tags in the trailing comments are the original schedule slots.
;
; Setup in the SPLOOPD packet: ILC = B_i, B_x0 = A_ptr_x + 8, A_2n = B_n << 1
SPLOOPD 6 ;II = 6
|| MVC .S2 B_i, ILC ;setup loop count
|| ADD .L2X A_ptr_x, 8, B_x0 ;
|| SHL .S1X B_n, 1, A_2n ;pcM 2nd loop prolog
* ================================ STAGE 1 =============================== *
; Prolog: B_x1 = B_x0 + B_n halfwords (ADDAH), A_x1 = A_x0 + A_2n,
; B_y1 = B_y0 + B_n, B_l1 = NORM(B_n)
SPMASK
||^ ADDAH .D2 B_x0, B_n, B_x1 ;prolog
||^ ADD .L1 A_x0, A_2n, A_x1 ;prolog
||^ ADD .S2 B_y0, B_n, B_y1 ;prolog
||^ NORM .L2 B_n, B_l1 ;prolog
; Kernel loads from B_x1 and A_x0, each post-incremented by 2 doublewords.
; Prolog: B_y2 = B_y1 + B_n, B_l1 += 5, A_j = 0
SPMASK
|| LDDW .D2T2 *B_x1++[2], B_xfxe:B_xdxc ;[ 1,1]
|| LDDW .D1T1 *A_x0++[2], A_x3x2:A_x1x0 ;[ 1,1]
||^ ADD .L2 B_y1, B_n, B_y2 ;prolog
||^ ADD .S2 B_l1, 5, B_l1 ;prolog
||^ ZERO .L1 A_j ;prolog code
; Kernel loads from A_x1 and B_x0, each post-incremented by 2 doublewords.
; Prolog: B_y3 = B_y2 + B_n
SPMASK
|| LDDW .D1T1 *A_x1++[2], A_xbxa:A_x9x8 ;[ 2,1]
|| LDDW .D2T2 *B_x0++[2], B_x7x6:B_x5x4 ;[ 2,1]
||^ ADD .L2 B_y2, B_n, B_y3 ;prolog code
NOP 2 ;
; A_zero = 0; A_j += 1; B_h2 = bit-reversed A_j. The ADD and the BITR read
; A_j in the same execute packet.
ZERO .L1 A_zero ;[ 5,1]
|| ADD .S1 A_j, 1, A_j ;[ 5,1]
|| BITR .M2X A_j, B_h2 ;[ 5,1]
* ================================ STAGE 2 =============================== *
NOP 1
; Packed 16-bit sum xd:xc + x9:x8; copy B_x7x6 to the A side (ROTL by 0
; acts as a move on the .M unit)
ADD2 .S2X B_xdxc, A_x9x8, B_xh1_2_xh0_2 ;[ 7,1]
|| ROTL .M1X B_x7x6, 0, A_x7x6 ;[ 7,1]
; Packed sum x7:x6 + x3:x2; copy B_xfxe to the A side
ADD2 .S2X B_x7x6, A_x3x2, B_xh1_1_xh0_1 ;[ 8,1]
|| ROTL .M1X B_xfxe, 0, A_xfxe ;[ 8,1]
; Packed sum xf:xe + xb:xa; packed differences x3:x2 - x7:x6 and
; x1:x0 - x5:x4
ADD2 .L2X B_xfxe, A_xbxa, B_xh1_3_xh0_3 ;[ 9,1]
|| SUB2 .D1 A_x3x2, A_x7x6, A_xl1_1_xl0_1 ;[ 9,1]
|| SUB2 .L1X A_x1x0, B_x5x4, A_xl1_0_xl0_0 ;[ 9,1]
; Output pair yd:yc = xh_2 - xh_3; packed sum x5:x4 + x1:x0; packed
; difference xb:xa - xf:xe; ROTL by 16 swaps the halves of A_xl1_1_xl0_1.
; NOTE(review): A_mxfmxe and A_mx7mx6 (0 - x, packed) are written here but
; are not read anywhere in the visible code, and both share a register with
; another live name (A31 / A29 per the .asg list) -- confirm whether these
; two SUB2s are intentional before changing the register assignments.
SUB2 .S2 B_xh1_2_xh0_2, B_xh1_3_xh0_3, B_ydyc ;[10,1]
|| ADD2 .L2X B_x5x4, A_x1x0, B_xh1_0_xh0_0 ;[10,1]
|| SUB2 .D1 A_zero, A_xfxe, A_mxfmxe ;[10,1]
|| SUB2 .L1 A_zero, A_x7x6, A_mx7mx6 ;[10,1]
|| SUB2 .S1 A_xbxa, A_xfxe, A_xl1_3_xl0_3 ;[10,1]
|| ROTL .M1 A_xl1_1_xl0_1, 16, A_xl0_1_xl1_1 ;[10,1]
; Output pair y5:y4 = xh_0 - xh_1; packed difference x9:x8 - xd:xc;
; store index B_l2 = B_h2 >> B_l1; swap the halves of A_xl1_3_xl0_3
SUB2 .L2 B_xh1_0_xh0_0, B_xh1_1_xh0_1, B_y5y4 ;[11,1]
|| SUB2 .D1X A_x9x8, B_xdxc, A_xl1_2_xl0_2 ;[11,1]
|| SHRU .S2 B_h2, B_l1, B_l2 ;[11,1]
|| ROTL .M1 A_xl1_3_xl0_3, 16, A_xl0_3_xl1_3 ;[11,1]
* ================================ STAGE 3 =============================== *
; Store yd:yc / y5:y4 at B_y2[B_l2]; form y9:y8 and y1:y0 as packed sums;
; ADDSUB2 writes the packed sum to the first register of the destination
; pair and the packed difference to the second
STDW .D2T2 B_ydyc:B_y5y4, *B_y2[B_l2] ;[12,1]
|| ADD2 .L2 B_xh1_2_xh0_2, B_xh1_3_xh0_3, B_y9y8 ;[12,1]
|| ADD2 .S2 B_xh1_0_xh0_0, B_xh1_1_xh0_1, B_y1y0 ;[12,1]
|| ADDSUB2 .L1 A_xl1_0_xl0_0, A_xl0_1_xl1_1, A_yt7_yt2:A_yt3_yt6 ;[12,1]
; y3:y2 = high half of A_yt3_yt6 : low half of A_yt7_yt2 (PACKHL2);
; second ADDSUB2 for the upper butterfly pair
PACKHL2 .S1 A_yt3_yt6, A_yt7_yt2, A_y3y2 ;[13,1]
|| ADDSUB2 .L1 A_xl1_2_xl0_2, A_xl0_3_xl1_3, A_ytf_yta:A_ytb_yte ;[13,1]
; yf:ye and yb:ya repacked from the second ADDSUB2 result pair
PACKHL2 .L1 A_ytf_yta, A_ytb_yte, A_yfye ;[14,1]
|| PACKHL2 .S1 A_ytb_yte, A_ytf_yta, A_ybya ;[14,1]
; Store yb:ya / y3:y2 at B_y1[B_l2]; repack y7:y6
STDW .D2T1 A_ybya:A_y3y2, *B_y1[B_l2] ;[15,1]
|| PACKHL2 .S1 A_yt7_yt2, A_yt3_yt6, A_y7y6 ;[15,1]
; Store y9:y8 / y1:y0 at B_y0[B_l2]
STDW .D2T2 B_y9y8:B_y1y0, *B_y0[B_l2] ;[16,1]
; End of the loop body; final store yf:ye / y7:y6 at B_y3[B_l2].
; NOTE(review): the SPKERNEL operands (1,1) presumably select the epilog
; stage/cycle at which the code after the loop starts to overlap -- confirm
; against the SPKERNEL documentation.
SPKERNEL 1,1
|| STDW .D2T1 A_yfye:A_y7y6, *B_y3[B_l2] ;[17,1]
******************************************************************************
; Function exit, issued while the loop 2 epilog drains (see the E1,Cn tags
; below): branch to the address held in B3 (presumably the caller's return
; address -- confirm against the function entry, which is outside this
; view), reload the saved registers A11:A10 and B10 from the stack, and
; advance the stack pointer B_SP by 2 words.
**** Overlap with Epilogue E1, C1
**** E1, C1 **** .S1, .L1 ****************************************************
; Start the branch through B3 and reload A11:A10 from the doubleword at B_SP
BNOP B3
|| LDDW .D2T1 *+B_SP[0], A11:A10 ; Restore A11:A10
**** E1, C2 **** .S1, .L1 ****************************************************
; Pre-increment B_SP by 2 words, then reload B10 from the new top of stack
LDW .D2T2 *++B_SP[2], B10 ; Reset SP, Restore B10
**** E1, C3 **** .S1, .D2T1 **************************************************
; Remaining cycles before the branch is taken; the loads issued above
; complete during them
NOP 4
**** E1, C4 **** .D2T2 *******************************************************
**** E1, C5 **** .D2T1 *******************************************************
.end
* ======================================================================== *
* End of file: fft.asm *
* ------------------------------------------------------------------------ *
* Copyright (C) 2005 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================== *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -