📄 fftspx_h.asm
字号:
; Tail of the outer "while" pass loop; its LOOP_WHILE label and body start
; above this excerpt. B_while = (stride > radix), unsigned. The compare runs
; in the same execute packet as the shift, so it tests the stride value from
; before stride is quartered.
CMPGTU.L2 B_stride, B_radix, B_while ;test for last pass
|| SHRU .S2 B_stride, 2, B_stride ;[6,0]stride=stride>>2
; Branch back for another pass. The instructions predicated on B_while in this
; packet and in the five packets that follow it (up to ";BRANCH OCCURS") set up
; the next pass: first loads through B_ptr_x / B_x_, twiddle pointer and
; tw_offset update. The unpredicated MV to B_x_ executes on both paths.
[B_while]B .S1 LOOP_WHILE ;}/* end while */
||[B_while]LDDW.D2T2 *B_ptr_x[0],B_xp1:B_xp0 ;[3,1] X[i]
||[B_while]MV .L2 B_ptr_x, B_x ;[3,1]x = ptr_x
|| MV .S2 B_ptr_x, B_x_ ;[6,0]x_ = ptr_x
[B_while]LDDW.D2T1 *++B_x_[B_stride],A_xh2p1:A_xh2p0;[4,1]x[N/4+i]
; NOTE(review): the multiplier below is 12 while the comment says 3*stride;
; presumably the extra factor of 4 relates to the stride>>2 above or to element
; scaling -- confirm against the part of the loop not shown here.
[B_while]LDDW.D2T2 *++B_x_[B_stride],B_xl1p1:B_xl1p0;[5,1]x[N/2+i]
||[B_while]MPYSU.M1X 12, B_stride, A_fft_jmp ;[4,0]fft_jmp=3*stride
[B_while]LDDW.D2T1 *++B_x_[B_stride],A_xl2p1:A_xl2p0;[6,1]x[3N/4+i]
||[B_while]ADDAH.D1 A_ptr_w, A_tw_offset,A_w0 ;[5,0]w=ptr_w+tw_offset (halfword-scaled)
[B_while]MPYSU.M2 12, B_stride, B_fft_jmp ;[7,1]fft_jmp=stride*3
||[B_while]SUB.S1 A_n, 0, A_i ;[6,0]A_i = n, for(i=0; i < n){
||[B_while]ADD.L1 A_tw_offset,A_fft_jmp,A_tw_offset;[6,0]tw_offset+=fft_jmp
[B_while]STW.D2T1 A_tw_offset,*-B_SP[20] ;save tw_offset to stack slot 20
;BRANCH OCCURS
; Reached when the while loop is not re-entered. If radix > 4 (unsigned) the
; routine branches straight to EARLY_EXIT; otherwise it falls through to the
; code that follows. The four LDW reloads sit in the branch's delay slots, so
; they issue on both paths.
; NOTE(review): A_SP is read here but is only seen being copied from B_SP at
; EARLY_EXIT; it is assumed to have been set up earlier, outside this
; excerpt -- confirm.
CMPGTU.L2 B_radix, 4, B_return ;check for early exit
[B_return]B .S2 EARLY_EXIT ;early exit for cache
LDW .D2T2 *-B_SP[10], B_n_max ;restore n_max
|| LDW .D1T1 *-A_SP[11], A_offset ;restore offset
LDW .D2T2 *-B_SP[14], B_ptr_y ;restore ptr_y
|| LDW .D1T1 *-A_SP[13], A_brev ;restore brev table
NOP 3 ;remaining branch delay slots / load latency
NOP 1 ;fall-through only: last cycle of load latency
* ====================== SYMBOLIC REGISTER ASSIGNMENTS ======================= *
; Symbolic names for the final loop (prolog + LOOP_FOR_B). Several names share
; one physical register, e.g. B8 = B_radix/B_x, A10 = A_offset/A_k1,
; B10 = B_n_max/B_x0, B4 = B_ptr_x/B_k0_/B_k_/B_xl1_0/B_yt3,
; A4 = A_n/A_xh0_1/A_yt7. An alias is therefore only meaningful while the value
; it names is live; changing the schedule can silently clobber an input.
 .asg A4, A_n ;number of points in transform
 .asg B4, B_ptr_x ;pntr to in data partial transform
 .asg B6, B_ptr_y ;pntr to final output data
 .asg A8, A_brev ;pointer to bit reverse table
 .asg B8, B_radix ;smallest butterfly size radix=2,4
 .asg A10, A_offset ;index into main fft array
 .asg B10, B_n_max ;maximum size of all ffts
 .asg A2, A_r2 ;condition whether radix2
 .asg B8, B_x ;pointer to data
 .asg A7, A_x ;pointer to data
 .asg A11, A_y0 ;even output data pointer
 .asg B5, B_y0 ;odd output data pointer
 .asg B12, B_n2 ;n/4 (NOTE(review): prolog computes n_max >> 1 -- confirm units)
 .asg A6, A_n2 ;copy of n2
 .asg B2, B_i ;loop counter
 .asg B9, B_l0 ;shift for index bit reverse
 .asg A1, A_pro ;prolog counter
 .asg A9, A_j ;index into data
 .asg A12, A_cx3f ;mask for bit reverse calc.
 .asg A3, A_j0 ;lower 6 bits of j
 .asg A5, A_j1 ;upper 6 bits of j
 .asg A0, A_k0 ;reversed j0
 .asg A10, A_k1 ;reversed j1
 .asg B4, B_k0_ ;k0 << 6
 .asg B4, B_k_ ;k0_ + k1
 .asg B6, B_k ;k_ >> l0
 .asg A3, A_k ;copy of k
 .asg A5, A_ptr_y0 ;copy of A_y0
 .asg B7, B_ptr_y1 ;copy of B_y0
 .asg B11, B_x1 ;partial sums
 .asg B10, B_x0 ;partial sums
 .asg A15, A_x3 ;partial sums
 .asg A14, A_x2 ;partial sums
 .asg B1, B_x5 ;partial sums
 .asg B0, B_x4 ;partial sums
 .asg A13, A_x7 ;partial sums
 .asg A12, A_x6 ;partial sums
 .asg B1, B_xh0_0 ;partial sums
 .asg B7, B_xh1_0 ;partial sums
 .asg A4, A_xh0_1 ;partial sums
 .asg A13, A_xh1_1 ;partial sums
 .asg A12, A_yt0 ;partial sums
 .asg A5, A_yt1 ;partial sums
 .asg B3, B_yt4 ;partial sums
 .asg B6, B_yt5 ;partial sums
 .asg B3, B_xl0_0 ;partial sums
 .asg B4, B_xl1_0 ;partial sums
 .asg A5, A_xl0_1 ;partial sums
 .asg A14, A_xl1_1 ;partial sums
 .asg A3, A_yt2 ;partial sums
 .asg B4, B_yt3 ;partial sums
 .asg B5, B_yt6 ;partial sums
 .asg A4, A_yt7 ;partial sums
* ============================ PIPE LOOP PROLOG ============================== *
; One-time set-up for LOOP_FOR_B: computes the first table-lookup indices
; (j, j0, j1) from offset, the index shift l0 = norm(n_max) - 17, the data and
; output pointers, the store stride n2, the loop counter B_i = n + 4 and the
; flags A_pro (1 = suppress stores on the first pass) and A_r2 (radix - 2).
; The bracketed tags in the comments are presumably [cycle, iteration] markers
; from the original pipeline schedule -- treat them as informational.
 SHRU .S1 A_offset, 8, A_j1 ;[ 1,1]j1 =offset >> 8
|| NORM .L2 B_n_max, B_l0 ;[ 2,0]l0=norm(n_max)
; A_k1 shares A10 with A_offset; the parallel SHRU below still reads offset
; because both instructions are in the same execute packet.
 LDBU .D1T1 *A_brev[A_j1], A_k1 ;[ 2,1]k1=brev[j1]
|| SHRU .S1 A_offset, 2, A_j ;[ 4,0]j=offset>>2
 MVK .S1 03Fh, A_cx3f ;[ 2,1]const = 0x3f
 AND .S1 A_j, A_cx3f, A_j0 ;[ 3,1]j0 = lo6bit of j
 LDBU .D1T1 *A_brev[A_j0], A_k0 ;[ 5,1]k0 = brev[j0]
 ADD .L2 B_l0, -16, B_l0 ;[ 3,0]l0 -=16
|| MV .S1X B_ptr_y, A_y0 ;[ 3,0]y0 = ptr_y
 ADD .S2 B_l0, -1, B_l0 ;[ 4,0]l0=norm(n_max) - 17
|| ADD .L1X B_ptr_x, 8, A_x ;[ 4,0]A_x = ptr_x + 8 bytes
; B_x shares B8 with B_radix: the SUB below reads radix in the same packet in
; which the MV overwrites B8, so radix is gone after this packet.
 MVK .S1 1, A_pro ;[ 5,0]pro = 1
|| ADD .L2X A_n, 4, B_i ;[ 5,0]B_i = n + 4, for(i=0;i<n;){
|| SHRU .S2 B_n_max, 1, B_n2 ;[ 5,0]n2 = n_max >> 1
|| MV .D2 B_ptr_x, B_x ;[ 5,0]x=ptr_x
|| SUB .L1X B_radix, 2, A_r2 ;[ 5,0]A_r2 = radix - 2 (0 when radix == 2)
* ============================ PIPE LOOP KERNEL ============================== *
; Software-pipelined loop body: 12 execute packets per pass, closed by the
; [B_i] branch back to LOOP_FOR_B (B_i is decremented by 4 each pass).
; Per pass it
;   - loads four doublewords (x0..x7) alternately through B_x and A_x,
;   - forms sums/differences with ADDSP/SUBSP into the yt0..yt7 values,
;   - reads two bytes from the brev table (indices j & 0x3f and j >> 6) and
;     combines them as k = ((k0 << 6) + k1) >> l0,
;   - stores eight words through A_ptr_y0 / B_ptr_y1, stepping by n2 words.
; Stores are predicated on !A_pro, so they are suppressed until A_pro is
; zeroed in the last packet. Instructions predicated on !A_r2 run only when
; A_r2 == 0 and overwrite the butterfly operands/results with plain copies or
; alternative sums. Operands from different iterations overlap in time; do not
; reorder instructions or packets.
LOOP_FOR_B:
 MV .S1X B_n2, A_n2 ;[21,1]copy n2
||[!A_r2]MV .S2 B_x1, B_xh1_0 ;[21,1]if(!r2)xh1_0=x1
||[!A_r2]MV .D1 A_x7, A_xl0_1 ;[21,1]if(!r2)xl0_1=x7
|| LDDW .D2T2 *B_x++[2], B_x1:B_x0 ;[ 9,2]X[i]
 ADDSP .L1X B_xl1_0, A_xl0_1, A_yt7 ;[22,1]yt7=xl1_0+xl0_1
||[!A_r2]MV .S2 B_x0, B_xh0_0 ;[22,1]if(!r2)xh0_0=x0
|| SUBSP .L2X B_xl1_0, A_xl0_1, B_yt3 ;[22,1]yt3=xl1_0-xl0_1
||[!A_r2]MV .S1 A_x3, A_xh1_1 ;[22,1]if(!r2)xh1_1=x3
|| LDDW .D1T1 *A_x++[2], A_x3:A_x2 ;[10,2]X[i+2]
 [!A_r2]ADDSP.L2X B_xl1_0, A_xl0_1, B_yt3 ;[23,1](!r2)yt3=xl1_0+xl0_1
|| ADDSP .L1X B_xh1_0, A_xh1_1, A_yt1 ;[23,1]yt1=xh1_0+xh1_1
||[!A_r2]MV .S1 A_x2, A_xh0_1 ;[23,1]if(!r2)xh0_1=x2
|| ADD .D1 A_j, 1, A_j ;[11,2] j+=1
|| LDDW .D2T2 *B_x++[2], B_x5:B_x4 ;[11,2]X[i+3]
 [!A_r2]MV .S1 A_x6, A_xl1_1 ;[24,1]if(!r2)xl1_1=x6
||[!A_r2]MV .D2 B_x4, B_xl0_0 ;[24,1]if(!r2)xl0_0=x4
|| ADDSP .L1X B_xh0_0, A_xh0_1, A_yt0 ;[24,1]yt0=xh0_0+xh0_1
|| SUBSP .L2X B_xh0_0, A_xh0_1, B_yt4 ;[24,1]yt4=xh0_0-xh0_1
|| LDDW .D1T1 *A_x++[2], A_x7:A_x6 ;[12,2]X[i+4]
 SUBSP .L2X B_xl0_0, A_xl1_1, B_yt6 ;[25,1]yt6=xl0_0-xl1_1
||[!A_r2]SUBSP.L1X B_xl1_0, A_xl0_1, A_yt7 ;[25,1](!r2)yt7=xl1_0-xl0_1
|| SHRU .S1 A_j, 6, A_j1 ;[ 1,3]j1 = j >> 6
 SUB .S2 B_i, 4, B_i ;[26,1]i -= 4 (down-counting form of i += 4)
|| ADDAW .D2 B_y0, B_k, B_ptr_y1;[26,1]ptr_y1 = y0+k
|| ADDSP .L1X B_xl0_0, A_xl1_1, A_yt2 ;[26,1]yt2=xl0_0+xl1_1
|| SUBSP .L2X B_xh1_0, A_xh1_1, B_yt5 ;[26,1]yt5=xh1_0-xh1_1
|| LDBU .D1T1 *A_brev[A_j1], A_k1 ;[ 2,3]k1 = brev[j1]
|| MVK .S1 03Fh, A_cx3f ;[ 2,3]const = 0x3f
; Loop branch issues here; the five packets that follow complete the pass
; before control returns to LOOP_FOR_B.
 ADDAW .D1 A_y0, A_k, A_ptr_y0;[27,1]ptr_y0 = y0 + k
||[!A_pro]STW .D2T1 A_yt1, *B_ptr_y1++[B_n2] ;[27,1]store x[1]
||[ B_i]B .S2 LOOP_FOR_B ;[27,1]} /* end for */
||[ B_i]AND .S1 A_j, A_cx3f, A_j0 ;[ 3,3]j0 = j & 0x3f
 [!A_pro]STW .D2T2 B_yt3, *B_ptr_y1++[B_n2] ;[28,1]store x[3]
||[!A_pro]STW .D1T1 A_yt0, *A_ptr_y0++[A_n2] ;[28,1]store x[0]
||[ B_i]SHL .S2X A_k0, 6, B_k0_ ;[16,2]k0_ = k0 << 6
||[ B_i]SUBSP .L2 B_x1, B_x5, B_xl1_0 ;[16,2]xl1_0=x[1]-x[5]
 [ B_i]ADD .S2X B_k0_, A_k1, B_k_ ;[17,2]k_=k0_+k1
||[ B_i]SUBSP .L1 A_x2, A_x6, A_xl0_1 ;[17,2]xl0_1=x[2]-x[6]
||[ B_i]ADDSP .L2 B_x1, B_x5, B_xh1_0 ;[17,2]xh1_0=x[1]+x[5]
||[ B_i]LDBU .D1T1 *A_brev[A_j0], A_k0 ;[ 5,3]k0=brev[j0]
 [!A_pro]STW .D1T1 A_yt2, *A_ptr_y0++[A_n2] ;[30,1]store x[2]
||[!A_pro]STW .D2T2 B_yt5, *B_ptr_y1++[B_n2] ;[30,1]store x[5]
||[ B_i]ADDSP .L1 A_x3, A_x7, A_xh1_1 ;[18,2]xh1_1=x[3]+x[7]
||[ B_i]ADDSP .L2 B_x0, B_x4, B_xh0_0 ;[18,2]xh0_0=x[0]+x[4]
||[ B_i]SHRU .S2 B_k_, B_l0, B_k ;[18,2]k=k_ >> l0
 [!A_pro]STW .D2T1 A_yt7, *B_ptr_y1[0] ;[31,1]store x[7]
||[!A_pro]STW .D1T2 B_yt4, *A_ptr_y0++[A_n2] ;[31,1]store x[4]
||[ B_i]MV .S1X B_k, A_k ;[19,2]duplicate k
||[ B_i]ADDSP .L1 A_x2, A_x6, A_xh0_1 ;[19,2]xh0_1=x[2]+x[6]
; Last packet of the pass: clearing A_pro enables the stores from the next
; pass onward.
 [ B_i]ZERO .S1 A_pro ;[32,1]pro = 0
||[!A_pro]STW .D1T2 B_yt6, *A_ptr_y0[0] ;[32,1]store x[6]
||[ B_i]ADD .S2X A_y0, 4, B_y0 ;[20,2]B_y0 = A_y0 + 4 bytes (y0 + 1 word)
||[ B_i]SUBSP .L2 B_x0, B_x4, B_xl0_0 ;[20,2]xl0_0=x[0]-x[4]
||[ B_i]SUBSP .L1 A_x3, A_x7, A_xl1_1 ;[20,2]xl1_1=x[3]-x[7]
||[!A_r2]MV .D2 B_x5, B_xl1_0 ;[20,2]if(!r2)xl1_0=x5
* ============================ PIPE LOOP EPILOG ============================== *
; Common exit path: reached by the radix > 4 branch and by falling out of
; LOOP_FOR_B. Reloads B3, A10-A15, B10-B14 and the saved CSR value from the
; frame addressed at negative word offsets from the stack pointer, then returns
; through B3. The last register reloads and the CSR write are placed in the
; delay slots of the return branch.
EARLY_EXIT:
 MV .L1X B_SP, A_SP ;copy stack pointer so the A-side .D1 unit can address the frame
|| LDW .D2T2 *-B_SP[19], B3 ;restore B3
 LDW .D2T1 *-B_SP[1], A15 ;restore A15
|| LDW .D1T2 *-A_SP[18], B_csr ;load original CSR
 LDW .D2T2 *-B_SP[2], B14 ;restore B14
|| LDW .D1T1 *-A_SP[3], A14 ;restore A14
 LDW .D2T2 *-B_SP[4], B13 ;restore B13
|| LDW .D1T1 *-A_SP[5], A13 ;restore A13
 LDW .D2T2 *-B_SP[6], B12 ;restore B12
|| LDW .D1T1 *-A_SP[7], A12 ;restore A12
 LDW .D2T2 *-B_SP[8], B11 ;restore B11
|| LDW .D1T1 *-A_SP[9], A11 ;restore A11
|| B .S2 B3 ;return to caller
 LDW .D2T2 *-B_SP[10], B10 ;restore B10
|| LDW .D1T1 *-A_SP[11], A10 ;restore A10
 MVC .S2 B_csr, CSR ;interruptibility restored (original CSR written back)
 NOP 3 ;wait for branch delay
 ;BRANCH OCCURS HERE
* ============================================================================ *
* End of fftSPxSP assembly code *
*==============================================================================*
* Copyright (C) 1997-2000 Texas Instruments Incorporated. *
* All Rights Reserved *
*==============================================================================*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -