📄 ffts.s
字号:
MOV Zr,Zr,ASR #1 ; scale results
MOV Zi,Zi,ASR #1
STMIA Zadr,{Zr,Zi} ; store new values for next stage
ADD Yr,Yr,Tr,ASR #14 ; Y = Y+T
ADD Yi,Yi,Ti,ASR #14 ; ASR #14 compensates for
; fixed-point cos & sin
MOV Yr,Yr,ASR #1
MOV Yi,Yi,ASR #1
STMIA Yadr,{Yr,Yi}
ADD Yadr,Yadr,GrpStage,LSL #4 ; prepare for next calculation
; of this group
SUBS CalcsLoop,CalcsLoop,#1
BNE CALCLOOP1 ; end of CALCULATIONS loop
; now for pi/2-theta (cos and sin swapped)
LDMIA PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
RSB Tp, StartElem, GrpStage,LSR#1
ADD Yadr,Yadr,Tp,LSL #3 ; address of first calculation
CALCLOOP2 ; start of calculations loop
ADD Zadr,Yadr,GrpStage,LSL #3 ; calculate address of element Z
LDMIA Yadr,{Yr,Yi} ; load element Y
LDMIA Zadr,{Zr,Zi} ; load element Z
MUL Tr,Zr,Sin ; t=complex_mult(k,cis(theta))
MLA Tr,Zi,Cos,Tr
MUL Tp,Zr,Cos
MUL Ti,Zi,Sin
SUB Ti,Ti,Tp
SUB Zr,Yr,Tr,ASR #14 ; Z = Y-T
SUB Zi,Yi,Ti,ASR #14
MOV Zr,Zr,ASR #1 ; scale results
MOV Zi,Zi,ASR #1
STMIA Zadr,{Zr,Zi} ; store new values for next stage
ADD Yr,Yr,Tr,ASR #14 ; Y = Y+T
ADD Yi,Yi,Ti,ASR #14 ; ASR #14 compensates for
; fixed-point cos & sin
MOV Yr,Yr,ASR #1
MOV Yi,Yi,ASR #1
STMIA Yadr,{Yr,Yi}
ADD Yadr,Yadr,GrpStage,LSL #4 ; prepare for next calculation
; of this group
SUBS CalcsLoop,CalcsLoop,#1
BNE CALCLOOP2 ; end of CALCULATIONS loop
; now for pi/2+theta (cos negated but sin the same)
LDMIA PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
ADD Tp, StartElem, GrpStage,LSR#1
ADD Yadr,Yadr,Tp,LSL #3 ; of first calculation
CALCLOOP3 ; start of calculations loop
ADD Zadr,Yadr,GrpStage,LSL #3 ; calculate address of element Z
LDMIA Yadr,{Yr,Yi} ; load element Y
LDMIA Zadr,{Zr,Zi} ; load element Z
MUL Tp,Zr,Sin ; t=complex_mult(k,cis(theta))
MUL Tr,Zi,Cos
SUB Tr,Tr,Tp
MUL Tp,Zr,Cos
MLA Ti,Zi,Sin,Tp ; Ti is now minus what it should be
SUB Zr,Yr,Tr,ASR #14 ; Z = Y-T
ADD Zi,Yi,Ti,ASR #14
MOV Zr,Zr,ASR #1 ; scale results
MOV Zi,Zi,ASR #1
STMIA Zadr,{Zr,Zi} ; store new values for next stage
ADD Yr,Yr,Tr,ASR #14 ; Y = Y+T
SUB Yi,Yi,Ti,ASR #14 ; ASR #14 compensates for
; fixed-point cos & sin
MOV Yr,Yr,ASR #1
MOV Yi,Yi,ASR #1
STMIA Yadr,{Yr,Yi}
ADD Yadr,Yadr,GrpStage,LSL #4 ; prepare for next calculation
; of this group
SUBS CalcsLoop,CalcsLoop,#1
BNE CALCLOOP3 ; end of CALCULATIONS loop
; now for pi-theta (cos and sin swapped and both now negative)
LDMIA PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
RSB Tp, StartElem, GrpStage
ADD Yadr,Yadr,Tp,LSL #3 ; of first calculation
CALCLOOP4 ; start of calculations loop
ADD Zadr,Yadr,GrpStage,LSL #3 ; calculate address of element Z
LDMIA Yadr,{Yr,Yi} ; load element Y
LDMIA Zadr,{Zr,Zi} ; load element Z
MUL Tp,Zr,Cos ; t=complex_mult(k,cis(theta))
MUL Tr,Zi,Sin
SUB Tr,Tr,Tp
MUL Tp,Zr,Sin
MLA Ti,Zi,Cos,Tp ; Ti is now minus what it should be
SUB Zr,Yr,Tr,ASR #14 ; Z = Y-T
ADD Zi,Yi,Ti,ASR #14
MOV Zr,Zr,ASR #1 ; scale results
MOV Zi,Zi,ASR #1
STMIA Zadr,{Zr,Zi} ; store new values for next stage
ADD Yr,Yr,Tr,ASR #14 ; Y = Y+T
SUB Yi,Yi,Ti,ASR #14 ; ASR #14 compensates for
; fixed-point cos & sin
MOV Yr,Yr,ASR #1
MOV Yi,Yi,ASR #1
STMIA Yadr,{Yr,Yi}
ADD Yadr,Yadr,GrpStage,LSL #4 ; prepare for next calculation
; of this group
SUBS CalcsLoop,CalcsLoop,#1
BNE CALCLOOP4 ; end of CALCULATIONS loop
; done the four groups
LDMIB PDataAr,{CalcsLoop,StartElem,Tab}
ADD StartElem, StartElem,#1 ; next group starts one element later
ADD Tab, Tab, TabStep ; increase angle wk
CMP StartElem, GrpStage, LSR#2 ; doing four groups per group loop
BLT GRPLOOP ; do next set of four groups
MOV GrpStage, GrpStage,LSL #1 ; next stage has twice as many
; groups per stage
MOV CalcsLoop, CalcsLoop,LSR #1 ; next stage has half as many
; calculations per group
MOV TabStep, TabStep, LSR #1 ; halve the angle increment
LDR Tp,[PDataAr,#MN]
CMP GrpStage, Tp ; compare n/2 with N
BLT STGLOOP ; go around stages loop again
]
;==========================================================================
; Finished the forward FFT
[ INVERSE = 1 ; inverse transform supported
[ FORWARD = 1 ; both supported
LDR R0,[PDataAr,#MDirection]
TST R0,#1 ; test which it is
BNE fft_done ; finished forward transform
]
; for inverse transform, now take the complex conjugate of the output data
CJ_cnt RN 0 ; count of data elements as they are conjugated
CJ_pt RN 1 ; pointer to data as elements are conjugated
CJ_dat RN 2 ; value being negated
LDR CJ_cnt,[PDataAr,#MN]
LDR CJ_pt,[PDataAr,#MOutAddr] ; start at the beginning
ADD CJ_pt,CJ_pt,#4
CNJLOOP
LDR CJ_dat,[CJ_pt] ; Load the imaginary value,
RSB CJ_dat,CJ_dat,#0 ; negate imaginary value,
STR CJ_dat,[CJ_pt],#8 ; and store.
SUBS CJ_cnt,CJ_cnt,#1
BNE CNJLOOP
]
;==========================================================================
; FFT is now complete, all we have to do now is restore the registers to
; the state they were at before we were called (we stored them away for safe
; keeping in "MSavedRegs", and return to the caller.
fft_done
MOV R0,#0 ; successful flag
ADD Tp,PDataAr,#MSavedRegs
RETURN "r4-r13", Tp ; return (rlist, sp=Tp, ignore lr, no condition)
;==========================================================================
; Perform an FFT on real data - at twice the speed
[ REALFFTS=1 ; real FFT's supported?
; Start the Real FFT code. At this point:
; R0 = address of the input buffer (2*N reals)
; R1 = address of the output buffer (for N complex's)
; R2 = LogN
; On Exit R0-R3 corrupt.
REALFFT
CMP R2,#TABLE_LOGN-1
MOVGT R0,#1 ; table not big enough
RETURN "","","",GT ; return (no rlist, no stack, use lr, greater than)
LDR R3,MPDataAr
STR R14,[R3,#MReturn] ; save the return address
MOV R3,#1 ; do a forward FFT
BL FFT ; do a forward FFT & save the registers
LDR PDataAr,MPDataAr ; get the data pointer in R14
; declare the register bindings we will use
RF_Xr RN 0 ; output real value
RF_Xi RN 1 ; output imaginary value
RF_R1 RN 2 ; input real R(n) value | (R(n)+R(N-n))
RF_R2 RN 3 ; input real R(N-n) value | (R(n)-R(N-n))/2
RF_I1 RN 4 ; input imaginary I(n) value | (I(n)+I(N-n))/2
RF_I2 RN 5 ; input imaginary I(N-n) value | (I(n)-I(N-n))
RF_Adr1 RN 6 ; Pointer to R(n)
RF_Adr2 RN 7 ; Pointer to R(N-n) +4
RF_Tab RN 8 ; Current trig table angle
; R9 free
RF_TabS RN 10 ; table angle step (pi/N)
; Tp is 11
; Cos is 12
; Sin is 13
; PDataAr is 14
LDR Tp,[PDataAr,#MN] ; number in FFT
LDR RF_Adr1,[PDataAr,#MOutAddr] ; address of the output buffer
ADD RF_Adr2,RF_Adr1,Tp,LSL#3 ; end of output buffer (8*N size)
LDMIA RF_Adr1,{RF_R1,RF_I1} ; get R(0) and I(0)
ADD RF_Xr,RF_R1,RF_I1
MOV RF_Xi,#0 ; always zero as we have real data
STMIA RF_Adr1!,{RF_Xr,RF_Xi} ; first result
MOV RF_Tab,#0 ; angle in multiples of 2pi/TABLE_N
MOV RF_TabS,#TABLE_N/2 ; angle of pi
LDR Tp,[PDataAr,#MLogN]
MOV RF_TabS,RF_TabS,LSR Tp ; angle of pi/N to increment on
RF_loop
; look up sin and cos (angle will be between 0 and pi/2 since we
; are working forwards from start and backwards from end at same time)
ADRL Tp,TRIG_TABLE ; find start of the trig table
CMP RF_Tab,#TABLE_N/8 ; compare with pi/4
LDRLE Cos,[Tp,RF_Tab,LSL#2]
MOVLE Sin,Cos,LSR#16
BICLE Cos,Cos,Sin,LSL#16 ; extract first 1/8th sin and cos
RSBGT Sin,RF_Tab,#TABLE_N/4 ; reflect about pi/4
LDRGT Sin,[Tp,Sin,LSL#2]
MOVGT Cos,Sin,LSR#16
BICGT Sin,Sin,Cos,LSL#16 ; get second 1/8th sin and cos
; process two entries in place - separate out
LDMIA RF_Adr1,{RF_R1,RF_I1} ; n'th value
LDMDB RF_Adr2,{RF_R2,RF_I2} ; N-n'th value
SUB Tp,RF_R1,RF_R2
ADD RF_R1,RF_R1,RF_R2
MOV RF_R1,RF_R1,ASR#2 ; (R(n)+R(N-n))/4
MOV RF_R2,Tp,ASR#1 ; (R(n)-R(N-n))/2
ADD Tp,RF_I1,RF_I2
SUB RF_I2,RF_I1,RF_I2
MOV RF_I2,RF_I2,ASR#2 ; (I(n)-I(N-n))/4
MOV RF_I1,Tp,ASR#1 ; (I(n)+I(N-n))/2
; mutliply by sin and cos
MUL RF_Xr,RF_I1,Cos
MUL RF_Xi,RF_R2,Sin
SUB Tp,RF_Xr,RF_Xi ; Tp=(Cos*I-Sin*R)<<14
MUL RF_Xr,RF_I1,Sin
MLA RF_I1,RF_R2,Cos,RF_Xr ; I1=(Sin*I+Cos*R)<<14
ADD RF_Xr,RF_R1,Tp,ASR#15
SUB RF_Xi,RF_I2,RF_I1,ASR#15
STMIA RF_Adr1!,{RF_Xr,RF_Xi} ; write out the first result
SUB RF_Xr,RF_R1,Tp,ASR#15
ADD RF_Xi,RF_I2,RF_I1,ASR#15
RSB RF_Xi,RF_Xi,#0
STMDB RF_Adr2!,{RF_Xr,RF_Xi} ; write out the second result
ADD RF_Tab,RF_Tab,RF_TabS ; increment angle
CMP RF_Adr1,RF_Adr2
BLT RF_loop ; some more left to do!
; finished
LDR R1,[PDataAr,#MReturn] ; recall the return address
MOV R0,#0 ; successful finish
ADD Tp,PDataAr,#MSavedRegs
LDMIA Tp,{R4-R13} ; recall the saved registers
RETURN "","",R1,"" ; return (no rlist, no stack, lr=r1, no condition)
]
;===========================================================================
; There follows a memory area used for temporary storage ...
AREA FFTDAT,DATA,NOINIT
; Declare offsets into the data area - the first four cannot be moved.
; Their order is VERY carefully chosen.
^ 0
MOutAddr # 4 ; address of the output buffer (X)
MCalcsLoop # 4 ; inital value of CalcsLoop counter (N/n)
MStartElem # 4 ; offset of current start element (k)
MTab # 4 ; current angle (wk)
MN # 4 ; size of the FFT (N)
; you can add extra ones from here on
MDirection # 4 ; saves the FFT direction
MLogN # 4 ; log (base 2) of FFT size
MSavedRegs # (11*4) ; R4-R14 = 11 registers
[ REALFFTS=1
MReturn # 4 ; return address for REALFFT
]
MDataArEnd # 0 ; record the size of the data area
DataArSt % MDataArEnd ; reserve the space
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -