📄 ffts.s

📁 ARM库里的FFT算法
💻 S
📖 第 1 页 / 共 4 页
字号:
上一页 1 2 34
  MOV    Zr,Zr,ASR #1                 ; scale results
  MOV    Zi,Zi,ASR #1
  STMIA  Zadr,{Zr,Zi}                 ; store new values for next stage
  ADD    Yr,Yr,Tr,ASR #14             ; Y = Y+T
  ADD    Yi,Yi,Ti,ASR #14             ; ASR #14 compensates for
                                      ; fixed-point cos & sin
  MOV    Yr,Yr,ASR #1
  MOV    Yi,Yi,ASR #1
  STMIA  Yadr,{Yr,Yi}
  ADD    Yadr,Yadr,GrpStage,LSL #4    ; prepare for next calculation
                                      ; of this group
  SUBS   CalcsLoop,CalcsLoop,#1
  BNE    CALCLOOP1                     ; end of CALCULATIONS loop

; now for pi/2-theta (cos and sin swapped)

  LDMIA  PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
  RSB	 Tp, StartElem, GrpStage,LSR#1
  ADD    Yadr,Yadr,Tp,LSL #3          ; address of first calculation
CALCLOOP2                             ; start of calculations loop
  ADD    Zadr,Yadr,GrpStage,LSL #3    ; calculate address of element Z
  LDMIA  Yadr,{Yr,Yi}                 ; load element Y
  LDMIA  Zadr,{Zr,Zi}                 ; load element Z

  MUL    Tr,Zr,Sin                    ; t=complex_mult(k,cis(theta))
  MLA    Tr,Zi,Cos,Tr
  MUL    Tp,Zr,Cos
  MUL    Ti,Zi,Sin
  SUB    Ti,Ti,Tp

  SUB    Zr,Yr,Tr,ASR #14             ; Z = Y-T
  SUB    Zi,Yi,Ti,ASR #14
  MOV    Zr,Zr,ASR #1                 ; scale results
  MOV    Zi,Zi,ASR #1
  STMIA  Zadr,{Zr,Zi}                 ; store new values for next stage
  ADD    Yr,Yr,Tr,ASR #14             ; Y = Y+T
  ADD    Yi,Yi,Ti,ASR #14             ; ASR #14 compensates for
                                      ; fixed-point cos & sin
  MOV    Yr,Yr,ASR #1
  MOV    Yi,Yi,ASR #1
  STMIA  Yadr,{Yr,Yi}
  ADD    Yadr,Yadr,GrpStage,LSL #4    ; prepare for next calculation
                                      ; of this group
  SUBS   CalcsLoop,CalcsLoop,#1
  BNE    CALCLOOP2                     ; end of CALCULATIONS loop

; now for pi/2+theta (cos negated but sin the same)

  LDMIA  PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
  ADD	 Tp, StartElem, GrpStage,LSR#1
  ADD    Yadr,Yadr,Tp,LSL #3          ; of first calculation
CALCLOOP3                             ; start of calculations loop
  ADD    Zadr,Yadr,GrpStage,LSL #3    ; calculate address of element Z
  LDMIA  Yadr,{Yr,Yi}                 ; load element Y
  LDMIA  Zadr,{Zr,Zi}                 ; load element Z

  MUL    Tp,Zr,Sin		      ; t=complex_mult(k,cis(theta))
  MUL    Tr,Zi,Cos
  SUB	 Tr,Tr,Tp
  MUL    Tp,Zr,Cos
  MLA    Ti,Zi,Sin,Tp		      ; Ti is now minus what it should be

  SUB    Zr,Yr,Tr,ASR #14             ; Z = Y-T
  ADD    Zi,Yi,Ti,ASR #14
  MOV    Zr,Zr,ASR #1                 ; scale results
  MOV    Zi,Zi,ASR #1
  STMIA  Zadr,{Zr,Zi}                 ; store new values for next stage
  ADD    Yr,Yr,Tr,ASR #14             ; Y = Y+T
  SUB    Yi,Yi,Ti,ASR #14             ; ASR #14 compensates for
                                      ; fixed-point cos & sin
  MOV    Yr,Yr,ASR #1
  MOV    Yi,Yi,ASR #1
  STMIA  Yadr,{Yr,Yi}
  ADD    Yadr,Yadr,GrpStage,LSL #4    ; prepare for next calculation
                                      ; of this group
  SUBS   CalcsLoop,CalcsLoop,#1
  BNE    CALCLOOP3                     ; end of CALCULATIONS loop

; now for pi-theta (cos and sin swapped and both now negative)

  LDMIA  PDataAr,{Yadr, CalcsLoop, StartElem} ; retrieve stored values
  RSB	 Tp, StartElem, GrpStage
  ADD    Yadr,Yadr,Tp,LSL #3          ; of first calculation
CALCLOOP4                             ; start of calculations loop
  ADD    Zadr,Yadr,GrpStage,LSL #3    ; calculate address of element Z
  LDMIA  Yadr,{Yr,Yi}                 ; load element Y
  LDMIA  Zadr,{Zr,Zi}                 ; load element Z

  MUL    Tp,Zr,Cos		      ; t=complex_mult(k,cis(theta))
  MUL    Tr,Zi,Sin
  SUB	 Tr,Tr,Tp
  MUL    Tp,Zr,Sin
  MLA    Ti,Zi,Cos,Tp		      ; Ti is now minus what it should be

  SUB    Zr,Yr,Tr,ASR #14             ; Z = Y-T
  ADD    Zi,Yi,Ti,ASR #14
  MOV    Zr,Zr,ASR #1                 ; scale results
  MOV    Zi,Zi,ASR #1
  STMIA  Zadr,{Zr,Zi}                 ; store new values for next stage
  ADD    Yr,Yr,Tr,ASR #14             ; Y = Y+T
  SUB    Yi,Yi,Ti,ASR #14             ; ASR #14 compensates for
                                      ; fixed-point cos & sin
  MOV    Yr,Yr,ASR #1
  MOV    Yi,Yi,ASR #1
  STMIA  Yadr,{Yr,Yi}
  ADD    Yadr,Yadr,GrpStage,LSL #4    ; prepare for next calculation
                                      ; of this group
  SUBS   CalcsLoop,CalcsLoop,#1
  BNE    CALCLOOP4                    ; end of CALCULATIONS loop

; done the four groups

  LDMIB  PDataAr,{CalcsLoop,StartElem,Tab}
  ADD    StartElem, StartElem,#1      ; next group starts one element later
  ADD    Tab, Tab, TabStep            ; increase angle wk
  CMP    StartElem, GrpStage, LSR#2   ; doing four groups per group loop
  BLT    GRPLOOP		      ; do next set of four groups

  MOV    GrpStage, GrpStage,LSL #1    ; next stage has twice as many
                                      ; groups per stage
  MOV    CalcsLoop, CalcsLoop,LSR #1  ; next stage has half as many
                                      ; calculations per group
  MOV	 TabStep, TabStep, LSR #1     ; halve the angle increment
  LDR	 Tp,[PDataAr,#MN]
  CMP	 GrpStage, Tp		      ; compare n/2 with N
  BLT    STGLOOP                      ; go around stages loop again

 ]

;==========================================================================
; Finished the forward FFT

 [ INVERSE = 1				; inverse transform supported

  [ FORWARD = 1          	        ; both supported
    LDR R0,[PDataAr,#MDirection]
    TST R0,#1				; test which it is
    BNE fft_done			; finished forward transform
  ]

 ; for inverse transform, now take the complex conjugate of the output data

CJ_cnt    RN  0           ; count of data elements as they are conjugated
CJ_pt     RN  1           ; pointer to data as elements are conjugated
CJ_dat    RN  2           ; value being negated

  LDR    CJ_cnt,[PDataAr,#MN]
  LDR	 CJ_pt,[PDataAr,#MOutAddr]  ; start at the beginning
  ADD    CJ_pt,CJ_pt,#4
CNJLOOP
  LDR    CJ_dat,[CJ_pt]             ; Load the imaginary value,
  RSB    CJ_dat,CJ_dat,#0           ; negate imaginary value,
  STR    CJ_dat,[CJ_pt],#8          ; and store.
  SUBS   CJ_cnt,CJ_cnt,#1
  BNE    CNJLOOP

 ]

;==========================================================================
; FFT is now complete, all we have to do now is restore the registers to
; the state they were at before we were called (we stored them away for safe
; keeping in "MSavedRegs", and return to the caller.

fft_done
  MOV    R0,#0			    ; successful flag
  ADD    Tp,PDataAr,#MSavedRegs
  RETURN "r4-r13", Tp	; return (rlist, sp=Tp, ignore lr, no condition)

;==========================================================================
; Perform an FFT on real data - at twice the speed

 [ REALFFTS=1			; real FFT's supported?

; Start the Real FFT code. At this point:
;  R0 = address of the input buffer (2*N reals)
;  R1 = address of the output buffer (for N complex's)
;  R2 = LogN
; On Exit R0-R3 corrupt.

REALFFT
  CMP	R2,#TABLE_LOGN-1
  MOVGT	R0,#1	; table not big enough
  RETURN "","","",GT	; return (no rlist, no stack, use lr, greater than)
  LDR	R3,MPDataAr
  STR	R14,[R3,#MReturn]	; save the return address
  MOV	R3,#1			; do a forward FFT
  BL	FFT			; do a forward FFT & save the registers
  LDR	PDataAr,MPDataAr	; get the data pointer in R14

; declare the register bindings we will use

RF_Xr	RN 0		; output real value
RF_Xi	RN 1		; output imaginary value
RF_R1	RN 2		; input real R(n) value        | (R(n)+R(N-n))
RF_R2	RN 3		; input real R(N-n) value      | (R(n)-R(N-n))/2
RF_I1	RN 4		; input imaginary I(n) value   | (I(n)+I(N-n))/2
RF_I2	RN 5		; input imaginary I(N-n) value | (I(n)-I(N-n))
RF_Adr1	RN 6		; Pointer to R(n)
RF_Adr2 RN 7		; Pointer to R(N-n) +4
RF_Tab  RN 8		; Current trig table angle
; R9 free
RF_TabS RN 10		; table angle step (pi/N)
; Tp is 11
; Cos is 12
; Sin is 13
; PDataAr is 14

  LDR	Tp,[PDataAr,#MN]		; number in FFT
  LDR	RF_Adr1,[PDataAr,#MOutAddr]	; address of the output buffer
  ADD	RF_Adr2,RF_Adr1,Tp,LSL#3	; end of output buffer (8*N size)
  LDMIA	RF_Adr1,{RF_R1,RF_I1}		; get R(0) and I(0)
  ADD	RF_Xr,RF_R1,RF_I1
  MOV	RF_Xi,#0			; always zero as we have real data
  STMIA	RF_Adr1!,{RF_Xr,RF_Xi}		; first result
  MOV	RF_Tab,#0			; angle in multiples of 2pi/TABLE_N
  MOV	RF_TabS,#TABLE_N/2		; angle of pi
  LDR	Tp,[PDataAr,#MLogN]
  MOV	RF_TabS,RF_TabS,LSR Tp		; angle of pi/N to increment on

RF_loop
  ; look up sin and cos (angle will be between 0 and pi/2 since we
  ; are working forwards from start and backwards from end at same time)
  ADRL  Tp,TRIG_TABLE			; find start of the trig table
  CMP   RF_Tab,#TABLE_N/8		; compare with pi/4
  LDRLE Cos,[Tp,RF_Tab,LSL#2]
  MOVLE Sin,Cos,LSR#16
  BICLE Cos,Cos,Sin,LSL#16		; extract first 1/8th sin and cos
  RSBGT Sin,RF_Tab,#TABLE_N/4		; reflect about pi/4
  LDRGT Sin,[Tp,Sin,LSL#2]
  MOVGT Cos,Sin,LSR#16
  BICGT Sin,Sin,Cos,LSL#16		; get second 1/8th sin and cos

  ; process two entries in place - separate out
  LDMIA	RF_Adr1,{RF_R1,RF_I1}		; n'th value
  LDMDB	RF_Adr2,{RF_R2,RF_I2}		; N-n'th value
  SUB	Tp,RF_R1,RF_R2
  ADD	RF_R1,RF_R1,RF_R2
  MOV	RF_R1,RF_R1,ASR#2		; (R(n)+R(N-n))/4
  MOV	RF_R2,Tp,ASR#1			; (R(n)-R(N-n))/2
  ADD	Tp,RF_I1,RF_I2
  SUB	RF_I2,RF_I1,RF_I2
  MOV	RF_I2,RF_I2,ASR#2		; (I(n)-I(N-n))/4
  MOV	RF_I1,Tp,ASR#1			; (I(n)+I(N-n))/2

  ; mutliply by sin and cos
  MUL	RF_Xr,RF_I1,Cos
  MUL	RF_Xi,RF_R2,Sin
  SUB	Tp,RF_Xr,RF_Xi			; Tp=(Cos*I-Sin*R)<<14
  MUL	RF_Xr,RF_I1,Sin
  MLA	RF_I1,RF_R2,Cos,RF_Xr		; I1=(Sin*I+Cos*R)<<14
  ADD	RF_Xr,RF_R1,Tp,ASR#15
  SUB	RF_Xi,RF_I2,RF_I1,ASR#15
  STMIA	RF_Adr1!,{RF_Xr,RF_Xi}		; write out the first result
  SUB	RF_Xr,RF_R1,Tp,ASR#15
  ADD	RF_Xi,RF_I2,RF_I1,ASR#15
  RSB	RF_Xi,RF_Xi,#0
  STMDB	RF_Adr2!,{RF_Xr,RF_Xi}		; write out the second result

  ADD	RF_Tab,RF_Tab,RF_TabS		; increment angle
  CMP	RF_Adr1,RF_Adr2
  BLT	RF_loop				; some more left to do!

; finished
  LDR	R1,[PDataAr,#MReturn]		; recall the return address
  MOV	R0,#0				; successful finish
  ADD	Tp,PDataAr,#MSavedRegs
  LDMIA	Tp,{R4-R13}			; recall the saved registers
  RETURN "","",R1,""	; return (no rlist, no stack, lr=r1, no condition)

 ]

;===========================================================================
; There follows a memory area used for temporary storage ...

  AREA FFTDAT,DATA,NOINIT

; Declare offsets into the data area - the first four cannot be moved.
; Their order is VERY carefully chosen.

		^ 0
MOutAddr        # 4      ; address of the output buffer (X)
MCalcsLoop      # 4	 ; inital value of CalcsLoop counter (N/n)
MStartElem      # 4      ; offset of current start element (k)
MTab            # 4      ; current angle (wk)
MN		# 4	 ; size of the FFT (N)

; you can add extra ones from here on
MDirection      # 4  	 ; saves the FFT direction
MLogN		# 4	 ; log (base 2) of FFT size
MSavedRegs      # (11*4) ; R4-R14 = 11 registers
 [ REALFFTS=1
MReturn		# 4	 ; return address for REALFFT
 ]
MDataArEnd	# 0	 ; record the size of the data area

DataArSt % MDataArEnd    ; reserve the space

 END
上一页 1 2 34
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -