📄 ffts.s
字号:
;/*
; * Fast Fourier Transform assembler
; * Copyright (C) ARM Limited 1992,1995,1996,1998-1999. All rights reserved.
; */
;==========================================================================
; ARM Fast Fourier Transform program
;==========================================================================
;
; Original Routines : 26/2/92
; Optimised and Tidied : 13/9/95
; Further optimisation : 12/10/95
; Addition of REALFFT : 1/11/95
; Bug fix in Real FFT : 9/2/96
;
; Version: 1.31 Dated: Jan 98
;
; Define some options which decide which bits of this source code
INCLUDE intworkm.h
OPTIMISE EQU 1 ; 0 = Optimise for size. Use a smaller slower
; algorithm.
;
; 1 = Optimise for speed which:
; (a) Optimises the first three stages.
; (b) Takes advantage of the special cased
; multiplies (ie x0, x1, x2root2 etc)
; (c) Has a cleverer (but larger) inner
; loop.
; The direction of the FFT and the addresses of the input and output
; buffers are passed as arguments. The FFT is said to be "inplace" if
; the input buffer is the same as the output buffer. By default all
; four options (inplace+forward FFT, inplace+inverse FFT,
; outofplace+forward FFT and outofplace+inverse FFT) are supported. However,
; to reduce code side support can be switched off via the flags below.
INPLACE EQU 1 ; 1 = support in place FFT's (0 to not support)
OUTPLACE EQU 1 ; 1 = support out of place FFT's
FORWARD EQU 1 ; 1 = support forward FFT's
INVERSE EQU 1 ; 1 = support inverse FFT's
REALFFTS EQU 1 ; 1 = support real FFT's
;==========================================================================
; Note that the data is scaled by 1/N for both the forward and inverse
; transforms; this is to prevent overflow during the forward transform
;==========================================================================
; Export the label "FFT" giving the start of the FFT algorithm.
; This is required by other assembler or C programs calling the FFT program.
EXPORT FFT ; entry point of the FFT program
; FFT takes 4 arguments:
; input = address of the input data (consisting of 2*N ints)
; output = address for the output data (consisting of 2*N ints)
; (input and output may be equal provided inplace FFT's
; are supported. Otherwise they must not overlap.)
; logN = log base two of N, the number of points in FFT
; (8 for a 256 point FFT, 10 for a 1024 point FFT etc)
; direction = 0 for an inverse FFT, 1 for a forward FFT
;
; In C FFT should be declared as:
; int FFT(complex *input, complex *output, int logN, int direction);
;
; It returns 0 if the FFT was successful or 1 if not (eg table not big
; enough).
;
; If you wish to take FFT's of real data (all the imaginary parts zero)
; then the next function will be twice as fast.
[ REALFFTS=1 ; check real FFT's supporrted
EXPORT REALFFT
]
; REALFFT takes 3 arguments similar to FFT except this time:
; input = address of the real input data consisting of 2*N
; real values, each in an int.
; output = address for the output data which is now N complex
; numbers giving the first half of the 2*N size FFT.
; The second half of the FFT can be calculated using
; the symmetry relations Re X(N-n) = Re X(n) and
; Im X(N-n) = - Im X(n). Again the input buffer may
; equal the output buffer.
; logN = log base two of N, which is half the number of real
; data points inputted.
;
; The input and output buffers are both 2*N ints = 8*N bytes as before.
; Only the forward direction is supported at present.
;==========================================================================
; Start of the code area
AREA FFTCODE, CODE, READONLY $interwork
;===========================================================================
; The trigonometry table goes here...
; (1) The trig table consists of an included file generated by a C program,
; called "trig.c"
; (2) The trig table should be re-created if the number of elements in the
; FFT is changed. In any event the table size TABLE_N must be at least
; as large as N for the FFT to work.
; (3) Since the table is compiled along with this ARM code, then the
; compiler will ensure that the endianess of the table is correct.
; (4) The table consists of word entries of the form (sin(t)<<16 + cos(t))
; for t in the range 0 <= t <= pi/4. The t step size is 2*pi/TABLE_N.
; All values are thus positive and stored as fixed point numbers
; with the binary point after bit 14.
TRIG_TABLE
INCLUDE ffttabls.h
; Trig table must be included here as it defines TABLE_N and TABLE_LOGN
; giving the accuracy of the table.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Global register bindings
PDataAr RN 14 ; set up to point to the start of the data area
Tp RN 11 ; temporary address pointer or intermediate value
MPDataAr DCD DataArSt ; the linker puts the address of the start
; of the data area in here
;==========================================================================
FFT
; The actual ARM code starts here.
; On Entry: R0 = address of the input data (N complex numbers)
; R1 = address for the output data (N complex numbers)
; R2 = logN
; R3 = direction 0=inverse fft 1=forward fft
;
; On Exit: R0-R3,R14,flags corrupted. R4-R13 preserved.
; R0=0 if successful.
; The code uses no byte operations, and so is neither big nor little endian
; (provided that the trig table is generated to the same endianness).
; The code uses no features that make it specific to either 26-bit or 32-bit
; processor modes.
CMP R2,#TABLE_LOGN
MOVGT R0,#1 ; return if table not big enough
RETURN "","","",GT ; return (no rlist, no stack, use lr, greater than)
ADD R2,R2,R3,LSL#16 ; Temporarily combine the registers
LDR R3, MPDataAr ; Address of our data area
ADD R3, R3, #MSavedRegs
STMIA R3, {R4-R14} ; Save registers for APCS / Callee
SUB PDataAr, R3, #MSavedRegs ; Set R14 to our data area
MOV R3,R2,LSR#16 ; recover the direction flag
BIC R4,R2,R2,LSL#16 ; recover the logN
MOV R2,#1
MOV R2,R2,LSL R4 ; calculate N
STR R1,[PDataAr,#MOutAddr] ; save the output buffer address
STR R2,[PDataAr,#MN] ; save N for later use
STR R3,[PDataAr,#MDirection] ; save the direction
STR R4,[PDataAr,#MLogN] ; save LogN for later use
;---------------------------------------------------------------------------
; This first part of this program re-orders the data in bit-reversed order
; and conjugates the data if the inverse transform is required.
BR_inadr RN 0 ; address of current input element
BR_outst RN 1 ; address of first output element
BR_N RN 2 ; number of elements in the transform
BR_direction RN 3 ; direction 0=inverse 1=forward FFT
BR_outadr RN 4 ; address of current output element
BR_incnt RN 5 ; count of input elements in normal order
BR_outcnt RN 6 ; count of output elements in bit reversed order*8
BR_bit RN 7 ; bit counter used in bit-reversed counting
BR_d1r RN 7 ; holds the data as it is transfered, real
BR_d1i RN 8 ; ... and imaginary
BR_d2r RN 9 ; ditto, other value, real.
BR_d2i RN 10 ; ... and imaginary
; BR_inadr, BR_outadr, BR_N, BR_direction are set up at this point
MOV BR_incnt,#0 ; Count over input data in normal order
MOV BR_outcnt,#0 ; Count over the output data in bit-
; reversed order
[ (INPLACE=1):LAND:(OUTPLACE=1) ; both supported so decide ....
TEQ BR_inadr,BR_outst ; are we in-place
BEQ BR_inplace
]
; Out of place bit reversing
[ OUTPLACE=1
[ (FORWARD=1):LAND:(INVERSE=1) ; both supported so decide ...
TST BR_direction,#1
BEQ BR_outplace_inverse ; inverse FFT
]
[ FORWARD=1
BR_outplace_forward ; outplace, forward FFT
ADD BR_outadr,BR_outst,BR_outcnt,LSL #3 ; destination address
LDMIA BR_inadr!,{BR_d1r,BR_d1i} ; Load the next element
STMIA BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
MOV BR_bit,BR_N,LSR#1 ; Increment the output counter in
BR_outplace_forward_count ; bit-reversed order in this loop
TST BR_outcnt,BR_bit
EOR BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
BEQ BR_outplace_forward ; If the bit was clear then exit.
MOVS BR_bit,BR_bit,LSR #1 ; else, go to the next bit
BNE BR_outplace_forward_count ; after last increment, BR_bit becomes
B FFTSTART ; zero, and we start main FFT
]
[ INVERSE=1
BR_outplace_inverse ; outplace, inverse FFT
ADD BR_outadr,BR_outst,BR_outcnt,LSL #3 ; destination address
LDMIA BR_inadr!,{BR_d1r,BR_d1i} ; Load the elements to swap.
RSB BR_d1i,BR_d1i,#0 ; Take their complex conjugates.
STMIA BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
MOV BR_bit,BR_N,LSR#1 ; Increment the output counter in
BR_outplace_inverse_count ; bit-reversed order in this loop
TST BR_outcnt,BR_bit
EOR BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
BEQ BR_outplace_inverse ; If the bit was clear then exit.
MOVS BR_bit,BR_bit,LSR #1 ; else, go to the next bit
BNE BR_outplace_inverse_count ; after last increment, BR_bit becomes
B FFTSTART ; zero, and we start main FFT
]
]
; In place bit reversing
[ INPLACE=1
BR_inplace
[ (FORWARD=1):LAND:(INVERSE=1) ; both supported so decide ...
TST BR_direction,#1
BEQ BR_inplace_inverse ; inverse FFT
]
[ FORWARD=1
BR_inplace_forward ; inplace, forward FFT
CMP BR_incnt,BR_outcnt ; Do we need to move this element ?
BGE BR_inplace_forward_no ; Already been done
ADD BR_outadr,BR_outst,BR_outcnt,LSL #3 ; destination address
LDMIA BR_inadr ,{BR_d1r,BR_d1i} ; Load the elements to swap.
LDMIA BR_outadr,{BR_d2r,BR_d2i}
STMIA BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
STMIA BR_inadr ,{BR_d2r,BR_d2i}
BR_inplace_forward_no
ADD BR_inadr,BR_inadr,#8 ; Next input element
ADD BR_incnt,BR_incnt,#1 ; Increment the input counter in the
MOV BR_bit,BR_N,LSR#1 ; Increment the output counter in
BR_inplace_forward_count ; bit-reversed order in this loop
TST BR_outcnt,BR_bit
EOR BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
BEQ BR_inplace_forward ; If the bit was clear then exit.
MOVS BR_bit,BR_bit,LSR #1 ; else, go to the next bit
BNE BR_inplace_forward_count ; after last increment, BR_bit becomes
B FFTSTART ; zero, and we start main FFT
]
[ INVERSE=1
BR_inplace_inverse ; inplace, inverse FFT
CMP BR_incnt,BR_outcnt ; Do we need to move this element ?
BGT BR_inplace_inverse_no ; Already been done
ADD BR_outadr,BR_outst,BR_outcnt,LSL #3 ; destination address
LDMIA BR_inadr ,{BR_d1r,BR_d1i} ; Load the elements to swap.
LDMIA BR_outadr,{BR_d2r,BR_d2i}
RSB BR_d1i,BR_d1i,#0 ; Take their complex conjugates.
RSB BR_d2i,BR_d2i,#0
STMIA BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
STMIA BR_inadr ,{BR_d2r,BR_d2i}
BR_inplace_inverse_no
ADD BR_inadr,BR_inadr,#8 ; Next input element
ADD BR_incnt,BR_incnt,#1 ; Increment the input counter in the
MOV BR_bit,BR_N,LSR#1 ; Increment the output counter in
BR_inplace_inverse_count ; bit-reversed order in this loop
TST BR_outcnt,BR_bit
EOR BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
BEQ BR_inplace_inverse ; If the bit was clear then exit.
MOVS BR_bit,BR_bit,LSR #1 ; else, go to the next bit
BNE BR_inplace_inverse_count ; after last increment, BR_bit becomes
B FFTSTART ; zero, and we start main FFT
]
]
FFTSTART
; The main FFT algorithm starts here...
;
; There are two different algorithms following. One is small but unoptimised
; and the second large but optimised. The OPTIMISE flag decides which one
; is assembled.
;
; The whole FFT routine does a divide by two of the results after each
; stage, this prevents overflow of the registers during a multiply. For
; the three optimised stages this right shift by one for each stage is
; not done for the first two stages, but the third stage does a right
; shift by 3 to make up for it. This may increase the likelyhood of an
; overflow.
;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -