📄 ffts.s

📁 ARM库里的FFT算法
💻 S
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
;/*
; * Fast Fourier Transform assembler
; * Copyright (C) ARM Limited 1992,1995,1996,1998-1999. All rights reserved.
; */

;==========================================================================
;  ARM Fast Fourier Transform program
;==========================================================================
;
;  Original Routines    : 26/2/92
;  Optimised and Tidied : 13/9/95
;  Further optimisation : 12/10/95
;  Addition of REALFFT	: 1/11/95
;  Bug fix in Real FFT	: 9/2/96
;
;  Version: 1.31  Dated: Jan 98
;
; Define some options which decide which bits of this source code

	INCLUDE intworkm.h

OPTIMISE EQU 1           ; 0 = Optimise for size. Use a smaller slower
	     		 		 ;     algorithm.
                         ;
                         ; 1 = Optimise for speed which:
                         ;       (a) Optimises the first three stages.
                         ;       (b) Takes advantage of the special cased
                         ;           multiplies (ie x0, x1, x2root2 etc)
                         ;       (c) Has a cleverer (but larger) inner
                         ;	     loop.

; The direction of the FFT and the addresses of the input and output
; buffers are passed as arguments. The FFT is said to be "inplace" if
; the input buffer is the same as the output buffer. By default all
; four options (inplace+forward FFT, inplace+inverse FFT,
; outofplace+forward FFT and outofplace+inverse FFT) are supported. However,
; to reduce code side support can be switched off via the flags below.

INPLACE   EQU 1      ; 1 = support in place FFT's (0 to not support)
OUTPLACE  EQU 1      ; 1 = support out of place FFT's
FORWARD	  EQU 1	     ; 1 = support forward FFT's
INVERSE	  EQU 1	     ; 1 = support inverse FFT's
REALFFTS  EQU 1      ; 1 = support real FFT's

;==========================================================================
; Note that the data is scaled by 1/N for both the forward and inverse
; transforms; this is to prevent overflow during the forward transform

;==========================================================================
; Export the label "FFT" giving the start of the FFT algorithm.
; This is required by other assembler or C programs calling the FFT program.

 EXPORT FFT              ; entry point of the FFT program

; FFT takes 4 arguments:
;  input	= address of the input data (consisting of 2*N ints)
;  output       = address for the output data (consisting of 2*N ints)
;  		  (input and output may be equal provided inplace FFT's
;		   are supported. Otherwise they must not overlap.)
;  logN		= log base two of N, the number of points in FFT
;		  (8 for a 256 point FFT, 10 for a 1024 point FFT etc)
;  direction	= 0 for an inverse FFT, 1 for a forward FFT
;
; In C FFT should be declared as:
;  int FFT(complex *input, complex *output, int logN, int direction);
;
; It returns 0 if the FFT was successful or 1 if not (eg table not big
; enough).
;
; If you wish to take FFT's of real data (all the imaginary parts zero)
; then the next function will be twice as fast.

 [ REALFFTS=1	; check real FFT's supporrted
   EXPORT REALFFT
 ]

; REALFFT takes 3 arguments similar to FFT except this time:
;  input	= address of the real input data consisting of 2*N
;		  real values, each in an int.
;  output	= address for the output data which is now N complex
;		  numbers giving the first half of the 2*N size FFT.
;		  The second half of the FFT can be calculated using
;		  the symmetry relations Re X(N-n) = Re X(n) and
;		  Im X(N-n) = - Im X(n). Again the input buffer may
;		  equal the output buffer.
;  logN		= log base two of N, which is half the number of real
;		  data points inputted.
;
; The input and output buffers are both 2*N ints = 8*N bytes as before.
; Only the forward direction is supported at present.

;==========================================================================
; Start of the code area

  AREA FFTCODE, CODE, READONLY $interwork

;===========================================================================
; The trigonometry table goes here...
;   (1) The trig table consists of an included file generated by a C program,
;       called "trig.c"
;   (2) The trig table should be re-created if the number of elements in the
;       FFT is changed. In any event the table size TABLE_N must be at least
;	as large as N for the FFT to work.
;   (3) Since the table is compiled along with this ARM code, then the
;       compiler will ensure that the endianess of the table is correct.
;   (4) The table consists of word entries of the form (sin(t)<<16 + cos(t))
;	for t in the range 0 <= t <= pi/4. The t step size is 2*pi/TABLE_N.
;	All values are thus positive and stored as fixed point numbers
;	with the binary point after bit 14.

TRIG_TABLE
  INCLUDE ffttabls.h

; Trig table must be included here as it defines TABLE_N and TABLE_LOGN
; giving the accuracy of the table.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Global register bindings

PDataAr     RN 14        ; set up to point to the start of the data area
Tp          RN 11        ; temporary address pointer or intermediate value

MPDataAr    DCD DataArSt ; the linker puts the address of the start
                         ; of the data area in here

;==========================================================================

FFT

; The actual ARM code starts here.

; On Entry:	R0 = address of the input data (N complex numbers)
;		R1 = address for the output data (N complex numbers)
;		R2 = logN
;		R3 = direction 0=inverse fft 1=forward fft
;
; On Exit:	R0-R3,R14,flags corrupted. R4-R13 preserved.
;		R0=0 if successful.

; The code uses no byte operations, and so is neither big nor little endian
; (provided that the trig table is generated to the same endianness).
; The code uses no features that make it specific to either 26-bit or 32-bit
; processor modes.

  CMP	 R2,#TABLE_LOGN
  MOVGT	 R0,#1			     ; return if table not big enough
  RETURN "","","",GT	; return (no rlist, no stack, use lr, greater than)
  ADD	 R2,R2,R3,LSL#16	     ; Temporarily combine the registers
  LDR    R3, MPDataAr                ; Address of our data area
  ADD    R3, R3, #MSavedRegs
  STMIA  R3, {R4-R14}                ; Save registers for APCS / Callee
  SUB    PDataAr, R3, #MSavedRegs    ; Set R14 to our data area
  MOV	 R3,R2,LSR#16		     ; recover the direction flag
  BIC	 R4,R2,R2,LSL#16	     ; recover the logN
  MOV	 R2,#1
  MOV	 R2,R2,LSL R4		     ; calculate N

  STR    R1,[PDataAr,#MOutAddr]	     ; save the output buffer address
  STR    R2,[PDataAr,#MN]	     ; save N for later use
  STR    R3,[PDataAr,#MDirection]    ; save the direction
  STR	 R4,[PDataAr,#MLogN]	     ; save LogN for later use

;---------------------------------------------------------------------------
; This first part of this program re-orders the data in bit-reversed order
; and conjugates the data if the inverse transform is required.

BR_inadr        RN 0      ; address of current input element
BR_outst        RN 1      ; address of first output element
BR_N		RN 2	  ; number of elements in the transform
BR_direction	RN 3      ; direction 0=inverse 1=forward FFT
BR_outadr       RN 4      ; address of current output element
BR_incnt        RN 5      ; count of input elements in normal order
BR_outcnt       RN 6      ; count of output elements in bit reversed order*8
BR_bit          RN 7      ; bit counter used in bit-reversed counting
BR_d1r          RN 7      ; holds the data as it is transfered, real
BR_d1i          RN 8      ; ... and imaginary
BR_d2r          RN 9      ; ditto, other value, real.
BR_d2i          RN 10     ; ... and imaginary

; BR_inadr, BR_outadr, BR_N, BR_direction are set up at this point

     MOV BR_incnt,#0                  ; Count over input data in normal order
     MOV BR_outcnt,#0                 ; Count over the output data in bit-
                                      ; reversed order

 [ (INPLACE=1):LAND:(OUTPLACE=1)      ; both supported so decide ....
     TEQ BR_inadr,BR_outst	      ; are we in-place
     BEQ BR_inplace
 ]

; Out of place bit reversing

 [ OUTPLACE=1
   [ (FORWARD=1):LAND:(INVERSE=1)     ; both supported so decide ...
     TST BR_direction,#1
     BEQ BR_outplace_inverse          ; inverse FFT
   ]

   [ FORWARD=1
BR_outplace_forward                   ; outplace, forward FFT
     ADD    BR_outadr,BR_outst,BR_outcnt,LSL #3  ; destination address
     LDMIA  BR_inadr!,{BR_d1r,BR_d1i} ; Load the next element
     STMIA  BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
     MOV    BR_bit,BR_N,LSR#1         ; Increment the output counter in
BR_outplace_forward_count             ; bit-reversed order in this loop
     TST    BR_outcnt,BR_bit
     EOR    BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
     BEQ    BR_outplace_forward       ; If the bit was clear then exit.
     MOVS   BR_bit,BR_bit,LSR #1      ; else, go to the next bit
     BNE    BR_outplace_forward_count ; after last increment, BR_bit becomes
     B FFTSTART                       ; zero, and we start main FFT
   ]

   [ INVERSE=1
BR_outplace_inverse                   ; outplace, inverse FFT
     ADD    BR_outadr,BR_outst,BR_outcnt,LSL #3  ; destination address
     LDMIA  BR_inadr!,{BR_d1r,BR_d1i} ; Load the elements to swap.
     RSB    BR_d1i,BR_d1i,#0          ; Take their complex conjugates.
     STMIA  BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
     MOV    BR_bit,BR_N,LSR#1         ; Increment the output counter in
BR_outplace_inverse_count             ; bit-reversed order in this loop
     TST    BR_outcnt,BR_bit
     EOR    BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
     BEQ    BR_outplace_inverse       ; If the bit was clear then exit.
     MOVS   BR_bit,BR_bit,LSR #1      ; else, go to the next bit
     BNE    BR_outplace_inverse_count ; after last increment, BR_bit becomes
     B FFTSTART                       ; zero, and we start main FFT
   ]
 ]

; In place bit reversing

 [ INPLACE=1
BR_inplace
   [ (FORWARD=1):LAND:(INVERSE=1)     ; both supported so decide ...
     TST BR_direction,#1
     BEQ BR_inplace_inverse           ; inverse FFT
   ]

   [ FORWARD=1
BR_inplace_forward                    ; inplace, forward FFT
     CMP    BR_incnt,BR_outcnt        ; Do we need to move this element ?
     BGE    BR_inplace_forward_no     ; Already been done
     ADD    BR_outadr,BR_outst,BR_outcnt,LSL #3  ; destination address
     LDMIA  BR_inadr ,{BR_d1r,BR_d1i} ; Load the elements to swap.
     LDMIA  BR_outadr,{BR_d2r,BR_d2i}
     STMIA  BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
     STMIA  BR_inadr ,{BR_d2r,BR_d2i}
BR_inplace_forward_no
     ADD    BR_inadr,BR_inadr,#8      ; Next input element
     ADD    BR_incnt,BR_incnt,#1      ; Increment the input counter in the
     MOV    BR_bit,BR_N,LSR#1         ; Increment the output counter in
BR_inplace_forward_count              ; bit-reversed order in this loop
     TST    BR_outcnt,BR_bit
     EOR    BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
     BEQ    BR_inplace_forward        ; If the bit was clear then exit.
     MOVS   BR_bit,BR_bit,LSR #1      ; else, go to the next bit
     BNE    BR_inplace_forward_count  ; after last increment, BR_bit becomes
     B FFTSTART                       ; zero, and we start main FFT
   ]

   [ INVERSE=1
BR_inplace_inverse                    ; inplace, inverse FFT
     CMP    BR_incnt,BR_outcnt        ; Do we need to move this element ?
     BGT    BR_inplace_inverse_no     ; Already been done
     ADD    BR_outadr,BR_outst,BR_outcnt,LSL #3  ; destination address
     LDMIA  BR_inadr ,{BR_d1r,BR_d1i} ; Load the elements to swap.
     LDMIA  BR_outadr,{BR_d2r,BR_d2i}
     RSB    BR_d1i,BR_d1i,#0          ; Take their complex conjugates.
     RSB    BR_d2i,BR_d2i,#0
     STMIA  BR_outadr,{BR_d1r,BR_d1i} ; Store the swapped elements.
     STMIA  BR_inadr ,{BR_d2r,BR_d2i}
BR_inplace_inverse_no
     ADD    BR_inadr,BR_inadr,#8      ; Next input element
     ADD    BR_incnt,BR_incnt,#1      ; Increment the input counter in the
     MOV    BR_bit,BR_N,LSR#1         ; Increment the output counter in
BR_inplace_inverse_count              ; bit-reversed order in this loop
     TST    BR_outcnt,BR_bit
     EOR    BR_outcnt,BR_outcnt,BR_bit ; Toggle the bit.
     BEQ    BR_inplace_inverse        ; If the bit was clear then exit.
     MOVS   BR_bit,BR_bit,LSR #1      ; else, go to the next bit
     BNE    BR_inplace_inverse_count  ; after last increment, BR_bit becomes
     B FFTSTART                       ; zero, and we start main FFT
   ]
 ]

FFTSTART
 ; The main FFT algorithm starts here...
 ;
 ; There are two different algorithms following. One is small but unoptimised
 ; and the second large but optimised. The OPTIMISE flag decides which one
 ; is assembled.
 ;
 ; The whole FFT routine does a divide by two of the results after each
 ; stage, this prevents overflow of the registers during a multiply. For
 ; the three optimised stages this right shift by one for each stage is
 ; not done for the first two stages, but the third stage does a right
 ; shift by 3 to make up for it. This may increase the likelyhood of an
 ; overflow.
 ;
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -