; fft_mac.h
; Extracted from the archive "realview22.rar" (C header file, 816 lines in
; the original; this listing is page 1 of 2).
;
; $Copyright:
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
; (C) COPYRIGHT 2000,2002 ARM Limited
; ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File: fft_mac.h,v
; Revision: 1.14
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; Shared macros and interface definition file.
; NB: All the algorithms in this code are Decimation in Time. ARM
; is much better at Decimation in Time (as opposed to Decimation
; in Frequency) due to the position of the barrel shifter. Decimation
; in time has the twiddling at the start of the butterfly, whereas
; decimation in frequency has it at the end of the butterfly. The
; post multiply shifts can be hidden for Decimation in Time.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; FIRST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The FIRST STAGE macros "FS_RAD<R>" have the following interface:
;
; ON ENTRY:
; REGISTERS:
; r0 = inptr => points to the input buffer consisting of N complex
; numbers of size (1<<datainlog) bytes each
; r1 = dptr => points to the output buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = N => is the number of points in the transform
; r3 = pscale => shift to prescale input by (if applicable)
; ASSEMBLER VARIABLES:
; reversed => logical variable, true if input data is already bit reversed
; The data needs to be bit reversed otherwise
;
; ACTION:
; The routine should
; (1) Bit reverse the data as required for the whole FFT (unless
; the reversed flag is set)
; (2) Prescale the input data by pscale (if applicable)
; (3) Perform a radix R first stage on the data
; (4) Place the processed data in the output array pointed to by dptr
;
; ON EXIT:
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage
; this is: R<<datalog
; r3 = count => number of radix-R blocks or groups processed in this stage
; this is: N/R
; r0,r4-r12,r14 corrupted
; Register names used by the first stage (FS_RAD<R>) macros.
; Names for the values passed in on entry:
inptr RN 0 ; input buffer
dptr RN 1 ; output/scratch buffer
N RN 2 ; size of the FFT
; Names that reuse r1-r3 (dptr is declared a second time for r1; r2 is
; shared by N and dinc).
; NOTE(review): the interface text above describes count on exit as the
; number of radix-R blocks (N/R), while the comment below describes a packed
; (elements<<16)|(blocks) value - confirm which one the stage macros use.
dptr RN 1 ; data pointer - points to end (load in reverse order)
dinc RN 2 ; bytes between data elements at this level of FFT
count RN 3 ; (elements per block<<16) | (blocks per stage)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; GENERAL STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The GENERAL STAGE macros "GS_RAD<R>" have the following interface.
;
; To describe the arguments, suppose this routine is called as stage j
; in a k-stage FFT with N=R1*R2*...*Rk. This stage is radix R=Rj.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (R1*R2*...*R(j-1))<<datalog
; r3 = count => number of blocks or Groups in the last stage:
; count = Rj*R(j+1)*...*Rk
; NB dinc*count = N<<datalog
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to the end of the coefficients for the stage
; (the coefficients for the next stage will usually follow)
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = (R1*R2*..*Rj)<<datalog = (input dinc)*R
; r3 = count => number of radix-R blocks or groups processed in this stage
; count = R(j+1)*...*Rk = (input count)/R
; r0,r4-r12,r14 corrupted
; Register names used by the general stage (GS_RAD<R>) macros.
; r0-r3 carry the same names on entry and on exit.
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
count RN 3 ; number of blocks/groups at this stage
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; LAST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The LAST STAGE macros "LS_RAD<R>" have the following interface.
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage consisting
; of complex numbers of size (1<<coeflog) bytes each in some
; stage dependent format.
; The format currently used is described in full in the
; ReadMe file in the tables subdirectory.
; There is a possible stride between the coefficients
; specified by cinc
; r1 = dptr => points to the working buffer consisting of N complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => number of bytes per "block" or "Group" in the last stage:
; dinc = (N/R)<<datalog
; r3 = cinc => Bytes between twiddle values in the array pointed to by cptr
;
; ACTION:
; The routine should
; (1) Twiddle the input data
; (2) Perform a (last stage optimised) radix R stage on the data
; (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
; r0 = cptr => Updated to point to real-to-complex conversion coefficients
; r1 = dptr => preserved and pointing to the output data
; r2 = dinc => number of bytes per "block" or "Group" in this stage:
; dinc = N<<datalog = (input dinc)*R
; r0,r4-r12,r14 corrupted
; Register names used by the last stage (LS_RAD<R>) macros.
; r3 is named cinc here; the general stage names the same register count.
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; bytes per block/group at this stage
cinc RN 3 ; stride between twiddle coefficients in bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; COMPLEX TO REAL CONVERSION INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The COMPLEX TO REAL macros "LS_ZTOR" have the following interface.
;
; Suppose that 'w' is the N'th root of unity being used for the real FFT
; (usually exp(-2*pi*i/N) for forward transforms and exp(+2*pi*i/N) for
; the inverse transform).
;
; ON ENTRY:
; REGISTERS:
; r0 = cptr => Pointer to twiddle coefficients for this stage
; This consists of (1,w,w^2,w^3,...,w^(N/4-1)).
; There is a stride between each coefficient, specified by cinc
; r1 = dptr => points to the working buffer consisting of N/2 complex
; numbers of size (1<<datalog) bytes each
; r2 = dinc => (N/2)<<datalog, the size of the complex buffer in bytes
; r3 = cinc => Bytes between twiddle value in array pointed to by cptr
; r4 = dout => Output buffer (usually the same as dptr)
;
; ACTION:
; The routine should take the output of an N/2 point complex FFT and convert
; it to the output of an N point real FFT, assuming that the real
; inputs were packed up into the real,imag,real,imag,... buffers of the complex
; input. The output is N/2 complex numbers of the form:
; y[0]+i*y[N/2], y[1], y[2], ..., y[N/2-1]
; where y[0],...,y[N-1] is the output from a complex transform of the N
; real inputs.
;
; ON EXIT:
; r0-r12,r14 corrupted
; Register names used by the complex-to-real conversion (LS_ZTOR) macros.
; Same r0-r3 names as the last stage, plus dout in r4.
cptr RN 0 ; pointer to twiddle coefficients
dptr RN 1 ; pointer to FFT data working buffer
dinc RN 2 ; (N/2)<<datalog, the size of the data in bytes
cinc RN 3 ; bytes between twiddle values in the coefficient buffer
dout RN 4 ; address to write the output (normally the same as dptr)
;;;;;;;;;;;;;;;;;;;;;; END OF INTERFACES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; first stage/outer loop level
; The commented-out names in this section are already declared in the
; interface sections above; only bitrev is new.
;inptr RN 0
;dptr RN 1
;N RN 2 ; size of FFT
;dinc RN 2 ; bytes between block size when bit reversed (scaling of N)
; bitrev shares r3 with count and cinc.
; NOTE(review): presumably the bit-reversal working value - confirm in the
; first stage macros.
bitrev RN 3
; inner loop level
;cptr RN 0 ; coefficient pointer for this level
;dptr RN 1 ; data pointer - points to end (load in reverse order)
;dinc RN 2 ; bytes between data elements at this level of FFT
;count RN 3 ; (elements per block<<16) | (blocks per stage)
; data registers
; x0..x3 occupy r4-r11 as consecutive pairs; the r/i suffixes follow the
; real/imaginary naming used by the load and store macros in this file.
x0r RN 4
x0i RN 5
x1r RN 6
x1i RN 7
x2r RN 8
x2i RN 9
x3r RN 10
x3i RN 11
; t0/t1 sit in r12 and r14 (r13 is skipped). A register list in STM/LDM is
; always transferred in ascending register-number order, hence the ordering
; requirement noted below.
t0 RN 12 ; these MUST be in correct order (t0<t1) for STM's
t1 RN 14
; SETREG prefix, v0, v1
; Declares two global string variables named <prefix>r and <prefix>i and
; assigns them the text of the second and third arguments respectively.
; (The r/i suffixes follow the real/imaginary naming used in this file.)
        MACRO
        SETREG  $prefix,$v0,$v1
        GBLS    $prefix.r
$prefix.r SETS  "$v0"
        GBLS    $prefix.i
$prefix.i SETS  "$v1"
        MEND
; SETREGS prefix, v0..v7
; Applies SETREG four times, to the prefixes <prefix>0 .. <prefix>3, taking
; the eight value arguments two at a time. This defines the string
; variables <prefix>0r, <prefix>0i, ... <prefix>3r, <prefix>3i.
        MACRO
        SETREGS $prefix,$v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
        SETREG  $prefix.0, $v0, $v1
        SETREG  $prefix.1, $v2, $v3
        SETREG  $prefix.2, $v4, $v5
        SETREG  $prefix.3, $v6, $v7
        MEND
; SET2REGS prefix, v0..v3
; Two-pair form of SETREGS: applies SETREG to <prefix>0 and <prefix>1 only,
; defining <prefix>0r, <prefix>0i, <prefix>1r and <prefix>1i.
        MACRO
        SET2REGS $prefix,$v0,$v1,$v2,$v3
        SETREG  $prefix.0, $v0, $v1
        SETREG  $prefix.1, $v2, $v3
        MEND
; LOADCOEFS - load the next three complex twiddle coefficients.
;   $cp        = coefficient pointer, advanced past the six scalars read
;   $c0r,$c0i  = registers receiving coefficient 0 (real, imaginary)
;   $c1r,$c1i  = registers receiving coefficient 1
;   $c2r,$c2i  = registers receiving coefficient 2
; The storage layout is chosen by the coefformat assembler variable:
;   "W" = one word per scalar, "H" = one signed halfword per scalar.
; Any other value reaches the ERROR line. Customise here if the coefficient
; format changes.
; Note for the "W" case: a multiple load fills registers in ascending
; register-number order, so the six registers must be passed in ascending
; order for each value to land in the register named for it.
        MACRO
        LOADCOEFS $cp, $c0r, $c0i, $c1r, $c1i, $c2r, $c2i
        IF "$coefformat"="W"
          ; word layout: single multiple load with writeback
          LDMIA   $cp!, {$c0r, $c0i, $c1r, $c1i, $c2r, $c2i}
        ELSE
          IF "$coefformat"="H"
            ; halfword layout: six sign-extending loads, 2 bytes each
            LDRSH   $c0r, [$cp], #2
            LDRSH   $c0i, [$cp], #2
            LDRSH   $c1r, [$cp], #2
            LDRSH   $c1i, [$cp], #2
            LDRSH   $c2r, [$cp], #2
            LDRSH   $c2i, [$cp], #2
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND
; LOADCOEF - load a single complex twiddle coefficient.
;   $cp  = address of the coefficient (real part first, imaginary after it)
;   $ci  = post-index operand applied to $cp once the load is done
;   $re  = register receiving the real part
;   $im  = register receiving the imaginary part
; The imaginary part is fetched first at a fixed offset (4 bytes for "W",
; 2 bytes for "H"); the real part is fetched second so that its post-index
; performs the pointer update. Layout is chosen by coefformat.
        MACRO
        LOADCOEF $cp, $ci, $re, $im
        IF "$coefformat"="W"
          LDR     $im, [$cp, #4]
          LDR     $re, [$cp], $ci
        ELSE
          IF "$coefformat"="H"
            LDRSH   $im, [$cp, #2]
            LDRSH   $re, [$cp], $ci
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND
; LOADCOEFR - load one component of one twiddle coefficient.
;   $cp  = address of the scalar to load (not modified)
;   $re  = register receiving the value
; Reads a word for coefformat "W" or a sign-extended halfword for "H".
; There is no post-index argument; the pointer is left unchanged.
        MACRO
        LOADCOEFR $cp, $re
        IF "$coefformat"="W"
          LDR     $re, [$cp]
        ELSE
          IF "$coefformat"="H"
            LDRSH   $re, [$cp]
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND
; LOADDATAF - load one complex data element in an explicitly given format.
;   $dp      = address of the element (real part first)
;   $di      = post-index operand applied to $dp after the load
;   $re,$im  = registers receiving the real and imaginary parts
;   $format  = "W" (word per scalar) or "H" (signed halfword per scalar)
; The imaginary part is read first at a fixed offset; the real part is read
; last so that its post-index advances the pointer.
        MACRO
        LOADDATAF $dp, $di, $re, $im, $format
        IF "$format"="W"
          LDR     $im, [$dp, #4]
          LDR     $re, [$dp], $di
        ELSE
          IF "$format"="H"
            LDRSH   $im, [$dp, #2]
            LDRSH   $re, [$dp], $di
          ELSE
            ERROR "Unsupported load format: $format"
          ENDIF
        ENDIF
        MEND
; LOADDATAZ - load one complex element in the input format without moving
; the pointer.
;   $dp      = address of the element (left unchanged)
;   $re,$im  = registers receiving the real and imaginary parts
; The layout is chosen by the datainformat assembler variable ("W" or "H").
; For "W" a multiple load is used, which fills registers in ascending
; register-number order, so $re must be a lower-numbered register than $im.
; The error text reports datainformat, the variable actually tested here
; (this macro has no format parameter).
        MACRO
        LOADDATAZ $dp, $re, $im
        IF "$datainformat"="W"
          LDMIA   $dp, {$re,$im}
          MEXIT
        ENDIF
        IF "$datainformat"="H"
          LDRSH   $im, [$dp, #2]
          LDRSH   $re, [$dp]
          MEXIT
        ENDIF
        ERROR "Unsupported load format: $datainformat"
        MEND
; LOADDATA - fetch one complex element from the working array.
; Forwards to LOADDATAF, supplying the dataformat assembler variable as the
; format.
;   $dp      = element address, post-indexed by $di
;   $re,$im  = registers receiving the real and imaginary parts
        MACRO
        LOADDATA $dp, $di, $re, $im
        LOADDATAF $dp, $di, $re, $im, $dataformat
        MEND
; LOADDATAI - fetch one complex element from the input array.
; Forwards to LOADDATAF, supplying the datainformat assembler variable as
; the format.
;   $dp      = element address, post-indexed by $di
;   $re,$im  = registers receiving the real and imaginary parts
        MACRO
        LOADDATAI $dp, $di, $re, $im
        LOADDATAF $dp, $di, $re, $im, $datainformat
        MEND
; LOADDATA4 - load four consecutive complex elements from the input array,
; advancing $dp past them.
;   $dp            = input pointer (updated)
;   $re0,$im0 ...  = four real/imaginary register pairs to fill
; For datainformat "W" one multiple load with writeback is used; registers
; are filled in ascending register-number order, so the eight registers must
; be passed in ascending order. Any other format goes through LOADDATAI one
; element at a time.
; NOTE(review): the per-element step is 1<<datalog, whereas the first stage
; interface describes input elements as (1<<datainlog) bytes - confirm the
; two agree whenever the input is not in word format.
        MACRO
        LOADDATA4 $dp, $re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3
        IF "$datainformat"<>"W"
          LOADDATAI $dp, #1<<$datalog, $re0,$im0
          LOADDATAI $dp, #1<<$datalog, $re1,$im1
          LOADDATAI $dp, #1<<$datalog, $re2,$im2
          LOADDATAI $dp, #1<<$datalog, $re3,$im3
        ELSE
          LDMIA   $dp!, {$re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3}
        ENDIF
        MEND
; SHIFTDATA - apply the optional post-load shift to freshly loaded data.
;   $dr  = first register to shift in place
;   $di  = second register to shift in place; may be omitted
; The postldshift assembler variable is pasted directly after the source
; register of a MOV, so it has to supply the complete shifter operand text.
; When postldshift is empty the macro emits nothing.
        MACRO
        SHIFTDATA $dr, $di
        IF "$postldshift"=""
          MEXIT
        ENDIF
        IF "$di"<>""
          MOV     $di, $di $postldshift
        ENDIF
        MOV     $dr, $dr $postldshift
        MEND
; STORE - write one complex element to the output data buffer.
;   $dp      = destination address (real part first)
;   $di      = post-index operand applied to $dp after the store
;   $re,$im  = registers holding the real and imaginary parts
; Layout follows the dataformat assembler variable: "W" stores two words,
; "H" stores two halfwords. The imaginary part is written first at a fixed
; offset; the real part is written last so that its post-index moves $dp.
        MACRO
        STORE   $dp, $di, $re, $im
        IF "$dataformat"="W"
          STR     $im, [$dp, #4]
          STR     $re, [$dp], $di
        ELSE
          IF "$dataformat"="H"
            STRH    $im, [$dp, #2]
            STRH    $re, [$dp], $di
          ELSE
            ERROR "Unsupported save format: $dataformat"
          ENDIF
        ENDIF
        MEND
; STOREP - write one complex element and step $dp to the next packed
; element.
;   $dp      = destination address, advanced by the size of one element
;   $re,$im  = registers holding the real and imaginary parts
; "W" uses a multiple store with writeback (8 bytes); registers are stored in
; ascending register-number order, so $re must be a lower-numbered register
; than $im. "H" writes two halfwords and post-indexes by 4 bytes.
        MACRO
        STOREP  $dp, $re, $im
        IF "$dataformat"="W"
          STMIA   $dp!, {$re,$im}
        ELSE
          IF "$dataformat"="H"
            STRH    $im, [$dp, #2]
            STRH    $re, [$dp], #4
          ELSE
            ERROR "Unsupported save format: $dataformat"
          ENDIF
        ENDIF
        MEND
; (End of page 1 of 2 of this listing. The code viewer's keyboard-shortcut
; help text that followed here is not part of the source file.)