fft_mac.h
来自「realview22.rar」· C头文件 代码 · 共 816 行 · 第 1/2 页
H
816 行
; STORE3P dp, re0, im0, re1, im1, re2, im2
;
; Store three complex values (re0,im0), (re1,im1), (re2,im2) at the address
; in dp and advance dp past them.  The element size is chosen at assembly
; time from the global string "dataformat":
;   "W" - one STMIA with writeback stores six words (dp advances 24 bytes).
;         NOTE(review): STM writes registers in ascending register-number
;         order, so the six registers presumably have to be passed in
;         ascending order to obtain the re0,im0,re1,im1,re2,im2 layout -
;         confirm at the call sites.
;   "H" - six halfword stores; each pair writes the imaginary part at
;         offset 2 and then the real part at offset 0 with a post-increment
;         of 4 (dp advances 12 bytes).
; Any other value of dataformat is reported through ERROR.
        MACRO
        STORE3P $dp, $re0, $im0, $re1, $im1, $re2, $im2
        IF "$dataformat"="W"
        STMIA   $dp!, {$re0,$im0, $re1,$im1, $re2,$im2}
        ELSE
        IF "$dataformat"="H"
        STRH    $im0, [$dp, #2]
        STRH    $re0, [$dp], #4
        STRH    $im1, [$dp, #2]
        STRH    $re1, [$dp], #4
        STRH    $im2, [$dp, #2]
        STRH    $re2, [$dp], #4
        ELSE
        ERROR   "Unsupported save format: $dataformat"
        ENDIF
        ENDIF
        MEND
; DOi for, bac, d, s1, s2 [, shift]
;
; Emit a single data-processing instruction whose mnemonic depends on the
; transform direction (global string "direction"):
;   direction = "F"  ->  for d, s1, s2 [, shift]
;   anything else    ->  bac d, s1, s2 [, shift]
; The shifter operand is only appended when a shift argument is supplied.
        MACRO
        DOi     $for, $bac, $d, $s1, $s2, $shift
        IF "$shift"=""
        ; no shifter operand supplied
        IF "$direction"="F"
        $for    $d, $s1, $s2
        ELSE
        $bac    $d, $s1, $s2
        ENDIF
        ELSE
        ; shifter operand applied to the last source
        IF "$direction"="F"
        $for    $d, $s1, $s2, $shift
        ELSE
        $bac    $d, $s1, $s2, $shift
        ENDIF
        ENDIF
        MEND
; ADDi d, s1, s2 [, shift]
;
; Direction-dependent add, used where a term is multiplied by j:
;   inverse transform (w=exp(+2*pi*i/N), j=+i):  d = s1 + s2
;   forward transform (w=exp(-2*pi*i/N), j=-i):  d = s1 - s2
; The optional shift is applied to s2 as a shifter operand.
        MACRO
        ADDi    $d, $s1, $s2, $shift
        IF "$direction"="F"
        ; forward: subtract
        IF "$shift"=""
        SUB     $d, $s1, $s2
        ELSE
        SUB     $d, $s1, $s2, $shift
        ENDIF
        ELSE
        ; inverse: add
        IF "$shift"=""
        ADD     $d, $s1, $s2
        ELSE
        ADD     $d, $s1, $s2, $shift
        ENDIF
        ENDIF
        MEND
; SUBi d, s1, s2 [, shift]
;
; Direction-dependent subtract, the counterpart of ADDi:
;   inverse transform (w=exp(+2*pi*i/N), j=+i):  d = s1 - s2
;   forward transform (w=exp(-2*pi*i/N), j=-i):  d = s1 + s2
; The optional shift is applied to s2 as a shifter operand.
        MACRO
        SUBi    $d, $s1, $s2, $shift
        IF "$direction"="F"
        ; forward: add
        IF "$shift"=""
        ADD     $d, $s1, $s2
        ELSE
        ADD     $d, $s1, $s2, $shift
        ENDIF
        ELSE
        ; inverse: subtract
        IF "$shift"=""
        SUB     $d, $s1, $s2
        ELSE
        SUB     $d, $s1, $s2, $shift
        ENDIF
        ENDIF
        MEND
; CHECKOV val, tmp, max
;
; Check that val is in the range -max to +max-1 and flag an overflow in the
; carry flag if it is not (2 cycles).
;   val - register holding the value to test; not modified
;   tmp - scratch register; overwritten
;   max - bound, used directly as the second operand of the compare
; The carry flag is sticky: the compare only executes while carry is clear
; (CC), so once an overflow has set carry, later checks leave it set.
; This has the advantage of not needing a separate register to store the
; overflow state.
; NOTE(review): presumably the caller clears carry before the first check
; and keeps it intact between checks - confirm at the call sites.
MACRO
CHECKOV $val, $tmp, $max
EOR $tmp, $val, $val, ASR#31 ; tmp = val if val>=0, else NOT(val) = -val-1
CMPCC $tmp, $max ; carry becomes set when tmp >= max (unsigned compare)
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Macros to perform the twiddle stage (complex multiply by coefficient)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The coefficients are stored in different formats according to the
; precision and processor architecture. The coefficients required
; will be of the form:
;
; c(k) = cos( + k*2*pi/N ), s(k) = sin( + k*2*pi/N )
;
; c(k) + i*s(k) = exp(+2*pi*k*i/N)
;
; for some k's. The storage formats are:
;
; Format Data
; Q14S (c-s, s) in Q14 format, 16-bits per real
; Q14R (c, s) in Q14 format, 16-bits per real
; Q30S (c-s, s) in Q30 format, 32-bits per real
;
; The operation to be performed is one of:
;
; a+i*b = (x+i*y)*(c-i*s) => forward transform
; OR a+i*b = (x+i*y)*(c+i*s) => inverse transform
;
; For the R format the operation is quite simple - requiring 4 muls
; and 2 adds:
;
; Forward: a = x*c+y*s, b = y*c-x*s
; Inverse: a = x*c-y*s, b = y*c+x*s
;
; For the S format the operation is more complex but only requires
; three multiplies, and is simpler to schedule:
;
; Forward: a = (y-x)*s + x*(c+s) = x*(c-s) + (x+y)*s
; b = (y-x)*s + y*(c-s) = y*(c+s) - (x+y)*s
;
; Inverse: a = (x-y)*s + x*(c-s)
; b = (x-y)*s + y*(c+s)
;
; S advantage 16bit: 1ADD, 1SUB, 1MUL, 2MLA instead of 1SUB, 3MUL, 1MLA
; S advantage 32bit: 2ADD, 1SUB, 2SMULL, 1SMLAL instead of 1RSB, 2SMULL, 2SMLAL
; So S wins except for a very fast multiplier (eg 9E)
;
; NB The coefficients must always be the second operand on processors that
; take a variable number of cycles per multiply, so that the FFT time remains
; constant
; TWIDDLE xr, xi, cr, ci, t0, t1
;
; Three-multiply complex multiply of the unpacked value (xr,xi) = (x,y) by
; an S-format coefficient.
;   In:   (cr,ci) = (c-s, s)
;   Out:  (cr,ci) = (a,b), where
;           direction = "F" (forward):  a = x*c+y*s,  b = y*c-x*s
;           otherwise (inverse):        a = x*c-y*s,  b = y*c+x*s
;   t0 and t1 are scratch registers and are overwritten.
;
; The code path is chosen at assembly time from the global qshift:
;   0 <= qshift < 32 : MUL/MLA path.  cr and ci receive the low 32 bits of
;                      the product sums; no post-multiply shift is applied.
;                      xr and xi are preserved.
;   otherwise        : SMULL/SMLAL path.  cr and ci receive the high words
;                      of the 64-bit products.  The forward form overwrites
;                      xi with y-x; the inverse form overwrites xr with x-y.
;
; In every multiply the coefficient-derived value is the last multiplier
; operand.  "SA" in the comments below presumably refers to StrongARM.
MACRO
TWIDDLE $xr, $xi, $cr, $ci, $t0, $t1
IF qshift>=0 :LAND: qshift<32
IF "$direction"="F"
SUB $t1, $xi, $xr ; t1 = y-x
MUL $t0, $t1, $ci ; t0 = (y-x)*s
ADD $t1, $cr, $ci, LSL#1 ; t1 = (c-s)+2*s = c+s; allow mul to finish on SA
MLA $ci, $xi, $cr, $t0 ; ci = y*(c-s) + (y-x)*s = b
MLA $cr, $xr, $t1, $t0 ; cr = x*(c+s) + (y-x)*s = a
ELSE
SUB $t1, $xr, $xi ; t1 = x-y
MUL $t0, $t1, $ci ; t0 = (x-y)*s
ADD $ci, $cr, $ci, LSL#1 ; ci = (c-s)+2*s = c+s; allow mul to finish on SA
MLA $cr, $xr, $cr, $t0 ; cr = x*(c-s) + (x-y)*s = a
MLA $ci, $xi, $ci, $t0 ; ci = y*(c+s) + (x-y)*s = b
ENDIF
ELSE
IF "$direction"="F"
ADD $t1, $cr, $ci, LSL#1 ; t1 = c+s
SMULL $cr, $t0, $xi, $cr ; t0 = high word of y*(c-s); low word in cr is discarded
SUB $xi, $xi, $xr ; xi = y-x (input xi is destroyed); allow mul to finish on SA
SMULL $ci, $cr, $xi, $ci ; cr = high word of (y-x)*s; low word in ci is discarded
ADD $ci, $cr, $t0 ; ci = b; allow mul to finish on SA
; t0 still holds a high word here but is used as the low accumulator word,
; so the carry into cr is not exact.
; NOTE(review): looks like a deliberate precision trade-off - confirm.
SMLAL $t0, $cr, $xr, $t1 ; cr += high word of x*(c+s) = a
ELSE
ADD $t1, $cr, $ci, LSL#1 ; t1 = c+s
SMULL $t0, $cr, $xr, $cr ; cr = high word of x*(c-s)
SUB $xr, $xr, $xi ; xr = x-y (input xr is destroyed); allow mul to finish on SA
SMULL $t0, $ci, $xr, $ci ; ci:t0 = (x-y)*s
ADD $cr, $cr, $ci ; cr = a; allow mul to finish on SA
SMLAL $t0, $ci, $xi, $t1 ; ci:t0 += y*(c+s), so ci = b
ENDIF
ENDIF
MEND
; TWIDDLE_E xr, xi, c0, t0, xx [, xxi]
;
; Variant of TWIDDLE for an "E" processor variant (16x16 signed multiplies).
; A standard 4 multiply twiddle is used as it requires the same number
; of cycles and needs less intermediate precision.
;
;   c0  = coefficient real and imaginary parts (c,s), packed.  The B/T
;         selectors used below take c from the bottom halfword and s from
;         the top halfword.
;   xx  = input data, packed: real part x in the bottom halfword and, when
;         xxi is omitted, imaginary part y in the top halfword.
;   xxi = optional; when supplied, the imaginary part y is read from the
;         bottom halfword of this register instead of the top of xx.
;   t0  = scratch register; overwritten.
;
;   xr  = destination register for real part of product
;   xi  = destination register for imaginary part of product
;
; Result:  direction = "F" (forward):  xr = x*c+y*s,  xi = y*c-x*s
;          otherwise (inverse):        xr = x*c-y*s,  xi = y*c+x*s
;
; All registers should be distinct
;
MACRO
TWIDDLE_E $xr, $xi, $c0, $t0, $xx, $xxi
IF "$direction"="F"
SMULBT $t0, $xx, $c0 ; t0 = x*s
SMULBB $xr, $xx, $c0 ; xr = x*c
IF "$xxi"=""
SMULTB $xi, $xx, $c0 ; xi = y*c (y from top half of xx)
SMLATT $xr, $xx, $c0, $xr ; xr = x*c + y*s
ELSE
SMULBB $xi, $xxi, $c0 ; xi = y*c (y from bottom half of xxi)
SMLABT $xr, $xxi, $c0, $xr ; xr = x*c + y*s
ENDIF
SUB $xi, $xi, $t0 ; xi = y*c - x*s
ELSE
SMULBB $t0, $xx, $c0 ; t0 = x*c
SMULBT $xi, $xx, $c0 ; xi = x*s
IF "$xxi"=""
SMULTT $xr, $xx, $c0 ; xr = y*s (y from top half of xx)
SMLATB $xi, $xx, $c0, $xi ; xi = x*s + y*c
ELSE
SMULBT $xr, $xxi, $c0 ; xr = y*s (y from bottom half of xxi)
SMLABB $xi, $xxi, $c0, $xi ; xi = x*s + y*c
ENDIF
SUB $xr, $t0, $xr ; xr = x*c - y*s
ENDIF
MEND
; SCALE out, in, coef, tmp
;
; Multiply the data value "in" by the coefficient "coef" and write the
; result to "out".  coef is always the second multiplicand and is left
; unchanged.  No post-multiply shift is applied, so in most cases this is
; just a multiply (unless you need higher precision).
;   0 <= qshift < 32 : out = low 32 bits of in*coef; tmp is not used
;   otherwise        : out = high word of the 64-bit product in*coef;
;                      tmp receives the low word
        MACRO
        SCALE   $out, $in, $coef, $tmp
        IF qshift<0 :LOR: qshift>=32
        SMULL   $tmp, $out, $in, $coef
        ELSE
        MUL     $out, $in, $coef
        ENDIF
        MEND
; DECODEFORMAT out, format
;
; Declare two global string variables whose names are formed from the
; prefix "out", and fill them in from the one-letter data format:
;   <out>format = the format letter as given
;   <out>log    = "1" for "B", "2" for "H", "3" for "W"
; Any other format letter is reported through ERROR; <out>log is then left
; as declared.
        MACRO
        DECODEFORMAT $out, $format
        GBLS    $out.format
        GBLS    $out.log
$out.format SETS "$format"
        IF "$format"="W"
$out.log SETS "3"
        ELSE
        IF "$format"="H"
$out.log SETS "2"
        ELSE
        IF "$format"="B"
$out.log SETS "1"
        ELSE
        ERROR   "Unrecognised format for $out: $format"
        ENDIF
        ENDIF
        ENDIF
        MEND
; SETSHIFT var, value
;
; Build the shifter-operand suffix for a right shift by "value" and store it
; in the (already declared) string variable "var":
;   value in 1..31    ->  ",ASR#0x<value as hex>"
;   value in -31..-1  ->  ",LSL#0x<-value as hex>"   (negative = left shift)
;   anything else     ->  ""                          (no shift)
; The "0x" prefix is needed because a numeric variable is substituted into
; the string as hexadecimal digits.
        MACRO
        SETSHIFT $var, $value
        LCLA    rsh             ; requested right-shift amount
        LCLA    lsh             ; the same amount viewed as a left shift
rsh     SETA    $value
lsh     SETA    -rsh
$var    SETS    ""
        IF rsh>0 :LAND: rsh<32
$var    SETS    ",ASR#0x$rsh"
        ELSE
        IF lsh>0 :LAND: lsh<32
$var    SETS    ",LSL#0x$lsh"
        ENDIF
        ENDIF
        MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; CODE to decipher the FFT options ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FFT_OPTIONS_STRING flags, name
;
; Decode the FFT option bitfield "flags" (a combination of the FFT_*
; constants) into the global assembly-time variables tested by the FFT
; macros, and build a textual version of the configuration.
;
;   flags - numeric expression built from the FFT_* option constants
;   name  - name of the global string variable to create; it is set to the
;           concatenation radix+direction+qname+intype+outpos+prescale
;
; Globals assigned here:
;   architecture, reversed, radix, direction, qname, datainlog, datalog,
;   coeflog, datainformat, dataformat, coefformat, qshift, coeforder,
;   intype, outpos, prescale, norm, cdshift
; Globals declared here but not assigned by this macro:
;   postmulshift, postldshift, postmulshift1, postldshift1, tablename
;
; A radix or data-size field that matches none of the supported values is
; reported through ERROR instead of silently leaving the corresponding
; variables unset.
        MACRO
        FFT_OPTIONS_STRING $flags, $name
        GBLS    $name
        GBLS    qname           ; name of the precision (eg Q14, Q30)
        GBLS    direction       ; name of the direction (eg I, F)
        GBLS    radix           ; name of the radix (2, 4E, 4B, 4O etc)
        GBLS    intype          ; name of input data type (if real)
        GBLS    prescale        ; flag to indicate prescale
        GBLS    outpos          ; position for the output data
        GBLS    datainformat    ; bytes per input data item
        GBLS    dataformat      ; bytes per working item
        GBLS    coefformat      ; bytes per coefficient working item
        GBLS    coeforder       ; R=(c,s) S=(c-s,s) storage format
        GBLA    datainlog       ; shift to bytes per input complex
        GBLA    datalog         ; shift to bytes per working complex
        GBLA    coeflog         ; shift to bytes per coefficient complex
        GBLA    qshift          ; right shift after multiply
        GBLA    norm
        GBLA    architecture    ; 4=Arch4(7TDMI,SA), 5=Arch5TE(ARM9E)
        GBLS    cdshift
        GBLS    postmulshift
        GBLS    postldshift
        GBLS    postmulshift1
        GBLS    postldshift1
        GBLL    reversed        ; flag to indicate input is already bit reversed
        GBLS    tablename

        ; find what sort of processor we are building the FFT for.
        ; The architecture string is padded before taking its prefix, so the
        ; prefix is always long enough to compare.
architecture SETA 4             ; Architecture 4 (7TDMI, StrongARM etc)
        IF ((({ARCHITECTURE}:CC:"aaaa"):LEFT:3="5TE") :LOR: (({ARCHITECTURE}:CC:"aa"):LEFT:1="6"))
architecture SETA 5             ; Architecture 5 (ARM9E, E extensions)
        ENDIF

        ; decode input order
reversed SETL {FALSE}
        IF ($flags:AND:FFT_INPUTORDER)=FFT_REVERSED
reversed SETL {TRUE}
        ENDIF

        ; decode radix type; an unknown selection is an error
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4
radix   SETS    "4E"
        ELSE
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_8F
radix   SETS    "4O"
        ELSE
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_2L
radix   SETS    "4B"
        ELSE
        ERROR   "Unsupported FFT radix selection"
        ENDIF
        ENDIF
        ENDIF

        ; decode direction
        IF ($flags:AND:FFT_DIRECTION)=FFT_INVERSE
direction SETS  "I"
        ELSE
direction SETS  "F"
        ENDIF

        ; decode data size to qname, the formats and the log sizes;
        ; an unknown selection is an error
        IF ($flags:AND:FFT_DATA_SIZES)=FFT_32bit
qname   SETS    "Q30"
datainlog SETA  3               ; 8 bytes per complex
datalog SETA    3
coeflog SETA    3
datainformat SETS "W"
dataformat SETS "W"
coefformat SETS "W"
qshift  SETA    -2              ; shift left top word of 32 bit result
        ELSE
        IF ($flags:AND:FFT_DATA_SIZES)=FFT_16bit
qname   SETS    "Q14"
datainlog SETA  2
datalog SETA    2
coeflog SETA    2
datainformat SETS "H"
dataformat SETS "H"
coefformat SETS "H"
qshift  SETA    14
        ELSE
        ERROR   "Unsupported FFT data size selection"
        ENDIF
        ENDIF

        ; find the coefficient ordering: (c,s) pairs for 16-bit data on an
        ; E-extension core, (c-s,s) otherwise.
        ; NOTE: armasm relational operators compare arithmetic values as
        ; unsigned, so the Q30 setting qshift=-2 fails the test below and
        ; keeps the S ordering.
coeforder SETS  "S"
        IF (architecture>=5):LAND:(qshift<16)
coeforder SETS  "R"
        ENDIF

        ; decode real vs complex input data type
intype  SETS    ""
        IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
intype  SETS    "R"
        ENDIF

        ; decode on outpos
outpos  SETS    ""
        IF ($flags:AND:FFT_OUTPUTPOS)=FFT_OUT_INBUF
outpos  SETS    "I"
        ENDIF

        ; decode on prescale
prescale SETS   ""
        IF ($flags:AND:FFT_INPUTSCALE)=FFT_PRESCALE
prescale SETS   "P"
        ENDIF

        ; decode on output scale
norm    SETA    1
        IF ($flags:AND:FFT_OUTPUTSCALE)=FFT_NONORM
norm    SETA    0
        ENDIF

        ; calculate shift to convert data offsets to coefficient offsets.
        ; The variables are passed by name so the difference is evaluated
        ; numerically; textual substitution of a numeric variable yields
        ; hexadecimal digits, which only read back correctly below 10.
        SETSHIFT cdshift, (datalog-coeflog)

$name   SETS    "$radix.$direction.$qname.$intype.$outpos.$prescale"
        MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; FFT GENERATOR ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; FFT options bitfield
;
; Each option is a field mask followed by the values that field may take;
; an option is tested with (flags :AND: mask) = value.  For the single-bit
; fields the "set" value is written as the mask itself.

FFT_DIRECTION   EQU     0x00000001      ; mask: direction select bit
FFT_FORWARD     EQU     0x00000000      ;   forward FFT, exp(-ijkw) coefficients
FFT_INVERSE     EQU     FFT_DIRECTION   ;   inverse FFT, exp(+ijkw) coefficients

FFT_INPUTORDER  EQU     0x00000002      ; mask: input order select field
FFT_BITREV      EQU     0x00000000      ;   input is in normal order (bit reverse it)
FFT_REVERSED    EQU     FFT_INPUTORDER  ;   input is assumed already bit reversed

FFT_INPUTSCALE  EQU     0x00000004      ; mask: scaling of the input data
FFT_NOPRESCALE  EQU     0x00000000      ;   do not scale the input data
FFT_PRESCALE    EQU     FFT_INPUTSCALE  ;   scale input data up by a register amount

FFT_INPUTTYPE   EQU     0x00000010      ; mask: real/complex input selector
FFT_COMPLEX     EQU     0x00000000      ;   complex FFT of N points
FFT_REAL        EQU     FFT_INPUTTYPE   ;   2*N point real FFT

FFT_OUTPUTPOS   EQU     0x00000020      ; mask: where the output is placed
FFT_OUT_OUTBUF  EQU     0x00000000      ;   default - in the output buffer
FFT_OUT_INBUF   EQU     FFT_OUTPUTPOS   ;   copied back to the input buffer

FFT_RADIX       EQU     0x00000F00      ; mask: radix select
FFT_RADIX4      EQU     0x00000000      ;   radix 4 (log_2 N must be even)
FFT_RADIX4_8F   EQU     0x00000100      ;   radix 4 with radix 8 first stage
FFT_RADIX4_2L   EQU     0x00000200      ;   radix 4 with optional radix 2 last stage

FFT_OUTPUTSCALE EQU     0x00001000      ; mask: output scale select
FFT_NORMALISE   EQU     0x00000000      ;   default - divide by N during algorithm
FFT_NONORM      EQU     FFT_OUTPUTSCALE ;   calculate the raw sum (no scale)

FFT_DATA_SIZES  EQU     0x000F0000      ; mask: data/coefficient size select
FFT_16bit       EQU     0x00000000      ;   16-bit data and Q14 coefs
FFT_32bit       EQU     0x00010000      ;   32-bit data and Q30 coefs
END
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?