; fft_mac.h
;
; (from the archive "realview22.rar"; C header file, 816 lines, page 1 of 2)
        ; STORE3P: write three complex values (re0,im0), (re1,im1), (re2,im2)
        ; through the pointer dp and advance dp past the stored data.
        ;   dataformat "W": a single STMIA of six words with writeback.
        ;   dataformat "H": six halfword stores; within each pair the real
        ;                   part goes to offset 0 and the imaginary part to
        ;                   offset 2, then dp is post-incremented by 4.
        ; Any other dataformat is rejected with an error.
        ; NOTE: STMIA stores registers in ascending register-number order,
        ; so for the "W" format the arguments must be allocated in
        ; increasing register order to land in the listed sequence.
        MACRO
        STORE3P $dp, $re0, $im0, $re1, $im1, $re2, $im2
        IF "$dataformat"="W"
          STMIA $dp!, {$re0,$im0, $re1,$im1, $re2,$im2}
        ELSE
          IF "$dataformat"="H"
            STRH  $im0, [$dp, #2]
            STRH  $re0, [$dp], #4
            STRH  $im1, [$dp, #2]
            STRH  $re1, [$dp], #4
            STRH  $im2, [$dp, #2]
            STRH  $re2, [$dp], #4
          ELSE
            ERROR "Unsupported save format: $dataformat"
          ENDIF
        ENDIF
        MEND

        ; DOi: emit one of two instructions depending on the transform
        ; direction. When the direction string is "F" the mnemonic passed
        ; as the first argument is used, otherwise the second one is used.
        ; The operands d, s1, s2 are passed through unchanged; the shift
        ; operand is appended only when the caller supplied one.
        MACRO
        DOi     $for, $bac, $d, $s1, $s2, $shift
        IF "$shift"=""
          IF "$direction"="F"
            $for $d, $s1, $s2
          ELSE
            $bac $d, $s1, $s2
          ENDIF
        ELSE
          IF "$direction"="F"
            $for $d, $s1, $s2, $shift
          ELSE
            $bac $d, $s1, $s2, $shift
          ENDIF
        ENDIF
        MEND

        ; ADDi: direction-dependent add (multiply-by-j helper).
        ;   d = s1 + s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
        ;   d = s1 - s2 if w=exp(-2*pi*i/N) j=-i - forward transform
        ; The optional shift argument is handed to DOi, which appends it
        ; as the shifter applied to the second source operand.
        MACRO
        ADDi    $d, $s1, $s2, $shift
        DOi     SUB, ADD, $d, $s1, $s2, $shift
        MEND

        ; SUBi: direction-dependent subtract (mirror image of ADDi).
        ;   d = s1 - s2 if w=exp(+2*pi*i/N) j=+i - inverse transform
        ;   d = s1 + s2 if w=exp(-2*pi*i/N) j=-i - forward transform
        ; The optional shift argument is handed to DOi, which appends it
        ; as the shifter applied to the second source operand.
        MACRO
        SUBi    $d, $s1, $s2, $shift
        DOi     ADD, SUB, $d, $s1, $s2, $shift
        MEND
        
        ; CHECKOV: check that $val is in the range -$max to +$max-1
        ; and set the carry flag (sticky) if it is not (2 cycles).
        ; The compare is conditional on carry clear, so once one check has
        ; set the carry flag every later check is skipped and the flag
        ; stays set; the caller must start with the carry flag clear.
        ; This has the advantage of not needing a separate register
        ; to store the overflow state. $tmp is overwritten.
        MACRO
        CHECKOV $val, $tmp, $max
        EOR     $tmp, $val, $val, ASR#31    ; tmp = val if val>=0, else NOT val (= -val-1)
        CMPCC   $tmp, $max                  ; only if carry clear: carry set when tmp >= max
        MEND
        
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Macros to perform the twiddle stage (complex multiply by coefficient)
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; The coefficients are stored in different formats according to the
; precision and processor architecture. The coefficients required
; will be of the form:
;
;   c(k) = cos( + k*2*pi/N ),  s(k) = sin( + k*2*pi/N )
;
;               c(k) + i*s(k) = exp(+2*pi*k*i/N)
;
; for some k's. The storage formats are:
;
; Format        Data
; Q14S          (c-s, s) in Q14 format, 16-bits per real
; Q14R          (c, s)   in Q14 format, 16-bits per real
; Q30S          (c-s, s) in Q30 format, 32-bits per real
;
; The operation to be performed is one of:
;
;     a+i*b = (x+i*y)*(c-i*s)   => forward transform
; OR  a+i*b = (x+i*y)*(c+i*s)   => inverse transform
;
; For the R format the operation is quite simple - requiring 4 muls
; and 2 adds:
;
;   Forward:  a = x*c+y*s, b = y*c-x*s
;   Inverse:  a = x*c-y*s, b = y*c+x*s
;
; For the S format the operation is more complex but only requires
; three multiplies, and is simpler to schedule:
;
;   Forward:  a = (y-x)*s + x*(c+s) = x*(c-s) + (x+y)*s
;             b = (y-x)*s + y*(c-s) = y*(c+s) - (x+y)*s
;
;   Inverse:  a = (x-y)*s + x*(c-s)
;             b = (x-y)*s + y*(c+s)
; 
; S advantage 16bit: 1ADD, 1SUB, 1MUL, 2MLA instead of 1SUB, 3MUL, 1MLA
; S advantage 32bit: 2ADD, 1SUB, 2SMULL, 1SMLAL instead of 1RSB, 2SMULL, 2SMLAL
; So S wins except for a very fast multiplier (eg 9E)
;
; NB The coefficients must always be the second operand on processors that
; take a variable number of cycles per multiply - so the FFT time remains constant

        ; TWIDDLE: complex multiply of (xr,xi) by a twiddle coefficient.
        ; This twiddle takes unpacked real and imaginary values
        ; Expects (cr,ci) = (c-s,s) on input
        ; Sets    (cr,ci) = (a,b) on output
        ;
        ; t0 and t1 are scratch registers and are overwritten.
        ; For 0<=qshift<32 the MUL/MLA sequence is emitted and the inputs
        ; xr, xi are preserved; no post-multiply shift is applied here.
        ; Otherwise the SMULL/SMLAL sequence is emitted, the results are
        ; the high words of the 64-bit products, and one input is
        ; overwritten as well: xi in the forward case, xr in the inverse.
        MACRO
        TWIDDLE $xr, $xi, $cr, $ci, $t0, $t1
        IF qshift>=0 :LAND: qshift<32
          IF "$direction"="F"
            SUB $t1, $xi, $xr           ; y-x
            MUL $t0, $t1, $ci           ; (y-x)*s
            ADD $t1, $cr, $ci, LSL#1    ; t1 = c+s allow mul to finish on SA
            MLA $ci, $xi, $cr, $t0      ; b = y*(c-s) + (y-x)*s
            MLA $cr, $xr, $t1, $t0      ; a = x*(c+s) + (y-x)*s
          ELSE
            SUB $t1, $xr, $xi           ; x-y
            MUL $t0, $t1, $ci           ; (x-y)*s
            ADD $ci, $cr, $ci, LSL#1    ; ci = c+s allow mul to finish on SA
            MLA $cr, $xr, $cr, $t0      ; a = x*(c-s) + (x-y)*s
            MLA $ci, $xi, $ci, $t0      ; b = y*(c+s) + (x-y)*s
          ENDIF
        ELSE
          IF "$direction"="F"
            ADD   $t1, $cr, $ci, LSL#1  ; t1 = c+s
            SMULL $cr, $t0, $xi, $cr    ; t0 = high word of y*(c-s), low word discarded
            SUB   $xi, $xi, $xr         ; xi = y-x + allow mul to finish on SA
            SMULL $ci, $cr, $xi, $ci    ; cr = high word of (y-x)*s
            ADD   $ci, $cr, $t0         ; b + allow mul to finish on SA
            SMLAL $t0, $cr, $xr, $t1    ; a: cr accumulates the high word of x*(c+s)
          ELSE
            ADD   $t1, $cr, $ci, LSL#1  ; c+s
            SMULL $t0, $cr, $xr, $cr    ; cr = high word of x*(c-s)
            SUB   $xr, $xr, $xi         ; xr = x-y + allow mul to finish on SA
            SMULL $t0, $ci, $xr, $ci    ; ci = high word of (x-y)*s
            ADD   $cr, $cr, $ci         ; a + allow mul to finish on SA
            SMLAL $t0, $ci, $xi, $t1    ; b: ci accumulates the high word of y*(c+s)
          ENDIF
        ENDIF
        MEND

        ; The following twiddle variant is similar to the above
        ; except that it is for an "E" processor variant. A standard
        ; 4 multiply twiddle is used as it requires the same number
        ; of cycles and needs less intermediate precision
        ;
        ; $c0  = coefficient real and imaginary (c,s) (packed); the operand
        ;        selects below read c from the bottom halfword and s from
        ;        the top halfword
        ; $xx  = input data real and imaginary part (packed); real part x in
        ;        the bottom halfword, imaginary part y in the top halfword
        ; $xxi = optional; when supplied, the imaginary input y is read from
        ;        the bottom halfword of this register instead of the top
        ;        halfword of $xx
        ; $t0  = scratch register (overwritten)
        ;
        ; $xr = destination register for real part of product
        ; $xi = destination register for imaginary part of product
        ;
        ; All registers should be distinct
        ;
        MACRO
        TWIDDLE_E $xr, $xi, $c0, $t0, $xx, $xxi
        IF "$direction"="F"
          ; forward: a = x*c + y*s, b = y*c - x*s
          SMULBT  $t0, $xx, $c0             ; t0 = x*s
          SMULBB  $xr, $xx, $c0             ; xr = x*c
          IF "$xxi"=""
            SMULTB  $xi, $xx, $c0           ; xi = y*c
            SMLATT  $xr, $xx, $c0, $xr      ; xr = x*c + y*s = a
          ELSE
            SMULBB  $xi, $xxi, $c0          ; xi = y*c
            SMLABT  $xr, $xxi, $c0, $xr     ; xr = x*c + y*s = a
          ENDIF
          SUB     $xi, $xi, $t0             ; xi = y*c - x*s = b
        ELSE
          ; inverse: a = x*c - y*s, b = y*c + x*s
          SMULBB  $t0, $xx, $c0             ; t0 = x*c
          SMULBT  $xi, $xx, $c0             ; xi = x*s
          IF "$xxi"=""
            SMULTT  $xr, $xx, $c0           ; xr = y*s
            SMLATB  $xi, $xx, $c0, $xi      ; xi = x*s + y*c = b
          ELSE
            SMULBT  $xr, $xxi, $c0          ; xr = y*s
            SMLABB  $xi, $xxi, $c0, $xi     ; xi = x*s + y*c = b
          ENDIF
          SUB     $xr, $t0, $xr             ; xr = x*c - y*s = a
        ENDIF
        MEND

        ; SCALE: multiply the data value in by the coefficient coef and
        ; write the product to out. The coefficient is always the second
        ; multiplicand and is left unchanged. No post-multiply shift is
        ; applied, so in most cases this is just a multiply (unless higher
        ; precision is needed). For 0<=qshift<32 a plain MUL is emitted;
        ; otherwise SMULL is emitted, with out receiving the high word of
        ; the 64-bit product and tmp receiving the low word.
        MACRO
        SCALE   $out, $in, $coef, $tmp
        IF (qshift<0) :LOR: (qshift>=32)
          SMULL $tmp, $out, $in, $coef
        ELSE
          MUL   $out, $in, $coef
        ENDIF
        MEND

        ; DECODEFORMAT: declare two global strings whose names are built
        ; from the out argument with the suffixes "log" and "format".
        ; The "format" string receives the format letter as given; the
        ; "log" string receives "1" for B, "2" for H and "3" for W
        ; (presumably log2 of the bytes per complex item - confirm against
        ; the callers). Any other format letter is rejected with an error.
        MACRO
        DECODEFORMAT    $out, $format
        GBLS    $out.log
        GBLS    $out.format
$out.format SETS "$format"
        IF "$format"="B"
$out.log  SETS "1"
        ELSE
          IF "$format"="H"
$out.log  SETS "2"
          ELSE
            IF "$format"="W"
$out.log  SETS "3"
            ELSE
              ERROR "Unrecognised format for $out: $format"
            ENDIF
          ENDIF
        ENDIF
        MEND

        ; SETSHIFT: build, in the string variable named by the first
        ; argument, the shifter operand text for a right shift by the
        ; given amount. Negative amounts give a left shift.
        ;   0 < value < 32   -> ",ASR#0x<value>"
        ;   -32 < value < 0  -> ",LSL#0x<-value>"
        ;   anything else    -> "" (no shift)
        ; The string variable must already have been declared by the caller.
        MACRO
        SETSHIFT $var, $value
        LCLA svalue
svalue  SETA $value
        IF svalue>0 :LAND: svalue<32
$var      SETS ",ASR#0x$svalue"
        ELSE
svalue    SETA -svalue
          IF svalue>0 :LAND: svalue<32
$var        SETS ",LSL#0x$svalue"
          ELSE
$var        SETS ""
          ENDIF
        ENDIF
        MEND


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;                                                                ;
;  CODE to decipher the FFT options                              ;
;                                                                ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


        ; FFT_OPTIONS_STRING: decode the FFT option bitfield into the
        ; global assembler variables used by the other macros.
        ; The $flags variable specifies the FFT options
        ; The global string $name is set to a textual version
        ; NOTE(review): the global string tablename is declared below but
        ; is not assigned anywhere in this macro - presumably it is set
        ; elsewhere; confirm. The same applies to postmulshift,
        ; postldshift, postmulshift1 and postldshift1.
        MACRO
        FFT_OPTIONS_STRING $flags, $name
        GBLS    $name
        GBLS    qname           ; name of the precision (eg Q14, Q30)
        GBLS    direction       ; name of the direction (eg I, F)
        GBLS    radix           ; name of the radix (2, 4E, 4B, 4O etc)
        GBLS    intype          ; name of input data type (if real)
        GBLS    prescale        ; flag to indicate prescale
        GBLS    outpos          ; position for the output data
        GBLS    datainformat    ; bytes per input data item
        GBLS    dataformat      ; bytes per working item
        GBLS    coefformat      ; bytes per coefficient working item
        GBLS    coeforder       ; R=(c,s) S=(c-s,s) storage format
        GBLA    datainlog       ; shift to bytes per input complex
        GBLA    datalog         ; shift to bytes per working complex
        GBLA    coeflog         ; shift to bytes per coefficient complex
        GBLA    qshift          ; right shift after multiply
        GBLA    norm            ; 1 = normalise the output, 0 = raw sum (FFT_NONORM)
        GBLA    architecture    ; 4=Arch4(7TDMI,SA), 5=Arch5TE(ARM9E)
        GBLS    cdshift         ; shifter text converting data offsets to coefficient offsets
        GBLS    postmulshift
        GBLS    postldshift
        GBLS    postmulshift1
        GBLS    postldshift1
        GBLL    reversed        ; flag to indicate input is already bit reversed
        GBLS    tablename
        
        ; find what sort of processor we are building the FFT for
        ; {ARCHITECTURE} is padded with "a" characters before :LEFT: is
        ; applied, so the test is valid even for short architecture names;
        ; names starting with "5TE" or "6" select architecture 5
architecture SETA 4             ; Architecture 4 (7TDMI, StrongARM etc)
;qname SETS {CPU}
;    P $qname
        IF ((({ARCHITECTURE}:CC:"aaaa"):LEFT:3="5TE") :LOR: (({ARCHITECTURE}:CC:"aa"):LEFT:1="6"))
architecture SETA 5             ; Architecture 5 (ARM9E, E extensions)
;    P arch E
        ENDIF

reversed SETL {FALSE}
        ; decode input order
        IF ($flags:AND:FFT_INPUTORDER)=FFT_REVERSED
reversed SETL {TRUE}
        ENDIF

        ; decode radix type to $radix
        ; (no assignment is made here for any other radix field value)
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4
radix     SETS "4E"
        ENDIF
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_8F
radix     SETS "4O"
        ENDIF
        IF ($flags:AND:FFT_RADIX)=FFT_RADIX4_2L
radix     SETS "4B"
        ENDIF

        ; decode direction to $direction
        IF ($flags:AND:FFT_DIRECTION)=FFT_INVERSE
direction SETS "I"
        ELSE
direction SETS "F"
        ENDIF

        ; decode data size to $qname, and *log's
        ; (no assignment is made here for any other data size field value)
        IF ($flags:AND:FFT_DATA_SIZES)=FFT_32bit
qname     SETS "Q30"
datainlog SETA 3        ; 8 bytes per complex
datalog   SETA 3
coeflog   SETA 3
datainformat SETS "W"
dataformat   SETS "W"
coefformat   SETS "W"
qshift    SETA -2       ; shift left top word of 32 bit result
        ENDIF
        IF ($flags:AND:FFT_DATA_SIZES)=FFT_16bit
qname     SETS "Q14"
datainlog SETA 2        ; 4 bytes per complex
datalog   SETA 2
coeflog   SETA 2
datainformat SETS "H"
dataformat   SETS "H"
coefformat   SETS "H"
qshift    SETA 14
        ENDIF
        
        ; find the coefficient ordering
        ; NOTE(review): qshift is -2 for 32-bit data, so this test also
        ; selects "R" for Q30 on architecture 5, although the storage
        ; format table in this file lists only Q14S, Q14R and Q30S -
        ; confirm that this is intended
coeforder SETS "S"
        IF (architecture>=5):LAND:(qshift<16)
coeforder SETS "R"
        ENDIF

        ; decode real vs complex input data type
intype  SETS ""
        IF ($flags:AND:FFT_INPUTTYPE)=FFT_REAL
intype    SETS "R"
        ENDIF
        
        ; decode on outpos
outpos  SETS ""
        IF ($flags:AND:FFT_OUTPUTPOS)=FFT_OUT_INBUF
outpos  SETS "I"
        ENDIF
        
        ; decode on prescale
prescale SETS ""
        IF ($flags:AND:FFT_INPUTSCALE)=FFT_PRESCALE
prescale SETS "P"
        ENDIF
        
        ; decode on output scale
norm    SETA 1
        IF ($flags:AND:FFT_OUTPUTSCALE)=FFT_NONORM
norm      SETA 0
        ENDIF
        
        ; calculate shift to convert data offsets to coefficient offsets
        ; (datalog equals coeflog for both data sizes decoded above, so
        ; the difference is 0 and cdshift ends up as the empty string)
        SETSHIFT cdshift, ($datalog)-($coeflog)

        ; build the textual name from the decoded option strings
$name   SETS    "$radix.$direction.$qname.$intype.$outpos.$prescale"
        MEND

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;                                                                ;
;  FFT GENERATOR                                                 ;
;                                                                ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; FFT options bitfield
;
; Each group below starts with the mask that selects a field of the flags
; word, followed by the values that field may take. FFT_OPTIONS_STRING
; decodes the fields with ($flags :AND: mask) = value.

FFT_DIRECTION   EQU     0x00000001      ; direction select bit
FFT_FORWARD     EQU     0x00000000      ; forward exp(-ijkw) coefficient FFT
FFT_INVERSE     EQU     0x00000001      ; inverse exp(+ijkw) coefficient FFT

FFT_INPUTORDER  EQU     0x00000002      ; input order select field
FFT_BITREV      EQU     0x00000000      ; input data is in normal order (bit reverse)
FFT_REVERSED    EQU     0x00000002      ; assume input data is already bit reversed

FFT_INPUTSCALE  EQU     0x00000004      ; select scale on input data
FFT_NOPRESCALE  EQU     0x00000000      ; do not scale input data
FFT_PRESCALE    EQU     0x00000004      ; scale input data up by a register amount

FFT_INPUTTYPE   EQU     0x00000010      ; selector for real/complex input data
FFT_COMPLEX     EQU     0x00000000      ; do complex FFT of N points
FFT_REAL        EQU     0x00000010      ; do a 2*N point real FFT

FFT_OUTPUTPOS   EQU     0x00000020      ; where is the output placed?
FFT_OUT_OUTBUF  EQU     0x00000000      ; default - in the output buffer
FFT_OUT_INBUF   EQU     0x00000020      ; copy it back to the input buffer

FFT_RADIX       EQU     0x00000F00      ; radix select
FFT_RADIX4      EQU     0x00000000      ; radix 4 (log_2 N must be even)
FFT_RADIX4_8F   EQU     0x00000100      ; radix 4 with radix 8 first stage
FFT_RADIX4_2L   EQU     0x00000200      ; radix 4 with optional radix 2 last stage

FFT_OUTPUTSCALE EQU     0x00001000      ; select output scale value
FFT_NORMALISE   EQU     0x00000000      ; default - divide by N during algorithm
FFT_NONORM      EQU     0x00001000      ; calculate the raw sum (no scale)

FFT_DATA_SIZES  EQU     0x000F0000      ; data/coefficient size select field
FFT_16bit       EQU     0x00000000      ; 16-bit data and Q14 coefs
FFT_32bit       EQU     0x00010000      ; 32-bit data and Q30 coefs

        END

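; Keyboard shortcuts (web code-viewer residue, not part of the source file)
;
;   Copy code:            Ctrl + C
;   Search code:          Ctrl + F
;   Full-screen mode:     F11
;   Increase font size:   Ctrl + =
;   Decrease font size:   Ctrl + -
;   Show shortcuts:       ?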