fft_mac.h

来自「realview22.rar」· C头文件 代码 · 共 816 行 · 第 1/2 页

H
816
字号
;
; $Copyright: 
; ----------------------------------------------------------------
; This confidential and proprietary software may be used only as
; authorised by a licensing agreement from ARM Limited
;   (C) COPYRIGHT 2000,2002 ARM Limited
;       ALL RIGHTS RESERVED
; The entire notice above must be reproduced on all authorised
; copies and copies may only be made to the extent permitted
; by a licensing agreement from ARM Limited.
; ----------------------------------------------------------------
; File:     fft_mac.h,v
; Revision: 1.14
; ----------------------------------------------------------------
; $
;
; Optimised ARM assembler multi-radix FFT
; Please read the readme.txt before this file
;
; Shared macros and interface definition file.

; NB: All the algorithms in this code are Decimation in Time. ARM
; is much better at Decimation in Time (as opposed to Decimation
; in Frequency) due to the position of the barrel shifter. Decimation
; in time has the twiddling at the start of the butterfly, whereas
; decimation in frequency has it at the end of the butterfly. The
; post multiply shifts can be hidden for Decimation in Time.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  FIRST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The FIRST STAGE macros "FS_RAD<R>" have the following interface:
;
; ON ENTRY:
;   REGISTERS:
;     r0 = inptr  => points to the input buffer consisting of N complex
;                    numbers of size (1<<datainlog) bytes each
;     r1 = dptr   => points to the output buffer consisting of N complex
;                    numbers of size (1<<datalog) bytes each
;     r2 = N      => is the number of points in the transform
;     r3 = pscale => shift to prescale input by (if applicable)
;   ASSEMBLER VARIABLES:
;     reversed    => logical variable, true if input data is already bit reversed
;                    The data needs to be bit reversed otherwise
;
; ACTION:
;     The routine should
;      (1) Bit reverse the data as required for the whole FFT (unless
;          the reversed flag is set)
;      (2) Prescale the input data by the pscale shift (if applicable)
;      (3) Perform a radix R first stage on the data
;      (4) Place the processed data in the output array pointed to by dptr
;
; ON EXIT:
;     r1 = dptr  => preserved and pointing to the output data
;     r2 = dinc  => number of bytes per "block" or "Group" in this stage
;                   this is: R<<datalog
;     r3 = count => number of radix-R blocks or groups processed in this stage
;                   this is: N/R
;     r0,r4-r12,r14 corrupted

; Register names for the first stage interface, on entry
inptr   RN 0    ; input buffer
dptr    RN 1    ; output/scratch buffer
N       RN 2    ; size of the FFT

; Register names for the first stage interface, on exit
dptr    RN 1    ; output data pointer (preserved)
dinc    RN 2    ; bytes per block/group in this stage (R<<datalog)
count   RN 3    ; number of radix-R blocks/groups processed in this stage (N/R)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  GENERAL STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The GENERAL STAGE macros "GS_RAD<R>" have the following interface.
;
; To describe the arguments, suppose this routine is called as stage j
; in a k-stage FFT with N=R1*R2*...*Rk. This stage is radix R=Rj.
;
; ON ENTRY:
;   REGISTERS:
;     r0 = cptr   => Pointer to twiddle coefficients for this stage consisting
;                    of complex numbers of size (1<<coeflog) bytes each in some
;                    stage dependent format.
;                    The format currently used is described in full in the
;                    ReadMe file in the tables subdirectory.
;     r1 = dptr   => points to the working buffer consisting of N complex
;                    numbers of size (1<<datalog) bytes each
;     r2 = dinc   => number of bytes per "block" or "Group" in the last stage:
;                      dinc  = (R1*R2*...*R(j-1))<<datalog
;     r3 = count  => number of blocks or Groups in the last stage:
;                      count = Rj*R(j+1)*...*Rk
;                    NB dinc*count = N<<datalog
;
; ACTION:
;     The routine should
;      (1) Twiddle the input data
;      (2) Perform a radix R stage on the data
;      (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
;     r0 = cptr  => Updated to the end of the coefficients for the stage
;                   (the coefficients for the next stage will usually follow)
;     r1 = dptr  => preserved and pointing to the output data
;     r2 = dinc  => number of bytes per "block" or "Group" in this stage:
;                     dinc  = (R1*R2*..*Rj)<<datalog = (input dinc)*R
;     r3 = count => number of radix-R blocks or groups processed in this stage
;                     count = R(j+1)*...*Rk = (input count)/R
;     r0,r4-r12,r14 corrupted

; Register names for the general stage interface
cptr    RN 0    ; pointer to twiddle coefficients
dptr    RN 1    ; pointer to FFT data working buffer
dinc    RN 2    ; bytes per block/group at this stage
count   RN 3    ; number of blocks/groups at this stage

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  LAST STAGE INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The LAST STAGE macros "LS_RAD<R>" have the following interface.
;
; ON ENTRY:
;   REGISTERS:
;     r0 = cptr   => Pointer to twiddle coefficients for this stage consisting
;                    of complex numbers of size (1<<coeflog) bytes each in some
;                    stage dependent format.
;                    The format currently used is described in full in the
;                    ReadMe file in the tables subdirectory.
;                    There is a possible stride between the coefficients
;                    specified by cinc
;     r1 = dptr   => points to the working buffer consisting of N complex
;                    numbers of size (1<<datalog) bytes each
;     r2 = dinc   => number of bytes per "block" or "Group" in the last stage:
;                      dinc  = (N/R)<<datalog
;     r3 = cinc   => Bytes between twiddle values in the array pointed to by cptr
;
; ACTION:
;     The routine should
;      (1) Twiddle the input data
;      (2) Perform a (last stage optimised) radix R stage on the data
;      (3) Perform the actions in place, result written to the dptr buffer
;
; ON EXIT:
;     r0 = cptr  => Updated to point to real-to-complex conversion coefficients
;     r1 = dptr  => preserved and pointing to the output data
;     r2 = dinc  => number of bytes per "block" or "Group" in this stage:
;                     dinc  = N<<datalog = (input dinc)*R
;     r0,r4-r12,r14 corrupted

; Register names for the last stage interface
cptr    RN 0    ; pointer to twiddle coefficients
dptr    RN 1    ; pointer to FFT data working buffer
dinc    RN 2    ; bytes per block/group at this stage
cinc    RN 3    ; stride between twiddle coefficients in bytes

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  COMPLEX TO REAL CONVERSION INTERFACE
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; The COMPLEX TO REAL macros "LS_ZTOR" have the following interface.
;
; Suppose that 'w' is the N'th root of unity being used for the real FFT
; (usually exp(-2*pi*i/N) for forward transforms and exp(+2*pi*i/N) for 
;  the inverse transform).
;
; ON ENTRY:
;   REGISTERS:
;     r0 = cptr   => Pointer to twiddle coefficients for this stage
;                    This consists of (1,w,w^2,w^3,...,w^(N/4-1)).
;                    There is a stride between each coefficient specified by cinc
;     r1 = dptr   => points to the working buffer consisting of N/2 complex
;                    numbers of size (1<<datalog) bytes each
;     r2 = dinc   => (N/2)<<datalog, the size of the complex buffer in bytes
;     r3 = cinc   => Bytes between twiddle value in array pointed to by cptr
;     r4 = dout   => Output buffer (usually the same as dptr)
;
; ACTION:
;     The routine should take the output of an N/2 point complex FFT and convert
;     it to the output of an N point real FFT, assuming that the real
;     inputs were packed up into the real,imag,real,imag,... buffers of the complex
;     input. The output is N/2 complex numbers of the form:
;      y[0]+i*y[N/2], y[1], y[2], ..., y[N/2-1]
;     where y[0],...,y[N-1] is the output from a complex transform of the N
;     real inputs.
;
; ON EXIT:
;     r0-r12,r14 corrupted

; Register names for the real FFT conversion (LS_ZTOR) interface
cptr    RN 0    ; pointer to twiddle coefficients
dptr    RN 1    ; pointer to FFT data working buffer
dinc    RN 2    ; (N/2)<<datalog, the size of the data in bytes
cinc    RN 3    ; bytes between twiddle values in the coefficient buffer
dout    RN 4    ; address to write the output (normally the same as dptr)

;;;;;;;;;;;;;;;;;;;;;; END OF INTERFACES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; first stage/outer loop level
; (the commented-out names below are already defined in the interface
;  sections earlier in this file and are repeated here for reference only)
;inptr  RN 0
;dptr   RN 1
;N      RN 2    ; size of FFT
;dinc   RN 2    ; bytes between block size when bit reversed (scaling of N)
bitrev  RN 3    ; NOTE(review): presumably bit reversal state - confirm in the first stage macros

; inner loop level
;cptr   RN 0    ; coefficient pointer for this level
;dptr   RN 1    ; data pointer - points to end (load in reverse order)
;dinc   RN 2    ; bytes between data elements at this level of FFT
;count  RN 3    ; (elements per block<<16) | (blocks per stage)

; data registers
; NOTE(review): the names suggest real (r) and imaginary (i) parts of four
; complex values x0..x3 - confirm against the stage macros
x0r     RN 4
x0i     RN 5
x1r     RN 6
x1i     RN 7
x2r     RN 8
x2i     RN 9
x3r     RN 10
x3i     RN 11

; temporaries: STM always stores the lowest numbered register at the
; lowest address, so a {t0,t1} list only keeps its order if t0 < t1
t0      RN 12   ; these MUST be in correct order (t0<t1) for STM's
t1      RN 14

        ; Declare two global string variables named <prefix>r and <prefix>i
        ; and assign them the text of $v0 and $v1 respectively.
        ;   $prefix = stem of the two variable names
        ;   $v0     = text assigned to <prefix>r
        ;   $v1     = text assigned to <prefix>i
        MACRO
        SETREG  $prefix,$v0,$v1
        GBLS    $prefix.r
        GBLS    $prefix.i
$prefix.r SETS  "$v0"
$prefix.i SETS  "$v1"
        MEND

        ; Apply SETREG to four (r,i) pairs: sets the string variables
        ; <prefix>0r/<prefix>0i from $v0/$v1, <prefix>1r/<prefix>1i from
        ; $v2/$v3, <prefix>2r/<prefix>2i from $v4/$v5 and
        ; <prefix>3r/<prefix>3i from $v6/$v7.
        MACRO
        SETREGS $prefix,$v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
        SETREG  $prefix.0,$v0,$v1
        SETREG  $prefix.1,$v2,$v3
        SETREG  $prefix.2,$v4,$v5
        SETREG  $prefix.3,$v6,$v7
        MEND

        ; Two-pair version of SETREGS: sets the string variables
        ; <prefix>0r/<prefix>0i from $v0/$v1 and <prefix>1r/<prefix>1i
        ; from $v2/$v3 using SETREG.
        MACRO
        SET2REGS $prefix,$v0,$v1,$v2,$v3
        SETREG  $prefix.0,$v0,$v1
        SETREG  $prefix.1,$v2,$v3
        MEND

        ; Load the next three complex twiddle coefficients into the given
        ; registers and advance the coefficient pointer past them.
        ; The storage layout is selected by the string variable coefformat:
        ;   "W" = one word per scalar
        ;   "H" = one signed halfword per scalar
        ; Any other value is reported as an error.
        ;   $cp        = coefficient pointer register (updated)
        ;   $c0r..$c2i = destination registers, in memory order
        MACRO
        LOADCOEFS $cp, $c0r, $c0i, $c1r, $c1i, $c2r, $c2i
        IF "$coefformat"="W"
          ; single block load of all six words, with writeback
          LDMIA $cp!, {$c0r, $c0i, $c1r, $c1i, $c2r, $c2i}
        ELSE
          IF "$coefformat"="H"
            ; six sign-extending halfword loads, each stepping on by 2 bytes
            LDRSH $c0r, [$cp], #2
            LDRSH $c0i, [$cp], #2
            LDRSH $c1r, [$cp], #2
            LDRSH $c1i, [$cp], #2
            LDRSH $c2r, [$cp], #2
            LDRSH $c2i, [$cp], #2
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND

        ; Load a single complex twiddle coefficient, then step the pointer.
        ; The storage layout is selected by the string variable coefformat
        ; ("W" = words, "H" = signed halfwords); anything else is an error.
        ;   $cp = register holding the address of the coefficient (updated)
        ;   $ci = post index applied to $cp after the load
        ;   $re = destination for the first scalar (at offset 0)
        ;   $im = destination for the second scalar
        MACRO
        LOADCOEF $cp, $ci, $re, $im
        IF "$coefformat"="W"
          ; second scalar first, so the pointer update can ride on the last load
          LDR   $im, [$cp, #4]
          LDR   $re, [$cp], $ci
        ELSE
          IF "$coefformat"="H"
            LDRSH $im, [$cp, #2]
            LDRSH $re, [$cp], $ci
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND

        ; Load only the first scalar of one twiddle coefficient.
        ; The pointer is left unchanged (there is no post index).
        ; The storage layout is selected by the string variable coefformat
        ; ("W" = word, "H" = signed halfword); anything else is an error.
        ;   $cp = register holding the address of the coefficient
        ;   $re = destination register
        MACRO
        LOADCOEFR $cp, $re
        IF "$coefformat"="W"
          LDR   $re, [$cp]
        ELSE
          IF "$coefformat"="H"
            LDRSH $re, [$cp]
          ELSE
            ERROR "Unsupported coeficient format: $coefformat"
          ENDIF
        ENDIF
        MEND

        ; Load one complex data element stored in an explicitly given
        ; format, then step the pointer.
        ;   $dp     = register holding the address of the element (updated)
        ;   $di     = post index applied to $dp after the load
        ;   $re     = destination for the scalar at offset 0
        ;   $im     = destination for the scalar that follows it
        ;   $format = "W" (words) or "H" (signed halfwords);
        ;             anything else is an error
        MACRO
        LOADDATAF $dp, $di, $re, $im, $format
        IF "$format"="W"
          ; offset load first, the post-indexed load then moves the pointer
          LDR   $im, [$dp, #4]
          LDR   $re, [$dp], $di
        ELSE
          IF "$format"="H"
            LDRSH $im, [$dp, #2]
            LDRSH $re, [$dp], $di
          ELSE
            ERROR "Unsupported load format: $format"
          ENDIF
        ENDIF
        MEND

        ; Load one complex data element in the input data format without
        ; changing the pointer (zero post index).
        ; The layout is selected by the string variable datainformat
        ; ("W" = words, "H" = signed halfwords); anything else is an error.
        ;   $dp = register holding the address of the element (not updated)
        ;   $re = destination for the scalar at offset 0
        ;   $im = destination for the scalar that follows it
        ; For "W" the pair is read with LDMIA, which fills registers in
        ; ascending register number order, so $re must be a lower numbered
        ; register than $im.
        MACRO
        LOADDATAZ $dp, $re, $im
        IF "$datainformat"="W"
          LDMIA $dp, {$re,$im}
          MEXIT
        ENDIF
        IF "$datainformat"="H"
          LDRSH $im, [$dp, #2]
          LDRSH $re, [$dp]
          MEXIT
        ENDIF
        ; report the variable that was actually tested above
        ERROR "Unsupported load format: $datainformat"
        MEND

        ; Load a complex data element from the working array
        ; Forwards to LOADDATAF using the format held in the string
        ; variable dataformat.
        ;   $dp = register holding the address of the element (updated)
        ;   $di = post index applied to $dp after the load
        ;   $re, $im = destination registers
        MACRO
        LOADDATA $dp, $di, $re, $im
        LOADDATAF $dp, $di, $re, $im, $dataformat
        MEND

        ; Load a complex data element from the input array
        ; Forwards to LOADDATAF using the format held in the string
        ; variable datainformat.
        ;   $dp = register holding the address of the element (updated)
        ;   $di = post index applied to $dp after the load
        ;   $re, $im = destination registers
        MACRO
        LOADDATAI $dp, $di, $re, $im
        LOADDATAF $dp, $di, $re, $im, $datainformat
        MEND

        ; Load four consecutive complex elements from the input array and
        ; advance the pointer past them.
        ;   $dp                  = input pointer register (updated)
        ;   $re0,$im0..$re3,$im3 = destination registers
        ; For the "W" input format one LDMIA reads all eight words, so the
        ; registers must be listed in ascending register number order.
        ; For any other input format each element is read with LOADDATAI
        ; and the pointer is stepped by the size of one INPUT element,
        ; (1<<datainlog) bytes as given in the first stage interface, not
        ; by the working buffer element size (1<<datalog).
        ; NOTE(review): assumes datainlog is defined as an assembler
        ; variable alongside datalog - confirm in the including file.
        MACRO
        LOADDATA4 $dp, $re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3 
        IF "$datainformat"="W"
         LDMIA  $dp!, {$re0,$im0, $re1,$im1, $re2,$im2, $re3,$im3}
        ELSE
         LOADDATAI $dp, #1<<$datainlog, $re0,$im0
         LOADDATAI $dp, #1<<$datainlog, $re1,$im1
         LOADDATAI $dp, #1<<$datainlog, $re2,$im2
         LOADDATAI $dp, #1<<$datainlog, $re3,$im3
        ENDIF
        MEND

        ; Apply the post-load shift to freshly loaded data.
        ; The shift is the operand text held in the string variable
        ; postldshift; when that variable is empty nothing is emitted.
        ;   $dr = register to shift in place
        ;   $di = optional second register to shift in place (may be omitted)
        MACRO
        SHIFTDATA $dr, $di
        IF "$postldshift"=""
          ; no shift configured
          MEXIT
        ENDIF
        IF "$di"<>""
          MOV $di, $di $postldshift
        ENDIF
        MOV   $dr, $dr $postldshift
        MEND

        ; Write one complex data item to the output data buffer, then step
        ; the pointer by a caller supplied amount.
        ; The layout is selected by the string variable dataformat
        ; ("W" = words, "H" = halfwords); anything else is an error.
        ;   $dp = register holding the destination address (updated)
        ;   $di = post index applied to $dp after the store
        ;   $re = value stored at offset 0
        ;   $im = value stored directly after it
        MACRO
        STORE   $dp, $di, $re, $im
        IF "$dataformat"="W"
          ; offset store first, the post-indexed store then moves the pointer
          STR   $im, [$dp, #4]
          STR   $re, [$dp], $di
        ELSE
          IF "$dataformat"="H"
            STRH  $im, [$dp, #2]
            STRH  $re, [$dp], $di
          ELSE
            ERROR "Unsupported save format: $dataformat"
          ENDIF
        ENDIF
        MEND

        ; Write one complex data item to the output data buffer and advance
        ; the pointer to the element that follows (8 bytes on for "W",
        ; 4 bytes on for "H").
        ; The layout is selected by the string variable dataformat;
        ; anything other than "W" or "H" is an error.
        ;   $dp = register holding the destination address (updated)
        ;   $re = value stored at offset 0
        ;   $im = value stored directly after it
        ; For "W" the pair is written with STMIA, which stores registers in
        ; ascending register number order, so $re must be a lower numbered
        ; register than $im.
        MACRO
        STOREP  $dp, $re, $im
        IF "$dataformat"="W"
          STMIA $dp!, {$re,$im}
        ELSE
          IF "$dataformat"="H"
            STRH  $im, [$dp, #2]
            STRH  $re, [$dp], #4
          ELSE
            ERROR "Unsupported save format: $dataformat"
          ENDIF
        ENDIF
        MEND

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?