fftalgorithms.tex

来自「用于VC.net的gsl的lib库文件包」· TEX 代码 · 共 1,973 行 · 第 1/5 页
TEX
1,973 行
%
We have defined the following three additional variables $p$, $q$ and
$m$ to denote different partial products of the factors,
%
\begin{eqnarray}
p_i &=& f_1 f_2 \dots f_i \quad (p_0 = 1)  \\
q_i &=& N / p_i  \\
m_i &=& N / f_i 
\end{eqnarray}
%
Note that the FFT modules $W$ are applied before the permutations $P$,
which makes this a decimation-in-frequency algorithm.

\subsection{Implementation}
%
Now to the implementation of the algorithm. We start with a vector of
data, $z$, as input and want to apply the transform,
%
\begin{eqnarray}
x &=& W_N z \\
  &=& T_{n_f} \dots T_2 T_1 z
\end{eqnarray}
%
where $T_i = (P^{f_i}_{q_i} D^{f_i}_{q_i} \otimes I_{p_{i-1}}) (
W_{f_i} \otimes I_{m_i})$.

The outer structure of the implementation will be a loop over the
$n_f$ factors, applying each matrix $T_i$ to the vector in turn to
build up the complete transform.
%
\begin{algorithm}
\FOR{$(i = 1 \dots n_f)$}
        \STATE{$v \Leftarrow T_i v $}
\ENDFOR
\end{algorithm}
%
The order of the factors is not important. Now we examine the iteration
$v \Leftarrow T_i v$, which we'll write as,
%
\begin{equation}
v' = 
(P^{f_i}_{q_i} D^{f_i}_{q_i} \otimes I_{p_{i-1}}) ~
( W_{f_i} \otimes I_{m_i}) v
\end{equation}
%
There are two Kronecker product matrices in this iteration. The
rightmost matrix, which is the first to be applied, is a DFT of length
$f_i$ applied to $N/f_i$ subsets of the data. We'll call this $t$,
since it will be a temporary array,
%
\begin{equation}
t = (W_{f_i} \otimes I_{m_i}) v
\end{equation}
%
The second matrix applies a permutation and the exponential
twiddle-factors. We'll call this $v'$, since it is the result of the
full iteration on $v$,
%
\begin{equation}
v' = (P^{f_i}_{q_i} D^{f_i}_{q_i} \otimes I_{p_{i-1}}) t 
\end{equation}

The effect of the matrix $(W_{f_i} \otimes I_{m_i})$ is best seen by
an example. Suppose the factor is $f_i = 3$, and the length of the FFT
is $N=6$, then the relevant Kronecker product is,
%
\begin{equation}
t = (W_3 \otimes I_2) v 
\end{equation}
%
which expands out to,
%
\begin{equation}
\left(
\begin{array}{c}
t_0 \\
t_1 \\
t_2 \\
t_3 \\
t_4 \\
t_5
\end{array}
\right)
=
\left(
\begin{array}{cccccc}
W_3(1,1) & 0 & W_3(1,2) & 0 & W_3(1,3) & 0 \\
0 & W_3(1,1) & 0 & W_3(1,2) & 0 & W_3(1,3) \\
W_3(2,1) & 0 & W_3(2,2) & 0 & W_3(2,3) & 0 \\
0 & W_3(2,1) & 0 & W_3(2,2) & 0 & W_3(2,3) \\
W_3(3,1) & 0 & W_3(3,2) & 0 & W_3(3,3) & 0 \\
0 & W_3(3,1) & 0 & W_3(3,2) & 0 & W_3(3,3) 
\end{array}
\right)
\left(
\begin{array}{c}
v_0 \\
v_1 \\
v_2 \\
v_3 \\
v_4 \\
v_5
\end{array}
\right)
\end{equation}
%
We can rearrange the components in a computationally convenient form,
\begin{equation}
\left(
\begin{array}{c}
t_0 \\
t_2 \\
t_4 \\
t_1 \\
t_3 \\
t_5
\end{array}
\right)
=
\left(
\begin{array}{cccccc}
W_3(1,1) & W_3(1,2) & W_3(1,3) & 0 & 0 & 0 \\
W_3(2,1) & W_3(2,2) & W_3(2,3) & 0 & 0 & 0 \\
W_3(3,1) & W_3(3,2) & W_3(3,3) & 0 & 0 & 0 \\
0 & 0 & 0 & W_3(1,1) & W_3(1,2) & W_3(1,3) \\
0 & 0 & 0 & W_3(2,1) & W_3(2,2) & W_3(2,3) \\
0 & 0 & 0 & W_3(3,1) & W_3(3,2) & W_3(3,3)
\end{array}
\right)
\left(
\begin{array}{c}
v_0 \\
v_2 \\
v_4 \\
v_1 \\
v_3 \\
v_5
\end{array}
\right)
\end{equation}
%
which clearly shows that we just need to apply the $3\times 3$ DFT
matrix $W_3$ twice, once to the sub-vector of elements $(v_0, v_2, v_4)$,
and independently to the remaining sub-vector $(v_1, v_3, v_5)$.

In the general case, if we index $t$ as $t_k = t(\lambda,\mu) =
t_{\lambda m + \mu}$ then $\lambda = 0 \dots f-1$ is an index within
each transform of length $f$ and $\mu = 0 \dots m-1$ labels the
independent subsets of data. We can see this by showing the
calculation with all indices present,
%
\begin{equation}
t = (W_f \otimes I_m) z 
\end{equation}
%
becomes,
%
\begin{eqnarray}
t_{\lambda m + \mu} &=& \sum_{\lambda'=0}^{f-1} \sum_{\mu'=0}^{m-1} 
        (W_f \otimes I_m)_{(\lambda m + \mu)(\lambda' m + \mu')}
        z_{\lambda'm + \mu} \\
&=& \sum_{\lambda'\mu'} (W_f)_{\lambda\lambda'} \delta_{\mu\mu'} 
        z_{\lambda'm+\mu'} \\
&=& \sum_{\lambda'} (W_f)_{\lambda\lambda'} z_{\lambda'm+\mu}
\end{eqnarray}
%
The DFTs on the index $\lambda$ will be computed using
special optimized modules for each $f$.

To calculate the next stage,
%
\begin{equation}
v'=(P^f_q D^f_q \otimes I_{p_{i-1}}) t
\end{equation}
%
we note that the Kronecker product has the property of performing
$p_{i-1}$ independent multiplications of $PD$ on $q_{i-1}$ different
subsets of $t$. The index $\mu$ of $t(\lambda,\mu)$ which runs from 0
to $m$ will include $q_i$ copies of each $PD$ operation because
$m=p_{i-1}q$. i.e. we can split the index $\mu$ further into $\mu = a
p_{i-1} + b$, where $a = 0 \dots q-1$ and $b=0 \dots p_{i-1}$,
%
\begin{eqnarray}
\lambda m + \mu &=& \lambda m + a p_{i-1} + b \\
        &=& (\lambda q + a) p_{i-1} + b.
\end{eqnarray}
%
Now we can expand the second stage,
%
\begin{eqnarray}
v'&=& (P^f_q D^f_q \otimes I_{p_{i-1}}) t \\
v'_{\lambda m + \mu} &=& \sum_{\lambda' \mu'} 
 (P^f_q D^f_q \otimes I_{p_{i-1}})_{(\lambda m + \mu)(\lambda' m + \mu')}
        t_{\lambda' m + \mu'} \\
v'_{(\lambda q + a) p_{i-1} + b} &=& \sum_{\lambda' a' b'}
(
P^f_q D^f_q \otimes I_{p_{i-1}}
)_{((\lambda q+ a)p_{i-1} + b)((\lambda' q+ a')p_{i-1} + b')} 
t_{(\lambda' q + a')p_{i-1} +b'} 
\end{eqnarray}
%
The first step in removing redundant indices is to take advantage of
the identity matrix $I$ and separate the subspaces of the Kronecker
product,
%
\begin{equation}
(
P^f_q D^f_q \otimes I_{p_{i-1}}
)_{((\lambda q+ a)p_{i-1} + b)((\lambda' q+ a')p_{i-1} + b')} 
=
(P^f_q D^f_q)_{(\lambda q + a)(\lambda' q + a')}
\delta_{bb'}
\end{equation}
%
This eliminates one sum, leaving us with,
%
\begin{equation}
v'_{(\lambda q + a) p_{i-1} + b} 
= 
\sum_{\lambda' a' }
(P^f_q D^f_q)_{(\lambda q + a)(\lambda' q + a')} t_{(\lambda'q+a')p_{i-1} + b}
\end{equation}
%
We can insert the definition of $D^f_q$ to give,
%
\begin{equation}
\phantom{v'_{(\lambda q + a) p_{i-1} + b}}
= \sum_{\lambda'a'} (P^f_q)_{(\lambda q + a)(\lambda'q + a')} 
\omega^{\lambda'a'}_{q_{i-1}} t_{(\lambda'q+a')p_{i-1}+b}
\end{equation}
%
Using the definition of $P^f_q$, which exchanges an index of $\lambda
q + a$ with $a f + \lambda$, we get a final result with no matrix
multiplication,
%
\begin{equation}
v'_{(a f + \lambda) p_{i-1} + b}
= \omega^{\lambda a}_{q_{i-1}} t_{(\lambda q + a)p_{i-1} + b}
\end{equation}
%
All we have to do is premultiply each element of the temporary vector
$t$ by an exponential twiddle factor and store the result in another
index location, according to the digit reversal permutation of $P$.

Here is the algorithm to implement the mixed-radix FFT,
%
\begin{algorithm}
\FOR{$i = 1 \dots n_f$}
\FOR{$a = 0 \dots q-1$}
\FOR{$b = 0 \dots p_{i-1} - 1$}
\FOR{$\lambda = 0 \dots f-1$}
\STATE{$t_\lambda \Leftarrow 
       \sum_{\lambda'=0}^{f-1} W_f(\lambda,\lambda') v_{b+\lambda'm+ap_{i-1}}$}
       \COMMENT{DFT matrix-multiply module}
\ENDFOR
\FOR{$\lambda = 0 \dots f-1$}
\STATE{$v'_{(af+\lambda)p_{i-1}+b} 
        \Leftarrow \omega^{\lambda a}_{q_{i-1}} t_\lambda$}
\ENDFOR
\ENDFOR
\ENDFOR
\STATE{$v \Leftarrow v'$}
\ENDFOR
\end{algorithm}
%
\subsection{Details of the implementation}
%
First the function {\tt gsl\_fft\_complex\_wavetable\_alloc} allocates
$n$ elements of scratch space (to hold the vector $v'$ for each
iteration) and $n$ elements for a trigonometric lookup table of
twiddle factors.

Then the length $n$ must be factorized. There is a general
factorization function {\tt gsl\_fft\_factorize} which takes a list of
preferred factors. It first factors out the preferred factors and then
removes general remaining prime factors.

The algorithm used to generate the trigonometric lookup table is
%
\begin{algorithm}
\FOR {$a = 1 \dots n_f$}
\FOR {$b = 1 \dots f_i - 1$}
\FOR {$c = 1 \dots q_i$}
\STATE $\mbox{trig[k++]} = \exp(- 2\pi i b c p_{a-1}/N)$
\ENDFOR
\ENDFOR
\ENDFOR
\end{algorithm}
%
Note that $\sum_{1}^{n_f} \sum_{0}^{f_i-1} \sum_{1}^{q_i} =
\sum_{1}^{n_f} (f_i-1)q_i = n - 1$ so $n$ elements are always
sufficient to store the lookup table. This is chosen because we need
to compute $\omega_{q_i-1}^{\lambda a} t_\lambda$ in
the FFT. In terms of the lookup table we can write this as,
%
\begin{eqnarray}
\omega_{q_{i-1}}^{\lambda a} t_\lambda 
&=&  \exp(-2\pi i \lambda a/q_{i-1}) t_\lambda \\
&=&  \exp(-2\pi i \lambda a p_{i-1}/N) t_\lambda \\
&=& \left\{
    \begin{array}{ll}
    t_\lambda & a=0 \\
    \mbox{trig}[\mbox{twiddle[i]}+\lambda q+(a-1)] t_\lambda & a\not=0
\end{array}
\right.
\end{eqnarray}
%
\begin{sloppypar}
\noindent 
The array {\tt twiddle[i]} maintains a set of pointers into {\tt trig}
for the starting points for the outer loop.  The core of the
implementation is {\tt gsl\_fft\_complex}. This function loops over
the chosen factors of $N$, computing the iteration $v'=T_i v$ for each
pass. When the DFT for a factor is implemented the iteration is
handed-off to a dedicated small-$N$ module, such as {\tt
gsl\_fft\_complex\_pass\_3} or {\tt
gsl\_fft\_complex\_pass\_5}.  Unimplemented factors are handled
by the general-$N$ routine {\tt gsl\_fft\_complex\_pass\_n}. The
structure of one of the small-$N$ modules is a simple transcription of
the basic algorithm given above.  Here is an example for {\tt
gsl\_fft\_complex\_pass\_3}. For a pass with a factor of 3 we have to
calculate the following expression,
\end{sloppypar}%
\begin{equation}
v'_{(a f + \lambda) p_{i-1} + b} 
= 
\sum_{\lambda' = 0,1,2} 
\omega^{\lambda a}_{q_{i-1}} W^{\lambda \lambda'}_{3} 
v_{b + \lambda' m + a p_{i-1}}
\end{equation}
%
for $b = 0 \dots p_{i-1} - 1$, $a = 0 \dots q_{i} - 1$ and $\lambda =
0, 1, 2$.  This is implemented as,
%
\begin{algorithm}
\FOR {$a = 0 \dots q-1$}
\FOR {$b = 0 \dots p_{i-1} - 1$}
\STATE {$
        \left(
        \begin{array}{c}
        t_0 \\ t_1 \\ t_2
        \end{array}
        \right)
        =
        \left(
        \begin{array}{ccc}
        W^{0}_3 & W^{0}_3 & W^{0}_3 \\
        W^{0}_3 & W^{1}_3 & W^{2}_3 \\
        W^{0}_3 & W^{2}_3 & W^{4}_3
        \end{array}
        \right)
        \left(
        \begin{array}{l}
        v_{b + a p_{i-1}} \\ 
        v_{b + a p_{i-1} + m} \\ 
        v_{b + a p_{i-1} +2m}
        \end{array}
        \right)$}
        \STATE {$ v'_{a p_{i} + b}  = t_0$}
        \STATE {$ v'_{a p_{i} + b + p_{i-1}} = \omega^{a}_{q_{i-1}} t_1$} 
        \STATE {$ v'_{a p_{i} + b + 2 p_{i-1}} = \omega^{2a}_{q_{i-1}} t_2$}
\ENDFOR
\ENDFOR
\end{algorithm}
%
In the code we use the variables {\tt from0}, {\tt from1}, {\tt from2}
to index the input locations,
%
\begin{eqnarray}
\mbox{\tt from0} &=& b + a p_{i-1} \\
\mbox{\tt from1} &=& b + a p_{i-1} + m \\
\mbox{\tt from2} &=& b + a p_{i-1} + 2m
\end{eqnarray}
%
and the variables {\tt to0}, {\tt to1}, {\tt to2} to index the output
locations in the scratch vector $v'$,
%
\begin{eqnarray}
\mbox{\tt to0} &=& b + a p_{i} \\
\mbox{\tt to1} &=& b + a p_{i} + p_{i-1} \\
\mbox{\tt to2} &=& b + a p_{i} + 2 p_{i-1}
\end{eqnarray}
%
The DFT matrix multiplication is computed using the optimized
sub-transform modules given in the next section. The twiddle factors
$\omega^a_{q_{i-1}}$ are taken out of the {\tt trig} array.

To compute the inverse transform we go back to the definition of the
fourier transform and note that the inverse matrix is just the complex
conjugate of the forward matrix (with a factor of $1/N$),
%
\begin{equation}
W^{-1}_N = W^*_N / N 
\end{equation}
%
fftalgorithms.tex - 源码说明

本页面展示了「用于VC.net的gsl的lib库文件包」中的 fftalgorithms.tex 源码文件，采用 TEX 编程语言编写，共 1,973 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与net相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?