📄 fft_new_kc.cpp
字号:
#include "idb_kernelc.hpp"
//shared files
#include "fft_new.hpp"
#include "idb_kernelc2.hpp"
// Ujval Kapasi
// 10/15/97
// 12/1/97
// 5/21/98
// Modified by
// Abhishek Das
// 11/17/2001
// Vector radix two FFT
// 1. assumes twiddle factors are in order
// This program needs two operands for the butterfly, a and b. These should
// correspond to elements 0..(N/2-1) and (N/2)..N of the input respectively
// for the first pass. Subsequently, the streams a,b should be the same
// parts of the stream output by the previous pass of this loop.
// Some twiddle factors are now read in initially, and then needed values
// are then calculated from these values by each cluster.
// Let a, b, and w be the input vectors.
// ISTREAM[0] = in_a = a[0..511]
// ISTREAM[1] = in_b = b[0..511]
// ISTREAM[2] = in_w = w[8 x 0, 8 x 16, 8 x 32,.., 8 x 496] -> 8 x 32 entries = 256
// Let c be the output vector
// OSTREAM[0] = c[0..1023] out
KERNELDEF(fft8c, "fft_new/fft_new_kc.uc");
kernel fft8c(istream<complex> in_a,
istream<complex> in_b,
istream<complex> in_w,
uc<int>& uc_stage_num,
uc<int>& uc_tw_idx_inc,
ostream<complex> out)
{
uc<int> perm_e = 0x37261504;
uc<int> perm_f = 0x73625140;
int two = 1 + 1;
int four = two + two;
int odd = cid() & 1;
int even = 1 - odd;
cc cc_even = itocc((cid() & 1) == 0);
// cc cc_even = itocc(even);
cc low = itocc(cid() < four);
int lows, his;
lows = select(low, 1, 0);
his = select(low, 0, 1);
uc<int> perm_1, perm_2, perm_3;
perm_1 = 0x32107654;
perm_2 = 0x73625140;
perm_3 = 0x37261504;
complex temp;
// load twiddle factors
array<float> twiddle_real(32), twiddle_imag(32);
uc<int> cnt_w = 32;
int idx_w = 0;
loop_count(cnt_w) {
in_w >> temp;
twiddle_real[idx_w] = temp.r;
twiddle_imag[idx_w] = temp.i;
idx_w = idx_w + 1;
}
// load per stage rotation values
array<float> rotate_real(10), rotate_imag(10);
uc<int> cnt_r = 10;
int idx_r = 0;
loop_count(cnt_r) {
in_w >> temp;
rotate_real[idx_r] = temp.r;
rotate_imag[idx_r] = temp.i;
idx_r = idx_r + 1;
}
// load per iteration interpolation values
complex interp_rotate;
in_w >> temp;
interp_rotate.r = temp.r;
interp_rotate.i = temp.i;
complex interp_null;
interp_null.r = 1.0;
interp_null.i = 0.0;
int stage_num = commclperm(0x8, 0, uc_stage_num);
complex interp;
cc first_stages = itocc(stage_num < 4);
interp.r = select(first_stages, interp_rotate.r, interp_null.r);
interp.i = select(first_stages, interp_rotate.i, interp_null.i);
complex rotate;
rotate.r = rotate_real[stage_num];
rotate.i = rotate_imag[stage_num];
int tw_idx_inc_save = commclperm(0x8, 0, uc_tw_idx_inc);
int tw_idx_inc = 1;
int tw_idx = 0 - tw_idx_inc_save;
loop_stream(in_a) unroll(1) pipeline(1) {
// Read in 2 complex data points
complex a, b;
in_a >> a;
in_b >> b;
// calculate twiddle index
cc new_twiddle = itocc(tw_idx_inc == 1);
tw_idx = select(new_twiddle, tw_idx + tw_idx_inc_save, tw_idx);
tw_idx_inc = select(new_twiddle, tw_idx_inc_save, tw_idx_inc - 1);
// Perform twiddle factor calculations
complex w_rotated, tw;
tw.r = twiddle_real[tw_idx];
tw.i = twiddle_imag[tw_idx];
w_rotated.r = ((tw.r * rotate.r) - (tw.i * rotate.i));
w_rotated.i = ((tw.r * rotate.i) + (tw.i * rotate.r));
// Perform butterfly calculations
complex c, d;
float wrbr, wrbi, wibr, wibi;
c.r = a.r + b.r;
c.i = a.i + b.i;
b.r = a.r - b.r;
b.i = a.i - b.i;
wrbr = w_rotated.r*b.r;
wrbi = w_rotated.r*b.i;
wibr = w_rotated.i*b.r;
wibi = w_rotated.i*b.i;
d.r = wrbr - wibi;
d.i = wrbi + wibr;
// want this eventually
// cluster : 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// first output : dr3 | cr3 | dr2 | cr2 | dr1 | cr1 | dr0 | cr0 |
// second output : dr7 | cr7 | dr6 | cr6 | dr5 | cr5 | dr4 | cr4 |
array<float> sel_array_real(2);
array<float> sel_array_imag(2);
float low_d_hi_c, low_c_hi_d;
float odd_dlo_even_chir, even_clo_odd_dhir;
float odd_dlo_even_chi, even_clo_odd_dhi;
complex out0, out1;
float e,f,g,h1,h2;
low_d_hi_c = select(low, d.r, c.r);
sel_array_real[even] = commucperm(perm_e, low_d_hi_c);
low_c_hi_d = select(low, c.r, d.r);
sel_array_real[odd] = commucperm(perm_f, low_c_hi_d);
out0.r = sel_array_real[0];
low_d_hi_c = select(low, d.i, c.i);
sel_array_imag[even] = commucperm(perm_e, low_d_hi_c);
low_c_hi_d = select(low, c.i, d.i);
sel_array_imag[odd] = commucperm(perm_f, low_c_hi_d);
out0.i = sel_array_imag[0];
out1.r = sel_array_real[1];
out1.i = sel_array_imag[1];
out << out0 << out1;
// Read in 2 more complex data points
in_a >> a;
in_b >> b;
// Interpolate the next twiddle factor
complex w_interp;
w_interp.r = ((w_rotated.r * interp.r) - (w_rotated.i * interp.i));
w_interp.i = ((w_rotated.r * interp.i) + (w_rotated.i * interp.r));
// Perform butterfly calculations
c.r = a.r + b.r;
c.i = a.i + b.i;
b.r = a.r - b.r;
b.i = a.i - b.i;
wrbr = w_interp.r*b.r;
wrbi = w_interp.r*b.i;
wibr = w_interp.i*b.r;
wibi = w_interp.i*b.i;
d.r = wrbr - wibi;
d.i = wrbi + wibr;
// want this eventually
// cluster : 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
// first output : dr3 | cr3 | dr2 | cr2 | dr1 | cr1 | dr0 | cr0 |
// second output : dr7 | cr7 | dr6 | cr6 | dr5 | cr5 | dr4 | cr4 |
array<float> sel_2r(2), sel_2i(2);
sel_2r[lows] = commucperm(perm_1, d.r);
sel_2r[his] = c.r;
out0.r = commucperm(perm_2, sel_2r[0]);
sel_2i[lows] = commucperm(perm_1, d.i);
sel_2i[his] = c.i;
out0.i = commucperm(perm_2, sel_2i[0]);
out1.r = commucperm(perm_3, sel_2r[1]);
out1.i = commucperm(perm_3, sel_2i[1]);
out << out0 << out1;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -