⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft_new_kc.cpp

📁 并行分块的fft实现 基于斯坦福大学Imagine模拟器开发设计的FFT并行分块实现
💻 CPP
字号:
#include "idb_kernelc.hpp"

//shared files
#include "fft_new.hpp"

#include "idb_kernelc2.hpp"

// Ujval Kapasi
// 10/15/97
// 12/1/97
// 5/21/98

// Modified by
// Abhishek Das
// 11/17/2001

// Vector radix two FFT
// 1. assumes twiddle factors are in order

// This program needs two operands for the butterfly, a and b.  These should
//   correspond to elements 0..(N/2-1) and (N/2)..N of the input respectively
//   for the first pass.  Subsequently, the streams a,b should be the same
//   parts of the stream output by the previous pass of this loop.

// Some twiddle factors are now read in initially, and then needed values
//   are then calculated from these values by each cluster.

// Let a, b, and w be the input vectors.
//   ISTREAM[0] = in_a = a[0..511]
//   ISTREAM[1] = in_b = b[0..511]
//   ISTREAM[2] = in_w = w[8 x 0, 8 x 16, 8 x 32,.., 8 x 496] -> 8 x 32 entries = 256

// Let c be the output vector
//   OSTREAM[0] = c[0..1023]   out

KERNELDEF(fft8c, "fft_new/fft_new_kc.uc");

kernel fft8c(istream<complex> in_a,
             istream<complex> in_b,
             istream<complex> in_w,
             uc<int>& uc_stage_num,
             uc<int>& uc_tw_idx_inc,
             ostream<complex> out)
{
  uc<int> perm_e = 0x37261504;
  uc<int> perm_f = 0x73625140;

  int two = 1 + 1;
  int four = two + two;

  int odd = cid() & 1;
  int even = 1 - odd;
  cc cc_even = itocc((cid() & 1) == 0);

  // cc cc_even = itocc(even);
  cc low = itocc(cid() < four);
  int lows, his;
  lows = select(low, 1, 0);
  his = select(low, 0, 1);

  uc<int> perm_1, perm_2, perm_3;
  perm_1 = 0x32107654;
  perm_2 = 0x73625140;
  perm_3 = 0x37261504;

  complex temp;

  // load twiddle factors
  array<float> twiddle_real(32), twiddle_imag(32);
  uc<int> cnt_w = 32;
  int idx_w = 0;
  loop_count(cnt_w) {
    in_w >> temp;
    twiddle_real[idx_w] = temp.r;
    twiddle_imag[idx_w] = temp.i;
    idx_w = idx_w + 1;
  }

  // load per stage rotation values
  array<float> rotate_real(10), rotate_imag(10);
  uc<int> cnt_r = 10;
  int idx_r = 0;
  loop_count(cnt_r) {
    in_w >> temp;
    rotate_real[idx_r] = temp.r;
    rotate_imag[idx_r] = temp.i;
    idx_r = idx_r + 1;
  }

  // load per iteration interpolation values
  complex interp_rotate;
  in_w >> temp;
  interp_rotate.r = temp.r;
  interp_rotate.i = temp.i;
  complex interp_null;
  interp_null.r = 1.0;
  interp_null.i = 0.0;

  int stage_num = commclperm(0x8, 0, uc_stage_num);

  complex interp;
  cc first_stages = itocc(stage_num < 4);
  interp.r = select(first_stages, interp_rotate.r, interp_null.r);
  interp.i = select(first_stages, interp_rotate.i, interp_null.i);

  complex rotate;
  rotate.r = rotate_real[stage_num];
  rotate.i = rotate_imag[stage_num];
  
  int tw_idx_inc_save = commclperm(0x8, 0, uc_tw_idx_inc);
  int tw_idx_inc = 1;
  int tw_idx = 0 - tw_idx_inc_save;

  loop_stream(in_a) unroll(1) pipeline(1) {

    // Read in 2 complex data points
    complex a, b;
    in_a >> a;
    in_b >> b;

    // calculate twiddle index
    cc new_twiddle = itocc(tw_idx_inc == 1);
    tw_idx = select(new_twiddle, tw_idx + tw_idx_inc_save, tw_idx);
    tw_idx_inc = select(new_twiddle, tw_idx_inc_save, tw_idx_inc - 1);

    // Perform twiddle factor calculations
    complex w_rotated, tw;
    tw.r = twiddle_real[tw_idx];
    tw.i = twiddle_imag[tw_idx];
    w_rotated.r = ((tw.r * rotate.r) - (tw.i * rotate.i));
    w_rotated.i = ((tw.r * rotate.i) + (tw.i * rotate.r));

    // Perform butterfly calculations
    complex c, d;
    float wrbr, wrbi, wibr, wibi;
    c.r = a.r + b.r;
    c.i = a.i + b.i;
    b.r = a.r - b.r;
    b.i = a.i - b.i;
    wrbr = w_rotated.r*b.r;
    wrbi = w_rotated.r*b.i;
    wibr = w_rotated.i*b.r;
    wibi = w_rotated.i*b.i;
    d.r = wrbr - wibi;
    d.i = wrbi + wibr;

    // want this eventually
    // cluster       :  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
    // first output  : dr3 | cr3 | dr2 | cr2 | dr1 | cr1 | dr0 | cr0 |
    // second output : dr7 | cr7 | dr6 | cr6 | dr5 | cr5 | dr4 | cr4 |
    array<float> sel_array_real(2);
    array<float> sel_array_imag(2);
    float low_d_hi_c, low_c_hi_d;
    float odd_dlo_even_chir, even_clo_odd_dhir;
    float odd_dlo_even_chi, even_clo_odd_dhi;
    complex out0, out1;

    float e,f,g,h1,h2;

    low_d_hi_c       = select(low, d.r, c.r);
    sel_array_real[even] = commucperm(perm_e, low_d_hi_c);
    low_c_hi_d       = select(low, c.r, d.r);
    sel_array_real[odd] = commucperm(perm_f, low_c_hi_d);

    out0.r = sel_array_real[0];

    low_d_hi_c       = select(low, d.i, c.i);
    sel_array_imag[even] = commucperm(perm_e, low_d_hi_c);
    low_c_hi_d       = select(low, c.i, d.i);
    sel_array_imag[odd] = commucperm(perm_f, low_c_hi_d);

    out0.i = sel_array_imag[0];
    out1.r = sel_array_real[1];
    out1.i = sel_array_imag[1];

    out << out0 << out1;


    // Read in 2 more complex data points
    in_a >> a;
    in_b >> b;

    // Interpolate the next twiddle factor
    complex w_interp;
    w_interp.r = ((w_rotated.r * interp.r) - (w_rotated.i * interp.i));
    w_interp.i = ((w_rotated.r * interp.i) + (w_rotated.i * interp.r));

    // Perform butterfly calculations
    c.r = a.r + b.r;
    c.i = a.i + b.i;
    b.r = a.r - b.r;
    b.i = a.i - b.i;
    wrbr = w_interp.r*b.r;
    wrbi = w_interp.r*b.i;
    wibr = w_interp.i*b.r;
    wibi = w_interp.i*b.i;
    d.r = wrbr - wibi;
    d.i = wrbi + wibr;

    // want this eventually
    // cluster       :  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
    // first output  : dr3 | cr3 | dr2 | cr2 | dr1 | cr1 | dr0 | cr0 |
    // second output : dr7 | cr7 | dr6 | cr6 | dr5 | cr5 | dr4 | cr4 |
    array<float> sel_2r(2), sel_2i(2);
    
    sel_2r[lows] = commucperm(perm_1, d.r);
    sel_2r[his] = c.r;

    out0.r = commucperm(perm_2, sel_2r[0]);

    sel_2i[lows] = commucperm(perm_1, d.i);
    sel_2i[his] = c.i;

    out0.i = commucperm(perm_2, sel_2i[0]);
    out1.r = commucperm(perm_3, sel_2r[1]);
    out1.i = commucperm(perm_3, sel_2i[1]);

    out << out0 << out1;
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -