⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c_fft6.c

📁 The code performs a number (ITERS) of iterations of the Bailey s 6-step FFT alg
💻 C
📖 第 1 页 / 共 2 页
字号:
  }  /* butterfly computations */  ijDiff = 1;  stride = 2;  spowerOfW = ndv2;  for (stage = 0; stage < logn; ++stage) {    /* Invariant: stride = 2 ** stage        Invariant: ijDiff = 2 ** (stage - 1) */    first = 0;    for (powerOfW = 0; powerOfW < ndv2; powerOfW += spowerOfW) {			pw = w[powerOfW];      /* Invariant: pwr + sqrt(-1)*pwi = W**(powerOfW - 1) */      for (i = first; i < n; i += stride) {        j = i + ijDiff;				jj = a[j];        ii = a[i];        temp.re = jj.re * pw.re - jj.im * pw.im;        temp.im = jj.re * pw.im + jj.im * pw.re;        a[j].re = ii.re - temp.re;        a[j].im = ii.im - temp.im;        a[i].re = ii.re + temp.re;				a[i].im = ii.im + temp.im;      }      ++first;    }    ijDiff = stride;    stride <<= 1;    spowerOfW /= 2;  }  return 0;} /* fft *//* ----------------------------------------------------------------- */int scale(complex *a, complex *v, const int n) {  register int i, j, index;	complex aa, vv;#pragma omp parallel for private(i, j, index, aa, vv)  for (i = 0; i < n; ++i) {    for (j = 0; j < n; ++j) {			index = i * n + j;      aa = a[index];			vv = v[index];			a[index].re = aa.re * vv.re - aa.im * vv.im;			a[index].im = aa.re * vv.im + aa.im * vv.re;    }  }  return 0;} /* ----------------------------------------------------------------- */int scale_seq(complex *a, complex *v, const int n) {  register int i, j, index;	complex aa, vv;  for (i = 0; i < n; ++i) {    for (j = 0; j < n; ++j) {			index = i * n + j;      aa = a[index];			vv = v[index];			a[index].re = aa.re * vv.re - aa.im * vv.im;			a[index].im = aa.re * vv.im + aa.im * vv.re;    }  }  return 0;} /* -----------------------------------------------------------------------       chkmat - check the output matrix for correctness    * ----------------------------------------------------------------------- */int chkmat(complex *a, int n) {  int sign, i, j, nn, errors;  real_type EPSILON = 1e-4f;  errors = 0;	nn = n * n;  for (i = 0; i < n; ++i) {    sign = 1;    for (j = 0; j < n; ++j) {      if (a[i * n + j].re > nn * sign + EPSILON)         ++errors;      if (a[i * n + j].re < nn * sign - EPSILON)         ++errors;      sign = -sign;    }  }  if (errors > 0) {		printf("Errors = %d\n", errors);		exit(-1);  }  return 0;} /* ----------------------------------------------------------------- */int prperf(double tpose_time, double scale_time, double ffts_time, int nn, int lognn, int iters) {   double secs;   double fpercent, spercent, tpercent;   double flops, mflops;  tpose_time /= iters;  scale_time /= iters;  ffts_time /= iters;  secs = tpose_time + scale_time + ffts_time;  tpercent = tpose_time / secs * 100;  spercent = scale_time / secs * 100;  fpercent = ffts_time / secs * 100;  flops = (real_type) (nn * 5 * lognn);  mflops = flops / 1e6f / secs;  printf("***********************\n");  printf("1D FFT %d points\n" ,nn);  printf("***********************\n");  printf("Time per input vector:\n");  printf("tpose    : %lf %lf percent\n", tpose_time, tpercent);  printf("scale    : %lf %lf percent\n", scale_time, spercent);  printf("ffts     : %lf %lf percent\n", ffts_time, fpercent);  printf("total(s) : %lf\n", secs);  printf("mflop/s  : %lf\n", mflops);  return 0;} /* prperf *//* -----------------------------------------------------------------------    Base 2 logarithm  * ----------------------------------------------------------------------- */int log2_int(int n) {		register int i, aux;		aux = 1;		for (i = 0; i <= 128; i++) {				if (aux > n) 					 return (i - 1);					aux <<= 1;		}  return -1;}/* ----------------------------------------------------------------- */int print_mat(complex *a, int n) {  register int i, j;  for (i = 0; i < n; ++i) {    for (j = 0; j < n; ++j) {			printf(" (%.0f, %.0f)", a[i * n + j].re, a[i * n + j].im);    }		printf("\n");  }	printf("\n");  return 0;} /* ----------------------------------------------------------------- */int print_vec(complex *a, int n) {  register int i;  for (i = 0; i < n*n; ++i) 		printf("  (%.0f, %.0f)", a[i].re, a[i].im);	printf("\n\n");  return 0;} /* ----------------------------------------------------------------- */int main(int argc, char *argv[]) {  complex **xin;          /* input vector                   */     complex **aux;          /* intermediate array (same size) */  /* precomputed vectors */  int *brt;               /* bit reverse table for 1d fft's */	complex *w;             /* twiddles for 1d fft's          */	complex *v;             /* twiddles for 2d fft            */  double total_time;      /* timing                         */  int k;	int NUMTHREADS, MAX_THREADS; #define ID  omp_get_thread_num()  float    MEMORY;             /* number of bytes required */	char *PARAM_NAMES[NUM_ARGS] = {"Size (a power of 2).", "No. of iterations"};	char *TIMERS_NAMES[NUM_TIMERS] = {"Total_time", "Tpose time", "Scale time", "Column FFTs time" };  char *DEFAULT_VALUES[NUM_ARGS] = {"64", "10"};  NUMTHREADS = omp_get_max_threads();  OSCR_init (NUMTHREADS, "Bailey's '6-step' Fast Fourier Transform", "Use 'fft6' <size (in K)> <iters>", NUM_ARGS,                PARAM_NAMES, DEFAULT_VALUES , NUM_TIMERS, NUM_TIMERS, TIMERS_NAMES,                argc, argv);  /* Command line arguments processing */	N = OSCR_getarg_int(1);	ITERS = OSCR_getarg_int(2);  /*  if(argc == 3) {    N = atoi(argv[1]);    ITERS = atoi(argv[2]);	}  else {                      		printf("Usage: %s N ITERS\n", argv[0]);		printf("N is the size of the input signal. Should be a power of two.\n");    N = 64;		ITERS = 10;	}  */	MAX_THREADS = omp_get_max_threads();  NN = N * N;	LOGN = log2_int(N);  LOGNN = log2_int(NN);	MEMORY = N * sizeof(int) + 		       2 * (NN) * sizeof(complex) * MAX_THREADS + (NN) * sizeof(complex) + 				   (N / 2) * sizeof(complex);	/* Memory allocation */	xin = OSCR_calloc(MAX_THREADS, sizeof(*xin));	aux = OSCR_calloc(MAX_THREADS, sizeof(*aux));	brt =     (int *)OSCR_calloc(N,       sizeof(int));	v   = (complex *)OSCR_calloc(N * N,   sizeof(complex)); /* twiddles for 2d fft */	w   = (complex *)OSCR_calloc((N / 2), sizeof(complex)); /* twiddles for 1d fft's */	printf("Input values:\n");	printf("=============\n");	printf("N           : %u\n", N);	printf("ITERS       : %u\n", ITERS);	printf("NN          : %u\n", NN);	printf("LOGN        : %u\n", LOGN);	printf("LOGNN       : %u\n", LOGNN);	printf("Memory      : %.2f Kbytes\n", MEMORY / 1024);  /* precompute the input array and fft constants */    gen_bit_reverse_table(brt, N);  gen_w_table(w, N, LOGN, N / 2);  gen_v_table(v, N);	for (k = 0; k < MAX_THREADS; k++) {		xin[k] = (complex *)OSCR_calloc(N * N, sizeof(complex));		aux[k] = (complex *)OSCR_calloc(N * N, sizeof(complex));	}#define TOTAL 0OSCR_timer_start(TOTAL);#pragma omp parallel for private(k)  for(k = 0; k < ITERS; k++) {    dgen(xin[ID], N);		tpose(xin[ID], aux[ID], N);				cffts(aux[ID], brt, w, N, LOGN, N / 2);		scale(aux[ID], v, N);		tpose(aux[ID], xin[ID], N);				cffts(xin[ID], brt, w, N, LOGN, N / 2);		tpose(xin[ID], aux[ID], N);				chkmat(aux[ID], N);                	}	OSCR_timer_stop(TOTAL);	total_time = OSCR_timer_read(TOTAL);		/* Display results and time */	OSCR_report(1, TIMERS_NAMES);		printf("============================");	printf("\n# THREADS : %d\n", NUMTHREADS);	printf("N         : %d\n", N);	printf("ITERS     : %d\n", ITERS);	printf("total TIME: %.6lf secs.\n", total_time);  #undef ID#undef MAX_THREADS	return 0;}/* ----------------------------------------------------------------- *//* * vim:ts=2:sw=2:set nonu: */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -