📄 seminaive.cpp

📁 普林斯顿开发的快速球面调和变换算法
💻 CPP
📖 第 1 页 / 共 3 页
字号:
    }
  else*/
    *runtime = 0.0;

  free(CoswSave);



}


void seminaive::SemiNaiveX(double *data, int bw, int m, int k, double *result, double *cos_pml_table, double *runtime, int loops, double *workspace, int errorflag)
{
	 int i, j, n, counter;
  const double *weights;
  double *weighted_data;
  double *cos_data;
  double *scratchpad;
  double tstart, tstop;
  double *cos_even, *cos_odd;
  double *result_ptr;
  
  int ctr;
  double d_bw;
  double eresult0, eresult1, eresult2, eresult3;
  double oresult0, oresult1;  
  double *pml_ptr_even, *pml_ptr_odd;
  double *CoswSave;

  n = 2*bw;
  d_bw = (double) bw;

  cos_even = (double *) malloc(sizeof(double) * ( bw + 0));
  cos_odd = cos_even + (bw/2) + 0;

  /* assign workspace */
  weighted_data = workspace;
  cos_data = workspace + (2*bw);
  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
  /* total workspace = (8 * bw) */

  /* precompute for dct */
  CoswSave = newf3.precomp_dct( n );

  fprintf(stdout,"SEMI_NAIVE\n");
  fprintf(stderr,"bw = %d\t m = %d\n", bw, m);

  result_ptr = result;

  /*
    need to apply quadrature weights to the data and compute
    the cosine transform: if the order is even, get the regular
    weights; if the order is odd, get the oddweights (those
    where I've already multiplied the sin factor in
    */

  if( (m % 2) == 0)
    weights = wei1.get_weights(bw);
  else
    weights = owei1.get_oddweights(bw);

 // tstart = csecond();

  for(counter = 0 ; counter < loops ; counter ++ )
    {

      /*
	smooth the weighted signal
	*/
      if(errorflag == 0)
	for ( i = 0; i < n    ; ++i )
	  cos_data[i] = data[ i ] * weights[ i ];
      else
	for ( i = 0; i < n    ; ++i )
	  cos_data[i] = *data++ * weights[ i ];
      newf3.DCTf( cos_data, n, bw, CoswSave );

      /* normalize the data for this problem */
      if (m == 0)
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= (d_bw * 0.5);
	}
      else
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= d_bw;
	}
      cos_data[0] *= 2.0;
      
      /* separate cos_data into even and odd indexed arrays */
      ctr = 0;
      for(i = 0; i < bw; i += 2)
	{
	  cos_even[ctr] = cos_data[i];
	  cos_odd[ctr]  = cos_data[i+1];
	  ctr++;
	}
      
      /*
	do the projections; Note that the cos_pml_table has
	had all the zeroes stripped out so the indexing is
	complicated somewhat
	*/

      /*
	this loop is unrolled to compute two coefficients
	at a time (for efficiency)
	*/
      for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)
	{
	  /* get ptrs to precomputed data */
	  pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
	  pml_ptr_odd  = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);

	  /* zero out the accumulators */
	  eresult0 = 0.0; eresult1 = 0.0;
	  oresult0 = 0.0; oresult1 = 0.0;

	  for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)
	    {
	      eresult0 += cos_even[j] * pml_ptr_even[j];
	      oresult0 += cos_odd[j] * pml_ptr_odd[j];
	    }
	  for( ;  j < ((m + i)/2) + 1 ; j += 2)
	    {
	      eresult0 += cos_even[j] * pml_ptr_even[j];
	      eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
	      oresult0 += cos_odd[j] * pml_ptr_odd[j];
	      oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];
	    }

	  /* save the result */
	  result_ptr[i] = eresult0 + eresult1;
	  result_ptr[i + 1] = oresult0 + oresult1;
	}
      
      /* if there are an odd number of coefficients to compute
	 at this order, get that last coefficient! */
      if( ( (bw - m) % 2 ) == 1)
	{
	  pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, bw - 1);
	  
	  eresult0 = 0.0; eresult1 = 0.0;
	  eresult2 = 0.0; eresult3 = 0.0;
	  
	  for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)
	    {
	      eresult0 += cos_even[j] * pml_ptr_even[j];
	    }
	  
	  for( ;  j < ((bw - 1)/2) + 1; j += 4)
	    {
	      eresult0 += cos_even[j] * pml_ptr_even[j];
	      eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
	      eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
	      eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
	    }
	  result_ptr[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;
	}

      if(errorflag == 1)
	result_ptr += (bw - m);

    }
//  tstop = csecond();

#ifndef WALLCLOCK
  fprintf(stdout,
	  "loops = %d:\ntotal cpu time =\t%.4e secs\n",
	  loops, tstop-tstart);
  fprintf(stdout, "avg cpu time:\t\t%.4e secs\n",
	  (tstop-tstart)/((double) loops));
#else
  fprintf(stdout,
	  "loops = %d:\ntotal wall time =\t%.4e secs\n",
	  loops, tstop-tstart);
  fprintf(stdout, "avg wall time:\t\t%.4e secs\n",
	  (tstop-tstart)/((double) loops));
#endif

  free(CoswSave);

  free(cos_even);


}


void seminaive::SemiNaiveReduced(double *data, int bw, int m, double *result, double *cos_pml_table, double *workspace, double *cos_even, double *CoswSave)
{
	 int i, j, n;
  const double *weights;
  double *weighted_data;
  double *cos_data;
  double *scratchpad;

  double *cos_odd;

  double eresult0, eresult1, eresult2, eresult3;
  double oresult0, oresult1;  
  double *pml_ptr_even, *pml_ptr_odd;

  int ctr;
  double d_bw;

  n = 2*bw;
  d_bw = (double) bw;

  cos_odd = cos_even + (bw/2);

  /* assign workspace */
  weighted_data = workspace;
  cos_data = workspace + (2*bw);
  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
  /* total workspace = (8 * bw) */
 
  /*
    need to apply quadrature weights to the data and compute
    the cosine transform: if the order is even, get the regular
    weights; if the order is odd, get the oddweights (those
    where I've already multiplied the sin factor in
    */

  if( (m % 2) == 0)
    weights = wei1.get_weights(bw);
  else
    weights = owei1.get_oddweights(bw);

 
  /*
    smooth the weighted signal
    */
  for ( i = 0; i < n    ; ++i )
    cos_data[i] = data[ i ] * weights[ i ];
  newf3.DCTf( cos_data, n, bw, CoswSave );
  
  /* normalize the data for this problem */
  if (m == 0)
    {
      for (i=0; i<bw; i++)
	cos_data[i] *= (d_bw * 0.5);
    }
  else
    {
      for (i=0; i<bw; i++)
	cos_data[i] *= d_bw ;
    }

  cos_data[0] *= 2.0;


  /*** separate cos_data into even and odd indexed arrays ***/
  ctr = 0;
  for(i = 0; i < bw; i += 2)
    {
      cos_even[ctr] = cos_data[i];
      cos_odd[ctr]  = cos_data[i+1];
      ctr++;
    }
  
  /*
    do the projections; Note that the cos_pml_table has
    had all the zeroes stripped out so the indexing is
    complicated somewhat
    */
  

  /******** this is the original routine 
    for (i=m; i<bw; i++)
    {
    pml_ptr = cos_pml_table + NewTableOffset(m,i);

    if ((m % 2) == 0)
    {
    for (j=0; j<(i/2)+1; j++)
    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
    }
    else
    {
    if (((i-m) % 2) == 0)
    {
    for (j=0; j<(i/2)+1; j++)
    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
    }
    else
    {
    for (j=0; j<(i/2); j++)
    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
    }
    } 
      
    toggle = (toggle+1) % 2;
    }

    end of original routine ***/

  /*** new version of the above for-loop; I'll
    try to remove some conditionals and move others;
    this may make things run faster ... I hope ! ***/

  /*
    this loop is unrolled to compute two coefficients
    at a time (for efficiency)
    */

  for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)
    {
      /* get ptrs to precomputed data */
      pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
      pml_ptr_odd  = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);

      eresult0 = 0.0; eresult1 = 0.0;
      oresult0 = 0.0; oresult1 = 0.0;

      for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	  oresult0 += cos_odd[j] * pml_ptr_odd[j];
	}
      for( ;  j < ((m + i)/2) + 1; j += 2)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	  eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
	  oresult0 += cos_odd[j] * pml_ptr_odd[j];
	  oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];
	}

      /* save the result */
      result[i] = eresult0 + eresult1;
      result[i + 1] = oresult0 + oresult1;
    }

  /* if there are an odd number of coefficients to compute
     at this order, get that last coefficient! */
  if( ( (bw - m) % 2 ) == 1)
    {
      pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, bw - 1);

      eresult0 = 0.0; eresult1 = 0.0;
      eresult2 = 0.0; eresult3 = 0.0;

      for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	}

      for( ;  j < ((bw - 1)/2) + 1; j += 4)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	  eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
	  eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
	  eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
	}
      result[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;
    }


}

void seminaive::SemiNaive3(double *data, int bw, int m, int k, double *result, double *cos_pml_table, int timing, double *runtime, int loops, double *workspace)
{
	  int i, j, l, n, toggle;
  const double *weights;
  double *weighted_data, *eval_pts, *pml_ptr;
  double *cos_data;
  double *scratchpad;
  double total_time;
  double tstart, tstop;
  double d_bw, tmp;
  struct lowhigh *lowpoly, *highpoly;

  n = 2*bw;
  d_bw = (double) bw;

  lowpoly = (struct lowhigh *) malloc(sizeof(struct lowhigh) * n);
  highpoly = (struct lowhigh *) malloc(sizeof(struct lowhigh) * n);

  /* assign workspace */
  weighted_data = workspace;
  eval_pts = workspace + (2*bw);
  cos_data = eval_pts + (2*bw);
  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
  /* total workspace = (10 * bw) */
  
  /*
    need to apply quadrature weights to the data and compute
    the cosine transform: if the order is even, get the regular
    weights; if the order is odd, get the oddweights (those
    where I've already multiplied the sin factor in
    */
  if ( (m % 2) == 0)
    weights = wei1.get_weights(bw);
  else
    weights = owei1.get_oddweights(bw);

  /* start the timer */
 // if (timing)
 //   tstart = csecond();

  /* main timing loop */
  for (l=0; l< loops; l++)
    {

      for (i=0; i<n; i++)
	weighted_data[i] = data[i] * weights[i];
      /*
	smooth the weighted signal: use either kFCT
	or KFCTX (which might be little faster)
	*/
      /* kFCT(weighted_data, cos_data, scratchpad, n, bw, 1); */
      newf3.kFCTX(weighted_data, cos_data, scratchpad, n, bw, 1,
	    lowpoly, highpoly);

      /* normalize the data for this problem */
      if (m == 0)
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= (d_bw * 0.5);
	}
      else
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= d_bw;
	}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -