📄 seminaive.cpp

📁 普林斯顿开发的快速球面调和变换算法
💻 CPP
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
// seminaive.cpp: implementation of the seminaive class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "seminaive.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>   /** for memcpy **/

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//#pragma  comment(lib,"LIBC");
//#pragma  comment(lib,"LIBCMT");

extern "C"
{

	int fprintf( FILE *stream, const char *format,...);
	
}
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

seminaive::seminaive()
{

}

seminaive::~seminaive()
{

}


void seminaive::SemiNaiveReducedX(double *data, int bw, int m, int k, double *result, double *cos_pml_table, double *cos_even)
{
	register int i, j ;

  double *cos_odd;
  double eresult0, eresult1, eresult2, eresult3;
  double oresult0, oresult1;  
  double *pml_ptr_even, *pml_ptr_odd;
  double times_two;

  int fudge, fudge2;

  cos_odd = cos_even + (bw/2);

  if(m == 0)
    times_two = (double) 1;
  else
    times_two = (double) 2;

 
  /*** separate cos_data into even and odd indexed arrays ***/
  for(i = 0; i < bw; i += 2)
    {
      *cos_even++ = data[i];
      *cos_odd++  = data[i+1];
    }
  cos_even -= (bw / 2);
  cos_odd -= (bw / 2);
  
  /*
    do the projections; Note that the cos_pml_table has
    had all the zeroes stripped out so the indexing is
    complicated somewhat
    */

  /*
    this loop is unrolled to compute two coefficients
    at a time (for efficiency)
    */
  
  fudge2 = k % 2 ;
  for(i = 0; i < k - fudge2 ; i += 2)
    {
      /* get ptrs to precomputed data */
      pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
      pml_ptr_odd  = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);

      eresult0 = 0.0; eresult1 = 0.0;
      oresult0 = 0.0; oresult1 = 0.0;

      fudge = (((m + i)/2) + 1);

      for(j = 0; j < fudge % 2; ++j)
	{
	  eresult0 += cos_even[j] * *pml_ptr_even++;
	  oresult0 += cos_odd[j] * *pml_ptr_odd++;
	}
      for( ;  j < fudge; j += 2)
	{
	  eresult0 += cos_even[j] * *pml_ptr_even++;
	  eresult1 += cos_even[j + 1] * *pml_ptr_even++;
	  oresult0 += cos_odd[j] * *pml_ptr_odd++;
	  oresult1 += cos_odd[j + 1] * *pml_ptr_odd++;
	}
      *result++ = times_two * ( eresult0 + eresult1 );
      *result++ = times_two * ( oresult0 + oresult1 );
    }

  /* if there are an odd number of coefficients to compute,
     get that last coefficient! */
  
  if( ( k % 2 ) == 1)
    {
      pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + k - 1);

      eresult0 = 0.0; eresult1 = 0.0;
      eresult2 = 0.0; eresult3 = 0.0;

      fudge = (((m + k - 1)/2) + 1);

      for(j = 0; j < fudge % 4; ++j)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	}
      for( ;  j < fudge ; j += 4)
	{
	  eresult0 += cos_even[j] * pml_ptr_even[j];
	  eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
	  eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
	  eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
	}
      *result = times_two *
	(eresult0 + eresult1 + eresult2 + eresult3);
    }


}


void seminaive::InvSemiNaiveReduced(double *assoc_legendre_series, int bw, int m, double *result, double *trans_cos_pml_table, double *sin_values, double *workspace)
{
	double *fcos; /* buffer for cosine series rep of result */
  double *trans_tableptr;
  double *scratchpad;
  double *assoc_offset;
  int i, j, n, rowsize;

  double *p;
  double fcos0, fcos1, fcos2, fcos3;

  fcos = workspace; /* needs bw */
  scratchpad = fcos + bw; /* needs (4*bw) */

  /* total workspace is 5*bw */

  trans_tableptr = trans_cos_pml_table;
  p = trans_cos_pml_table;

  /* main loop - compute each value of fcos

     Note that all zeroes have been stripped out of the
     trans_cos_pml_table, so indexing is somewhat complicated.
     */

  /***** original version of the loop (easier to see
    how things are being done)

    for (i=0; i<bw; i++)
    {
    if (i == (bw-1))
    {
    if ((m % 2) == 1)
    {
    fcos[bw-1] = 0.0;
    break;
    }
    }

    fcos[i] = 0.0;
    rowsize = Transpose_RowSize(i, m, bw);

    if (i > m)
    assoc_offset = assoc_legendre_series + (i - m) + (m % 2);
    else
    assoc_offset = assoc_legendre_series + (i % 2);

    for (j=0; j<rowsize; j++)
    fcos[i] += assoc_offset[2*j] * trans_tableptr[j];

    trans_tableptr += rowsize;
    }
    
    end of the original version ******/

  /*** this is a more efficient version of the above, original
    loop ***/

  for (i=0; i<=m; i++)
    {
      rowsize = cosp3.Transpose_RowSize(i, m, bw);

      /* need to point to correct starting value of Legendre series */
      assoc_offset = assoc_legendre_series + (i % 2);

      fcos0 = 0.0 ; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;

      for (j = 0; j < rowsize % 4; ++j)
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];

      for ( ; j < rowsize; j += 4){
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];
	fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
	fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
	fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
      }

      fcos[i] = fcos0 + fcos1 + fcos2 + fcos3;

      trans_tableptr += rowsize;      
    }
  
  for (i=m+1; i<bw-1; i++)
    {

      rowsize = cosp3.Transpose_RowSize(i, m, bw);

      /* need to point to correct starting value of Legendre series */
      assoc_offset = assoc_legendre_series + (i - m) + (m % 2);

      fcos0 = 0.0 ; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;
 
      for (j = 0; j < rowsize % 4; ++j)
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];

      for ( ; j < rowsize; j += 4){
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];
	fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
	fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
	fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
      }
      fcos[i] = fcos0 + fcos1 + fcos2 + fcos3;
      
      trans_tableptr += rowsize;
    }

  if((m % 2) == 1)
    /* if m odd, no need to do last row - all zeroes */
    fcos[bw-1] = 0.0;
  else
    {
      rowsize = cosp3.Transpose_RowSize(bw-1, m, bw);

      /* need to point to correct starting value of Legendre series */
      assoc_offset = assoc_legendre_series + (bw - 1 - m) + (m % 2);
      
      fcos0 = 0.0; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;

      for(j = 0; j < rowsize % 4; ++j)
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];

      for ( ; j < rowsize; j += 4){
	fcos0 += assoc_offset[2*j] * trans_tableptr[j];
	fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
	fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
	fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
      }

      fcos[bw - 1] = fcos0 + fcos1 + fcos2 + fcos3;

      trans_tableptr += rowsize;
    }


  /* now we have the cosine series for the result,
     so now evaluate the cosine series at 2*bw Chebyshev nodes 
     */

  newf3.ExpIFCT(fcos, result, scratchpad, 2*bw, bw, 1);

  /* if m is odd, then need to multiply by sin(x) at Chebyshev nodes */

  if ((m % 2) == 1)
    {
      n = 2*bw;

      for (j=0; j<n; j++)
	result[j] *= sin_values[j];
    }

  trans_tableptr = p;

  /* amscray */



}

void seminaive::SemiNaive(double *data, int bw, int m, int k, double *result, double *cos_pml_table, int timing, double *runtime, int loops, double *workspace)
{
	  int i, j, l, n, toggle;
  const double *weights;
  double *weighted_data, *eval_pts, *pml_ptr;
  double *cos_data;
  double *scratchpad;
  double total_time;
  double tstart, tstop;
  double d_bw, tmp;
  double *CoswSave;

  n = 2*bw;
  d_bw = (double) bw;

  /* precompute for dct */
  CoswSave = newf3.precomp_dct( n );

  /* assign workspace */
  weighted_data = workspace;
  eval_pts = workspace + (2*bw);
  cos_data = eval_pts + (2*bw);
  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
  /* total workspace = (10 * bw) */
  
  /*
    need to apply quadrature weights to the data and compute
    the cosine transform: if the order is even, get the regular
    weights; if the order is odd, get the oddweights (those
    where I've already multiplied the sin factor in
    */
  if ( (m % 2) == 0)
    weights = wei1.get_weights(bw);
  else
    weights = owei1.get_oddweights(bw);

  /* start the timer */
//  if (timing)
//    tstart = csecond();

  /* main timing loop */
  for (l=0; l< loops; l++)
    {

      for (i=0; i<n; i++)
	cos_data[i] = data[i] * weights[i];
      newf3.DCTf( cos_data, n, bw, CoswSave );

      /* normalize the data for this problem */
      if (m == 0)
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= (d_bw * 0.5);
	}
      else
	{
	  for (i=0; i<bw; i++)
	    cos_data[i] *= d_bw;
	}
      cos_data[0] *= 2.0;

      toggle = 0;

      /*
	do the projections; Note that the cos_pml_table has
	had all the zeroes stripped out so the indexing is
	complicated somewhat
	*/

      for (i=m; i<k+m; i++)
	{
	  /* get offset for Pml coefficients in cos_pml_table */
	  pml_ptr = cos_pml_table + cosp3.NewTableOffset(m,i);

	  if ((m % 2) == 0)
	    {
	      tmp = 0.0;
	      for (j=0; j<(i/2)+1; j++)
		tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
	      result[i-m] = tmp;
	    }
	  else
	    {
	      if (((i-m) % 2) == 0)
		{
		  tmp = 0.0;
		  for (j=0; j<(i/2)+1; j++)
		    tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
		  result[i-m] = tmp;
		}
	      else
		{
		  tmp = 0.0;
		  for (j=0; j<(i/2); j++)
		    tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
		  result[i-m] = tmp;
		}
	    } 

	  toggle = (toggle+1) % 2;
	}

    } /* closes main timing loop */

  /** have to normalize "out" the loops **/
  for(i = 0; i < bw; i++)
    result[i] /= ((double) loops);

/*  if (timing)
    {
      tstop = csecond();
      total_time = tstop - tstart;
      *runtime = total_time;

      fprintf(stdout,"\n");
      fprintf(stdout,"Program: Seminaive Legendre Transform \n");
      fprintf(stdout,"m = %d\n", m);
      fprintf(stdout,"Bandwidth = %d\n", bw);
      fprintf(stdout,"# of coefficients computed: %d\n", k);
#ifndef WALLCLOCK
      fprintf(stdout,"Total elapsed cpu time: %.8f seconds.\n\n", total_time); 
#else
      fprintf(stdout,"Total elapsed wall time: %.8f seconds.\n\n", total_time); 
#endif
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -