📄 seminaive.c

📁 基于java的3d开发库。对坐java3d的朋友有很大的帮助。
💻 C
📖 第 1 页 / 共 3 页
字号:
	  for (i=0; i<bw; i++)	    cos_data[i] *= d_bw;	}      cos_data[0] *= 2.0;      toggle = 0;      /*	do the projections; Note that the cos_pml_table has	had all the zeroes stripped out so the indexing is	complicated somewhat	*/      for (i=m; i<k+m; i++)	{	  /* get offset for Pml coefficients in cos_pml_table */	  pml_ptr = cos_pml_table + NewTableOffset(m,i);	  if ((m % 2) == 0)	    {	      tmp = 0.0;	      for (j=0; j<(i/2)+1; j++)		tmp += cos_data[(2*j)+toggle] * pml_ptr[j];	      result[i-m] = tmp;	    }	  else	    {	      if (((i-m) % 2) == 0)		{		  tmp = 0.0;		  for (j=0; j<(i/2)+1; j++)		    tmp += cos_data[(2*j)+toggle] * pml_ptr[j];		  result[i-m] = tmp;		}	      else		{		  tmp = 0.0;		  for (j=0; j<(i/2); j++)		    tmp += cos_data[(2*j)+toggle] * pml_ptr[j];		  result[i-m] = tmp;		}	    } 	  toggle = (toggle+1) % 2;	}    } /* closes main timing loop */  /** have to normalize "out" the loops **/  for(i = 0; i < bw; i++)    result[i] /= ((double) loops);  if (timing)    {      tstop = csecond();      total_time = tstop - tstart;      *runtime = total_time;      fprintf(stdout,"\n");      fprintf(stdout,"Program: Seminaive Legendre Transform \n");      fprintf(stdout,"m = %d\n", m);      fprintf(stdout,"Bandwidth = %d\n", bw);      fprintf(stdout,"# of coefficients computed: %d\n", k);#ifndef WALLCLOCK      fprintf(stdout,"Total elapsed cpu time: %.8f seconds.\n\n", total_time); #else      fprintf(stdout,"Total elapsed wall time: %.8f seconds.\n\n", total_time); #endif    }  else    *runtime = 0.0;  free(highpoly);  free(lowpoly);}/*********************************************************//***  This should be a faster version of SemiNaive().  Note: no "timing" input, unlike SemiNaive().  There is also the following additional function argument:  errorflag: = 0 -> don't care about measuring error, just want                    to time the algorithm             = 1 -> care about measuring the error; in this case	            loops <= 10  (this is because of memory constraints,		    given how the "true" and "calculated" coefficients		    are stored)*****/void SemiNaiveX(double *data, 		int bw, 		int m, 		int k, 		double *result, 		double *cos_pml_table, 		double *runtime, 		int loops, 		double *workspace,		int errorflag){  int i, j, n, counter;  const double *weights;  double *weighted_data;  double *cos_data;  double *scratchpad;  double tstart, tstop;  double *cos_even, *cos_odd;  double *result_ptr;    int ctr;  double d_bw;  double eresult0, eresult1, eresult2, eresult3;  double oresult0, oresult1;    double *pml_ptr_even, *pml_ptr_odd;  n = 2*bw;  d_bw = (double) bw;  cos_even = (double *) malloc(sizeof(double) * ( bw + 0));  cos_odd = cos_even + (bw/2) + 0;  /* assign workspace */  weighted_data = workspace;  cos_data = workspace + (2*bw);  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */  /* total workspace = (8 * bw) */  fprintf(stdout,"SEMI_NAIVE\n");  fprintf(stderr,"bw = %d\t m = %d\n", bw, m);  result_ptr = result;  /*    need to apply quadrature weights to the data and compute    the cosine transform: if the order is even, get the regular    weights; if the order is odd, get the oddweights (those    where I've already multiplied the sin factor in    */  if( (m % 2) == 0)    weights = get_weights(bw);  else    weights = get_oddweights(bw);  tstart = csecond();  for(counter = 0 ; counter < loops ; counter ++ )    {      /*	smooth the weighted signal	*/      if(errorflag == 0)	for ( i = 0; i < n    ; ++i )	  weighted_data[i] = data[ i ] * weights[ i ];      else	for ( i = 0; i < n    ; ++i )	  weighted_data[i] = *data++ * weights[ i ];      kFCT(weighted_data, cos_data, scratchpad, n, bw, 1);      /* normalize the data for this problem */      if (m == 0)	{	  for (i=0; i<bw; i++)	    cos_data[i] *= (d_bw * 0.5);	}      else	{	  for (i=0; i<bw; i++)	    cos_data[i] *= d_bw;	}      cos_data[0] *= 2.0;            /* separate cos_data into even and odd indexed arrays */      ctr = 0;      for(i = 0; i < bw; i += 2)	{	  cos_even[ctr] = cos_data[i];	  cos_odd[ctr]  = cos_data[i+1];	  ctr++;	}            /*	do the projections; Note that the cos_pml_table has	had all the zeroes stripped out so the indexing is	complicated somewhat	*/      /*	this loop is unrolled to compute two coefficients	at a time (for efficiency)	*/      for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)	{	  /* get ptrs to precomputed data */	  pml_ptr_even = cos_pml_table + NewTableOffset(m, m + i);	  pml_ptr_odd  = cos_pml_table + NewTableOffset(m, m + i + 1);	  /* zero out the accumulators */	  eresult0 = 0.0; eresult1 = 0.0;	  oresult0 = 0.0; oresult1 = 0.0;	  for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)	    {	      eresult0 += cos_even[j] * pml_ptr_even[j];	      oresult0 += cos_odd[j] * pml_ptr_odd[j];	    }	  for( ;  j < ((m + i)/2) + 1 ; j += 2)	    {	      eresult0 += cos_even[j] * pml_ptr_even[j];	      eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];	      oresult0 += cos_odd[j] * pml_ptr_odd[j];	      oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];	    }	  /* save the result */	  result_ptr[i] = eresult0 + eresult1;	  result_ptr[i + 1] = oresult0 + oresult1;	}            /* if there are an odd number of coefficients to compute	 at this order, get that last coefficient! */      if( ( (bw - m) % 2 ) == 1)	{	  pml_ptr_even = cos_pml_table + NewTableOffset(m, bw - 1);	  	  eresult0 = 0.0; eresult1 = 0.0;	  eresult2 = 0.0; eresult3 = 0.0;	  	  for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)	    {	      eresult0 += cos_even[j] * pml_ptr_even[j];	    }	  	  for( ;  j < ((bw - 1)/2) + 1; j += 4)	    {	      eresult0 += cos_even[j] * pml_ptr_even[j];	      eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];	      eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];	      eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];	    }	  result_ptr[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;	}      if(errorflag == 1)	result_ptr += (bw - m);    }  tstop = csecond();#ifndef WALLCLOCK  fprintf(stdout,	  "loops = %d:\ntotal cpu time =\t%.4e secs\n",	  loops, tstop-tstart);  fprintf(stdout, "avg cpu time:\t\t%.4e secs\n",	  (tstop-tstart)/((double) loops));#else  fprintf(stdout,	  "loops = %d:\ntotal wall time =\t%.4e secs\n",	  loops, tstop-tstart);  fprintf(stdout, "avg wall time:\t\t%.4e secs\n",	  (tstop-tstart)/((double) loops));#endif  free(cos_even);}/************************************************************************//* SemiNaiveReduced computes the Legendre transform of data.   This function has been designed to be a component in   a full spherical harmonic transform.     data - pointer to double array of size (2*bw) containing          function to be transformed.  Assumes sampling at Chebyshev nodes   bw   - bandwidth of the problem - power of 2 expected   m   - order of the problem.  0 <= m < bw   result - pointer to double array of length bw for returning computed            Legendre coefficients.  Contains             bw-m coeffs, with the <f,P(m,m)> coefficient            located in result[0]   cos_pml_table - a pointer to an array containing the cosine                   series coefficients of the Pmls (or Gmls)		   for this problem.  This table can be computed		   using the CosPmlTableGen() function, and		   the offset for a particular Pml can be had		   by calling the function TableOffset().		   The size of the table is computed using		   the TableSize() function.  Note that		   since the cosine series are always zero-striped,		   the zeroes have been removed.   workspace - a pointer to a double array of size (8 * bw)   cos_even - ptr to double array of length bw, used as "dummy"              workspace   CoswSave - ptr to double array holding the precomputed data           the fftpack's dct needs, for a dct of length 2*bw*/void SemiNaiveReduced(double *data, 		      int bw, 		      int m, 		      double *result, 		      double *cos_pml_table, 		      double *workspace,		      double *cos_even){  int i, j, n;  const double *weights;  double *weighted_data;  double *cos_data;  double *scratchpad;  double *cos_odd;  double eresult0, eresult1, eresult2, eresult3;  double oresult0, oresult1;    double *pml_ptr_even, *pml_ptr_odd;  int ctr;  double d_bw;  n = 2*bw;  d_bw = (double) bw;  cos_odd = cos_even + (bw/2);  /* assign workspace */  weighted_data = workspace;  cos_data = workspace + (2*bw);  scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */  /* total workspace = (8 * bw) */   /*    need to apply quadrature weights to the data and compute    the cosine transform: if the order is even, get the regular    weights; if the order is odd, get the oddweights (those    where I've already multiplied the sin factor in    */  if( (m % 2) == 0)    weights = get_weights(bw);  else    weights = get_oddweights(bw);   /*    smooth the weighted signal    */  for ( i = 0; i < n    ; ++i )    weighted_data[i] = data[ i ] * weights[ i ];    kFCT(weighted_data, cos_data, scratchpad, n, bw, 1);    /* normalize the data for this problem */  if (m == 0)    {      for (i=0; i<bw; i++)	cos_data[i] *= (d_bw * 0.5);    }  else    {      for (i=0; i<bw; i++)	cos_data[i] *= d_bw ;    }  cos_data[0] *= 2.0;  /*** separate cos_data into even and odd indexed arrays ***/  ctr = 0;  for(i = 0; i < bw; i += 2)    {      cos_even[ctr] = cos_data[i];      cos_odd[ctr]  = cos_data[i+1];      ctr++;    }    /*    do the projections; Note that the cos_pml_table has    had all the zeroes stripped out so the indexing is    complicated somewhat    */    /******** this is the original routine     for (i=m; i<bw; i++)    {    pml_ptr = cos_pml_table + NewTableOffset(m,i);    if ((m % 2) == 0)    {    for (j=0; j<(i/2)+1; j++)    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];    }    else    {    if (((i-m) % 2) == 0)    {    for (j=0; j<(i/2)+1; j++)    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];    }    else    {    for (j=0; j<(i/2); j++)    result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];    }    }           toggle = (toggle+1) % 2;    }    end of original routine ***/  /*** new version of the above for-loop; I'll    try to remove some conditionals and move others;    this may make things run faster ... I hope ! ***/  /*    this loop is unrolled to compute two coefficients    at a time (for efficiency)    */  for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)    {      /* get ptrs to precomputed data */      pml_ptr_even = cos_pml_table + NewTableOffset(m, m + i);      pml_ptr_odd  = cos_pml_table + NewTableOffset(m, m + i + 1);      eresult0 = 0.0; eresult1 = 0.0;      oresult0 = 0.0; oresult1 = 0.0;      for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)	{	  eresult0 += cos_even[j] * pml_ptr_even[j];	  oresult0 += cos_odd[j] * pml_ptr_odd[j];	}      for( ;  j < ((m + i)/2) + 1; j += 2)	{	  eresult0 += cos_even[j] * pml_ptr_even[j];	  eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];	  oresult0 += cos_odd[j] * pml_ptr_odd[j];	  oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];	}      /* save the result */      result[i] = eresult0 + eresult1;      result[i + 1] = oresult0 + oresult1;    }  /* if there are an odd number of coefficients to compute     at this order, get that last coefficient! */  if( ( (bw - m) % 2 ) == 1)    {      pml_ptr_even = cos_pml_table + NewTableOffset(m, bw - 1);      eresult0 = 0.0; eresult1 = 0.0;      eresult2 = 0.0; eresult3 = 0.0;      for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)	{	  eresult0 += cos_even[j] * pml_ptr_even[j];	}      for( ;  j < ((bw - 1)/2) + 1; j += 4)	{	  eresult0 += cos_even[j] * pml_ptr_even[j];	  eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];	  eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];	  eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];	}      result[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;    }}/************************************************************************//************************************************************************/#else   /* FFTPACK is defined *//************************************************************************//************************************************************************//************************************************************************//* seminaive computes the Legendre transform of data.   This function has been designed with speed testing   of associated Legendre transforms in mind, and thus   contains a lot of extra arguments.  For a procedure   that does not contain this extra baggage, use SemiNaiveReduced().   data - pointer to double array of size (2*bw) containing          function to be transformed. Assumes sampling at Chebyshev nodes.   bw   - bandwidth of the problem - power of 2 expected    m   - order of the problem.  0 <= m < bw    k   - number of Legendre coeffients desired. 1 <= k <= bw-m          If value of k is illegal, defaults to bw-m   result - pointer to double array of length bw for returning computed            Legendre coefficients.  Contains at most            bw-m coeffs, with the <f,P(m,m)> coefficient            located in result[0]   cos_pml_table - a pointer to an array containing the cosine                   series coefficients of the Pmls (or Gmls)		   for this problem.  This table can be computed		   using the CosPmlTableGen() function, and		   the offset for a particular Pml can be had		   by calling the function NewTableOffset().		   The size of the table is computed using		   the TableSize() function.  Note that		   since the cosine series are always zero-striped,		   the zeroes have been removed.   timing - a flag indicating whether timing information is            desired.  0 if no timing is wanted, 1 will print            out some nice information regarding how long            the procedure took to execute.   runtime - a pointer to a double location for recording             the total runtime of the procedure.   loops - the number of times to loop thru the timed           part of the routine.  This is intended to reduce	   timing noise due to multiprocessing and discretization errors   workspace - a pointer to a double array of size (10 * bw)*/void SemiNaive( double *data, 		int bw, 		int m, 		int k, 		double *result, 		double *cos_pml_table, 		int timing, 		double *runtime, 		int loops, 		double *workspace){
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -