📄 seminaive.c
字号:
for (i=0; i<bw; i++) cos_data[i] *= d_bw; } cos_data[0] *= 2.0; toggle = 0; /* do the projections; Note that the cos_pml_table has had all the zeroes stripped out so the indexing is complicated somewhat */ for (i=m; i<k+m; i++) { /* get offset for Pml coefficients in cos_pml_table */ pml_ptr = cos_pml_table + NewTableOffset(m,i); if ((m % 2) == 0) { tmp = 0.0; for (j=0; j<(i/2)+1; j++) tmp += cos_data[(2*j)+toggle] * pml_ptr[j]; result[i-m] = tmp; } else { if (((i-m) % 2) == 0) { tmp = 0.0; for (j=0; j<(i/2)+1; j++) tmp += cos_data[(2*j)+toggle] * pml_ptr[j]; result[i-m] = tmp; } else { tmp = 0.0; for (j=0; j<(i/2); j++) tmp += cos_data[(2*j)+toggle] * pml_ptr[j]; result[i-m] = tmp; } } toggle = (toggle+1) % 2; } } /* closes main timing loop */ /** have to normalize "out" the loops **/ for(i = 0; i < bw; i++) result[i] /= ((double) loops); if (timing) { tstop = csecond(); total_time = tstop - tstart; *runtime = total_time; fprintf(stdout,"\n"); fprintf(stdout,"Program: Seminaive Legendre Transform \n"); fprintf(stdout,"m = %d\n", m); fprintf(stdout,"Bandwidth = %d\n", bw); fprintf(stdout,"# of coefficients computed: %d\n", k);#ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time: %.8f seconds.\n\n", total_time); #else fprintf(stdout,"Total elapsed wall time: %.8f seconds.\n\n", total_time); #endif } else *runtime = 0.0; free(highpoly); free(lowpoly);}/*********************************************************//*** This should be a faster version of SemiNaive(). Note: no "timing" input, unlike SemiNaive(). There is also the following additional function argument: errorflag: = 0 -> don't care about measuring error, just want to time the algorithm = 1 -> care about measuring the error; in this case loops <= 10 (this is because of memory constraints, given how the "true" and "calculated" coefficients are stored)*****/void SemiNaiveX(double *data, int bw, int m, int k, double *result, double *cos_pml_table, double *runtime, int loops, double *workspace, int errorflag){ int i, j, n, counter; const double *weights; double *weighted_data; double *cos_data; double *scratchpad; double tstart, tstop; double *cos_even, *cos_odd; double *result_ptr; int ctr; double d_bw; double eresult0, eresult1, eresult2, eresult3; double oresult0, oresult1; double *pml_ptr_even, *pml_ptr_odd; n = 2*bw; d_bw = (double) bw; cos_even = (double *) malloc(sizeof(double) * ( bw + 0)); cos_odd = cos_even + (bw/2) + 0; /* assign workspace */ weighted_data = workspace; cos_data = workspace + (2*bw); scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */ /* total workspace = (8 * bw) */ fprintf(stdout,"SEMI_NAIVE\n"); fprintf(stderr,"bw = %d\t m = %d\n", bw, m); result_ptr = result; /* need to apply quadrature weights to the data and compute the cosine transform: if the order is even, get the regular weights; if the order is odd, get the oddweights (those where I've already multiplied the sin factor in */ if( (m % 2) == 0) weights = get_weights(bw); else weights = get_oddweights(bw); tstart = csecond(); for(counter = 0 ; counter < loops ; counter ++ ) { /* smooth the weighted signal */ if(errorflag == 0) for ( i = 0; i < n ; ++i ) weighted_data[i] = data[ i ] * weights[ i ]; else for ( i = 0; i < n ; ++i ) weighted_data[i] = *data++ * weights[ i ]; kFCT(weighted_data, cos_data, scratchpad, n, bw, 1); /* normalize the data for this problem */ if (m == 0) { for (i=0; i<bw; i++) cos_data[i] *= (d_bw * 0.5); } else { for (i=0; i<bw; i++) cos_data[i] *= d_bw; } cos_data[0] *= 2.0; /* separate cos_data into even and odd indexed arrays */ ctr = 0; for(i = 0; i < bw; i += 2) { cos_even[ctr] = cos_data[i]; cos_odd[ctr] = cos_data[i+1]; ctr++; } /* do the projections; Note that the cos_pml_table has had all the zeroes stripped out so the indexing is complicated somewhat */ /* this loop is unrolled to compute two coefficients at a time (for efficiency) */ for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2) { /* get ptrs to precomputed data */ pml_ptr_even = cos_pml_table + NewTableOffset(m, m + i); pml_ptr_odd = cos_pml_table + NewTableOffset(m, m + i + 1); /* zero out the accumulators */ eresult0 = 0.0; eresult1 = 0.0; oresult0 = 0.0; oresult1 = 0.0; for(j = 0; j < (((m + i)/2) + 1) % 2; ++j) { eresult0 += cos_even[j] * pml_ptr_even[j]; oresult0 += cos_odd[j] * pml_ptr_odd[j]; } for( ; j < ((m + i)/2) + 1 ; j += 2) { eresult0 += cos_even[j] * pml_ptr_even[j]; eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1]; oresult0 += cos_odd[j] * pml_ptr_odd[j]; oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1]; } /* save the result */ result_ptr[i] = eresult0 + eresult1; result_ptr[i + 1] = oresult0 + oresult1; } /* if there are an odd number of coefficients to compute at this order, get that last coefficient! */ if( ( (bw - m) % 2 ) == 1) { pml_ptr_even = cos_pml_table + NewTableOffset(m, bw - 1); eresult0 = 0.0; eresult1 = 0.0; eresult2 = 0.0; eresult3 = 0.0; for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j) { eresult0 += cos_even[j] * pml_ptr_even[j]; } for( ; j < ((bw - 1)/2) + 1; j += 4) { eresult0 += cos_even[j] * pml_ptr_even[j]; eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1]; eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2]; eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3]; } result_ptr[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3; } if(errorflag == 1) result_ptr += (bw - m); } tstop = csecond();#ifndef WALLCLOCK fprintf(stdout, "loops = %d:\ntotal cpu time =\t%.4e secs\n", loops, tstop-tstart); fprintf(stdout, "avg cpu time:\t\t%.4e secs\n", (tstop-tstart)/((double) loops));#else fprintf(stdout, "loops = %d:\ntotal wall time =\t%.4e secs\n", loops, tstop-tstart); fprintf(stdout, "avg wall time:\t\t%.4e secs\n", (tstop-tstart)/((double) loops));#endif free(cos_even);}/************************************************************************//* SemiNaiveReduced computes the Legendre transform of data. This function has been designed to be a component in a full spherical harmonic transform. data - pointer to double array of size (2*bw) containing function to be transformed. Assumes sampling at Chebyshev nodes bw - bandwidth of the problem - power of 2 expected m - order of the problem. 0 <= m < bw result - pointer to double array of length bw for returning computed Legendre coefficients. Contains bw-m coeffs, with the <f,P(m,m)> coefficient located in result[0] cos_pml_table - a pointer to an array containing the cosine series coefficients of the Pmls (or Gmls) for this problem. This table can be computed using the CosPmlTableGen() function, and the offset for a particular Pml can be had by calling the function TableOffset(). The size of the table is computed using the TableSize() function. Note that since the cosine series are always zero-striped, the zeroes have been removed. workspace - a pointer to a double array of size (8 * bw) cos_even - ptr to double array of length bw, used as "dummy" workspace CoswSave - ptr to double array holding the precomputed data the fftpack's dct needs, for a dct of length 2*bw*/void SemiNaiveReduced(double *data, int bw, int m, double *result, double *cos_pml_table, double *workspace, double *cos_even){ int i, j, n; const double *weights; double *weighted_data; double *cos_data; double *scratchpad; double *cos_odd; double eresult0, eresult1, eresult2, eresult3; double oresult0, oresult1; double *pml_ptr_even, *pml_ptr_odd; int ctr; double d_bw; n = 2*bw; d_bw = (double) bw; cos_odd = cos_even + (bw/2); /* assign workspace */ weighted_data = workspace; cos_data = workspace + (2*bw); scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */ /* total workspace = (8 * bw) */ /* need to apply quadrature weights to the data and compute the cosine transform: if the order is even, get the regular weights; if the order is odd, get the oddweights (those where I've already multiplied the sin factor in */ if( (m % 2) == 0) weights = get_weights(bw); else weights = get_oddweights(bw); /* smooth the weighted signal */ for ( i = 0; i < n ; ++i ) weighted_data[i] = data[ i ] * weights[ i ]; kFCT(weighted_data, cos_data, scratchpad, n, bw, 1); /* normalize the data for this problem */ if (m == 0) { for (i=0; i<bw; i++) cos_data[i] *= (d_bw * 0.5); } else { for (i=0; i<bw; i++) cos_data[i] *= d_bw ; } cos_data[0] *= 2.0; /*** separate cos_data into even and odd indexed arrays ***/ ctr = 0; for(i = 0; i < bw; i += 2) { cos_even[ctr] = cos_data[i]; cos_odd[ctr] = cos_data[i+1]; ctr++; } /* do the projections; Note that the cos_pml_table has had all the zeroes stripped out so the indexing is complicated somewhat */ /******** this is the original routine for (i=m; i<bw; i++) { pml_ptr = cos_pml_table + NewTableOffset(m,i); if ((m % 2) == 0) { for (j=0; j<(i/2)+1; j++) result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j]; } else { if (((i-m) % 2) == 0) { for (j=0; j<(i/2)+1; j++) result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j]; } else { for (j=0; j<(i/2); j++) result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j]; } } toggle = (toggle+1) % 2; } end of original routine ***/ /*** new version of the above for-loop; I'll try to remove some conditionals and move others; this may make things run faster ... I hope ! ***/ /* this loop is unrolled to compute two coefficients at a time (for efficiency) */ for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2) { /* get ptrs to precomputed data */ pml_ptr_even = cos_pml_table + NewTableOffset(m, m + i); pml_ptr_odd = cos_pml_table + NewTableOffset(m, m + i + 1); eresult0 = 0.0; eresult1 = 0.0; oresult0 = 0.0; oresult1 = 0.0; for(j = 0; j < (((m + i)/2) + 1) % 2; ++j) { eresult0 += cos_even[j] * pml_ptr_even[j]; oresult0 += cos_odd[j] * pml_ptr_odd[j]; } for( ; j < ((m + i)/2) + 1; j += 2) { eresult0 += cos_even[j] * pml_ptr_even[j]; eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1]; oresult0 += cos_odd[j] * pml_ptr_odd[j]; oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1]; } /* save the result */ result[i] = eresult0 + eresult1; result[i + 1] = oresult0 + oresult1; } /* if there are an odd number of coefficients to compute at this order, get that last coefficient! */ if( ( (bw - m) % 2 ) == 1) { pml_ptr_even = cos_pml_table + NewTableOffset(m, bw - 1); eresult0 = 0.0; eresult1 = 0.0; eresult2 = 0.0; eresult3 = 0.0; for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j) { eresult0 += cos_even[j] * pml_ptr_even[j]; } for( ; j < ((bw - 1)/2) + 1; j += 4) { eresult0 += cos_even[j] * pml_ptr_even[j]; eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1]; eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2]; eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3]; } result[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3; }}/************************************************************************//************************************************************************/#else /* FFTPACK is defined *//************************************************************************//************************************************************************//************************************************************************//* seminaive computes the Legendre transform of data. This function has been designed with speed testing of associated Legendre transforms in mind, and thus contains a lot of extra arguments. For a procedure that does not contain this extra baggage, use SemiNaiveReduced(). data - pointer to double array of size (2*bw) containing function to be transformed. Assumes sampling at Chebyshev nodes. bw - bandwidth of the problem - power of 2 expected m - order of the problem. 0 <= m < bw k - number of Legendre coeffients desired. 1 <= k <= bw-m If value of k is illegal, defaults to bw-m result - pointer to double array of length bw for returning computed Legendre coefficients. Contains at most bw-m coeffs, with the <f,P(m,m)> coefficient located in result[0] cos_pml_table - a pointer to an array containing the cosine series coefficients of the Pmls (or Gmls) for this problem. This table can be computed using the CosPmlTableGen() function, and the offset for a particular Pml can be had by calling the function NewTableOffset(). The size of the table is computed using the TableSize() function. Note that since the cosine series are always zero-striped, the zeroes have been removed. timing - a flag indicating whether timing information is desired. 0 if no timing is wanted, 1 will print out some nice information regarding how long the procedure took to execute. runtime - a pointer to a double location for recording the total runtime of the procedure. loops - the number of times to loop thru the timed part of the routine. This is intended to reduce timing noise due to multiprocessing and discretization errors workspace - a pointer to a double array of size (10 * bw)*/void SemiNaive( double *data, int bw, int m, int k, double *result, double *cos_pml_table, int timing, double *runtime, int loops, double *workspace){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -