📄 seminaive.cpp
字号:
// seminaive.cpp: implementation of the seminaive class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "seminaive.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /** for memcpy **/
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//#pragma comment(lib,"LIBC");
//#pragma comment(lib,"LIBCMT");
extern "C"
{
int fprintf( FILE *stream, const char *format,...);
}
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
seminaive::seminaive()
{
}
seminaive::~seminaive()
{
}
void seminaive::SemiNaiveReducedX(double *data, int bw, int m, int k, double *result, double *cos_pml_table, double *cos_even)
{
register int i, j ;
double *cos_odd;
double eresult0, eresult1, eresult2, eresult3;
double oresult0, oresult1;
double *pml_ptr_even, *pml_ptr_odd;
double times_two;
int fudge, fudge2;
cos_odd = cos_even + (bw/2);
if(m == 0)
times_two = (double) 1;
else
times_two = (double) 2;
/*** separate cos_data into even and odd indexed arrays ***/
for(i = 0; i < bw; i += 2)
{
*cos_even++ = data[i];
*cos_odd++ = data[i+1];
}
cos_even -= (bw / 2);
cos_odd -= (bw / 2);
/*
do the projections; Note that the cos_pml_table has
had all the zeroes stripped out so the indexing is
complicated somewhat
*/
/*
this loop is unrolled to compute two coefficients
at a time (for efficiency)
*/
fudge2 = k % 2 ;
for(i = 0; i < k - fudge2 ; i += 2)
{
/* get ptrs to precomputed data */
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
pml_ptr_odd = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);
eresult0 = 0.0; eresult1 = 0.0;
oresult0 = 0.0; oresult1 = 0.0;
fudge = (((m + i)/2) + 1);
for(j = 0; j < fudge % 2; ++j)
{
eresult0 += cos_even[j] * *pml_ptr_even++;
oresult0 += cos_odd[j] * *pml_ptr_odd++;
}
for( ; j < fudge; j += 2)
{
eresult0 += cos_even[j] * *pml_ptr_even++;
eresult1 += cos_even[j + 1] * *pml_ptr_even++;
oresult0 += cos_odd[j] * *pml_ptr_odd++;
oresult1 += cos_odd[j + 1] * *pml_ptr_odd++;
}
*result++ = times_two * ( eresult0 + eresult1 );
*result++ = times_two * ( oresult0 + oresult1 );
}
/* if there are an odd number of coefficients to compute,
get that last coefficient! */
if( ( k % 2 ) == 1)
{
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + k - 1);
eresult0 = 0.0; eresult1 = 0.0;
eresult2 = 0.0; eresult3 = 0.0;
fudge = (((m + k - 1)/2) + 1);
for(j = 0; j < fudge % 4; ++j)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
}
for( ; j < fudge ; j += 4)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
}
*result = times_two *
(eresult0 + eresult1 + eresult2 + eresult3);
}
}
void seminaive::InvSemiNaiveReduced(double *assoc_legendre_series, int bw, int m, double *result, double *trans_cos_pml_table, double *sin_values, double *workspace)
{
double *fcos; /* buffer for cosine series rep of result */
double *trans_tableptr;
double *scratchpad;
double *assoc_offset;
int i, j, n, rowsize;
double *p;
double fcos0, fcos1, fcos2, fcos3;
fcos = workspace; /* needs bw */
scratchpad = fcos + bw; /* needs (4*bw) */
/* total workspace is 5*bw */
trans_tableptr = trans_cos_pml_table;
p = trans_cos_pml_table;
/* main loop - compute each value of fcos
Note that all zeroes have been stripped out of the
trans_cos_pml_table, so indexing is somewhat complicated.
*/
/***** original version of the loop (easier to see
how things are being done)
for (i=0; i<bw; i++)
{
if (i == (bw-1))
{
if ((m % 2) == 1)
{
fcos[bw-1] = 0.0;
break;
}
}
fcos[i] = 0.0;
rowsize = Transpose_RowSize(i, m, bw);
if (i > m)
assoc_offset = assoc_legendre_series + (i - m) + (m % 2);
else
assoc_offset = assoc_legendre_series + (i % 2);
for (j=0; j<rowsize; j++)
fcos[i] += assoc_offset[2*j] * trans_tableptr[j];
trans_tableptr += rowsize;
}
end of the original version ******/
/*** this is a more efficient version of the above, original
loop ***/
for (i=0; i<=m; i++)
{
rowsize = cosp3.Transpose_RowSize(i, m, bw);
/* need to point to correct starting value of Legendre series */
assoc_offset = assoc_legendre_series + (i % 2);
fcos0 = 0.0 ; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;
for (j = 0; j < rowsize % 4; ++j)
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
for ( ; j < rowsize; j += 4){
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
}
fcos[i] = fcos0 + fcos1 + fcos2 + fcos3;
trans_tableptr += rowsize;
}
for (i=m+1; i<bw-1; i++)
{
rowsize = cosp3.Transpose_RowSize(i, m, bw);
/* need to point to correct starting value of Legendre series */
assoc_offset = assoc_legendre_series + (i - m) + (m % 2);
fcos0 = 0.0 ; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;
for (j = 0; j < rowsize % 4; ++j)
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
for ( ; j < rowsize; j += 4){
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
}
fcos[i] = fcos0 + fcos1 + fcos2 + fcos3;
trans_tableptr += rowsize;
}
if((m % 2) == 1)
/* if m odd, no need to do last row - all zeroes */
fcos[bw-1] = 0.0;
else
{
rowsize = cosp3.Transpose_RowSize(bw-1, m, bw);
/* need to point to correct starting value of Legendre series */
assoc_offset = assoc_legendre_series + (bw - 1 - m) + (m % 2);
fcos0 = 0.0; fcos1 = 0.0; fcos2 = 0.0; fcos3 = 0.0;
for(j = 0; j < rowsize % 4; ++j)
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
for ( ; j < rowsize; j += 4){
fcos0 += assoc_offset[2*j] * trans_tableptr[j];
fcos1 += assoc_offset[2*(j+1)] * trans_tableptr[j+1];
fcos2 += assoc_offset[2*(j+2)] * trans_tableptr[j+2];
fcos3 += assoc_offset[2*(j+3)] * trans_tableptr[j+3];
}
fcos[bw - 1] = fcos0 + fcos1 + fcos2 + fcos3;
trans_tableptr += rowsize;
}
/* now we have the cosine series for the result,
so now evaluate the cosine series at 2*bw Chebyshev nodes
*/
newf3.ExpIFCT(fcos, result, scratchpad, 2*bw, bw, 1);
/* if m is odd, then need to multiply by sin(x) at Chebyshev nodes */
if ((m % 2) == 1)
{
n = 2*bw;
for (j=0; j<n; j++)
result[j] *= sin_values[j];
}
trans_tableptr = p;
/* amscray */
}
void seminaive::SemiNaive(double *data, int bw, int m, int k, double *result, double *cos_pml_table, int timing, double *runtime, int loops, double *workspace)
{
int i, j, l, n, toggle;
const double *weights;
double *weighted_data, *eval_pts, *pml_ptr;
double *cos_data;
double *scratchpad;
double total_time;
double tstart, tstop;
double d_bw, tmp;
double *CoswSave;
n = 2*bw;
d_bw = (double) bw;
/* precompute for dct */
CoswSave = newf3.precomp_dct( n );
/* assign workspace */
weighted_data = workspace;
eval_pts = workspace + (2*bw);
cos_data = eval_pts + (2*bw);
scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
/* total workspace = (10 * bw) */
/*
need to apply quadrature weights to the data and compute
the cosine transform: if the order is even, get the regular
weights; if the order is odd, get the oddweights (those
where I've already multiplied the sin factor in
*/
if ( (m % 2) == 0)
weights = wei1.get_weights(bw);
else
weights = owei1.get_oddweights(bw);
/* start the timer */
// if (timing)
// tstart = csecond();
/* main timing loop */
for (l=0; l< loops; l++)
{
for (i=0; i<n; i++)
cos_data[i] = data[i] * weights[i];
newf3.DCTf( cos_data, n, bw, CoswSave );
/* normalize the data for this problem */
if (m == 0)
{
for (i=0; i<bw; i++)
cos_data[i] *= (d_bw * 0.5);
}
else
{
for (i=0; i<bw; i++)
cos_data[i] *= d_bw;
}
cos_data[0] *= 2.0;
toggle = 0;
/*
do the projections; Note that the cos_pml_table has
had all the zeroes stripped out so the indexing is
complicated somewhat
*/
for (i=m; i<k+m; i++)
{
/* get offset for Pml coefficients in cos_pml_table */
pml_ptr = cos_pml_table + cosp3.NewTableOffset(m,i);
if ((m % 2) == 0)
{
tmp = 0.0;
for (j=0; j<(i/2)+1; j++)
tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
result[i-m] = tmp;
}
else
{
if (((i-m) % 2) == 0)
{
tmp = 0.0;
for (j=0; j<(i/2)+1; j++)
tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
result[i-m] = tmp;
}
else
{
tmp = 0.0;
for (j=0; j<(i/2); j++)
tmp += cos_data[(2*j)+toggle] * pml_ptr[j];
result[i-m] = tmp;
}
}
toggle = (toggle+1) % 2;
}
} /* closes main timing loop */
/** have to normalize "out" the loops **/
for(i = 0; i < bw; i++)
result[i] /= ((double) loops);
/* if (timing)
{
tstop = csecond();
total_time = tstop - tstart;
*runtime = total_time;
fprintf(stdout,"\n");
fprintf(stdout,"Program: Seminaive Legendre Transform \n");
fprintf(stdout,"m = %d\n", m);
fprintf(stdout,"Bandwidth = %d\n", bw);
fprintf(stdout,"# of coefficients computed: %d\n", k);
#ifndef WALLCLOCK
fprintf(stdout,"Total elapsed cpu time: %.8f seconds.\n\n", total_time);
#else
fprintf(stdout,"Total elapsed wall time: %.8f seconds.\n\n", total_time);
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -