📄 seminaive.cpp
字号:
}
else*/
*runtime = 0.0;
free(CoswSave);
}
void seminaive::SemiNaiveX(double *data, int bw, int m, int k, double *result, double *cos_pml_table, double *runtime, int loops, double *workspace, int errorflag)
{
int i, j, n, counter;
const double *weights;
double *weighted_data;
double *cos_data;
double *scratchpad;
double tstart, tstop;
double *cos_even, *cos_odd;
double *result_ptr;
int ctr;
double d_bw;
double eresult0, eresult1, eresult2, eresult3;
double oresult0, oresult1;
double *pml_ptr_even, *pml_ptr_odd;
double *CoswSave;
n = 2*bw;
d_bw = (double) bw;
cos_even = (double *) malloc(sizeof(double) * ( bw + 0));
cos_odd = cos_even + (bw/2) + 0;
/* assign workspace */
weighted_data = workspace;
cos_data = workspace + (2*bw);
scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
/* total workspace = (8 * bw) */
/* precompute for dct */
CoswSave = newf3.precomp_dct( n );
fprintf(stdout,"SEMI_NAIVE\n");
fprintf(stderr,"bw = %d\t m = %d\n", bw, m);
result_ptr = result;
/*
need to apply quadrature weights to the data and compute
the cosine transform: if the order is even, get the regular
weights; if the order is odd, get the oddweights (those
where I've already multiplied the sin factor in
*/
if( (m % 2) == 0)
weights = wei1.get_weights(bw);
else
weights = owei1.get_oddweights(bw);
// tstart = csecond();
for(counter = 0 ; counter < loops ; counter ++ )
{
/*
smooth the weighted signal
*/
if(errorflag == 0)
for ( i = 0; i < n ; ++i )
cos_data[i] = data[ i ] * weights[ i ];
else
for ( i = 0; i < n ; ++i )
cos_data[i] = *data++ * weights[ i ];
newf3.DCTf( cos_data, n, bw, CoswSave );
/* normalize the data for this problem */
if (m == 0)
{
for (i=0; i<bw; i++)
cos_data[i] *= (d_bw * 0.5);
}
else
{
for (i=0; i<bw; i++)
cos_data[i] *= d_bw;
}
cos_data[0] *= 2.0;
/* separate cos_data into even and odd indexed arrays */
ctr = 0;
for(i = 0; i < bw; i += 2)
{
cos_even[ctr] = cos_data[i];
cos_odd[ctr] = cos_data[i+1];
ctr++;
}
/*
do the projections; Note that the cos_pml_table has
had all the zeroes stripped out so the indexing is
complicated somewhat
*/
/*
this loop is unrolled to compute two coefficients
at a time (for efficiency)
*/
for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)
{
/* get ptrs to precomputed data */
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
pml_ptr_odd = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);
/* zero out the accumulators */
eresult0 = 0.0; eresult1 = 0.0;
oresult0 = 0.0; oresult1 = 0.0;
for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
oresult0 += cos_odd[j] * pml_ptr_odd[j];
}
for( ; j < ((m + i)/2) + 1 ; j += 2)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
oresult0 += cos_odd[j] * pml_ptr_odd[j];
oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];
}
/* save the result */
result_ptr[i] = eresult0 + eresult1;
result_ptr[i + 1] = oresult0 + oresult1;
}
/* if there are an odd number of coefficients to compute
at this order, get that last coefficient! */
if( ( (bw - m) % 2 ) == 1)
{
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, bw - 1);
eresult0 = 0.0; eresult1 = 0.0;
eresult2 = 0.0; eresult3 = 0.0;
for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
}
for( ; j < ((bw - 1)/2) + 1; j += 4)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
}
result_ptr[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;
}
if(errorflag == 1)
result_ptr += (bw - m);
}
// tstop = csecond();
#ifndef WALLCLOCK
fprintf(stdout,
"loops = %d:\ntotal cpu time =\t%.4e secs\n",
loops, tstop-tstart);
fprintf(stdout, "avg cpu time:\t\t%.4e secs\n",
(tstop-tstart)/((double) loops));
#else
fprintf(stdout,
"loops = %d:\ntotal wall time =\t%.4e secs\n",
loops, tstop-tstart);
fprintf(stdout, "avg wall time:\t\t%.4e secs\n",
(tstop-tstart)/((double) loops));
#endif
free(CoswSave);
free(cos_even);
}
void seminaive::SemiNaiveReduced(double *data, int bw, int m, double *result, double *cos_pml_table, double *workspace, double *cos_even, double *CoswSave)
{
int i, j, n;
const double *weights;
double *weighted_data;
double *cos_data;
double *scratchpad;
double *cos_odd;
double eresult0, eresult1, eresult2, eresult3;
double oresult0, oresult1;
double *pml_ptr_even, *pml_ptr_odd;
int ctr;
double d_bw;
n = 2*bw;
d_bw = (double) bw;
cos_odd = cos_even + (bw/2);
/* assign workspace */
weighted_data = workspace;
cos_data = workspace + (2*bw);
scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
/* total workspace = (8 * bw) */
/*
need to apply quadrature weights to the data and compute
the cosine transform: if the order is even, get the regular
weights; if the order is odd, get the oddweights (those
where I've already multiplied the sin factor in
*/
if( (m % 2) == 0)
weights = wei1.get_weights(bw);
else
weights = owei1.get_oddweights(bw);
/*
smooth the weighted signal
*/
for ( i = 0; i < n ; ++i )
cos_data[i] = data[ i ] * weights[ i ];
newf3.DCTf( cos_data, n, bw, CoswSave );
/* normalize the data for this problem */
if (m == 0)
{
for (i=0; i<bw; i++)
cos_data[i] *= (d_bw * 0.5);
}
else
{
for (i=0; i<bw; i++)
cos_data[i] *= d_bw ;
}
cos_data[0] *= 2.0;
/*** separate cos_data into even and odd indexed arrays ***/
ctr = 0;
for(i = 0; i < bw; i += 2)
{
cos_even[ctr] = cos_data[i];
cos_odd[ctr] = cos_data[i+1];
ctr++;
}
/*
do the projections; Note that the cos_pml_table has
had all the zeroes stripped out so the indexing is
complicated somewhat
*/
/******** this is the original routine
for (i=m; i<bw; i++)
{
pml_ptr = cos_pml_table + NewTableOffset(m,i);
if ((m % 2) == 0)
{
for (j=0; j<(i/2)+1; j++)
result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
}
else
{
if (((i-m) % 2) == 0)
{
for (j=0; j<(i/2)+1; j++)
result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
}
else
{
for (j=0; j<(i/2); j++)
result[i-m] += cos_data[(2*j)+toggle] * pml_ptr[j];
}
}
toggle = (toggle+1) % 2;
}
end of original routine ***/
/*** new version of the above for-loop; I'll
try to remove some conditionals and move others;
this may make things run faster ... I hope ! ***/
/*
this loop is unrolled to compute two coefficients
at a time (for efficiency)
*/
for(i = 0; i < (bw - m) - ( (bw - m) % 2 ); i += 2)
{
/* get ptrs to precomputed data */
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, m + i);
pml_ptr_odd = cos_pml_table + cosp3.NewTableOffset(m, m + i + 1);
eresult0 = 0.0; eresult1 = 0.0;
oresult0 = 0.0; oresult1 = 0.0;
for(j = 0; j < (((m + i)/2) + 1) % 2; ++j)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
oresult0 += cos_odd[j] * pml_ptr_odd[j];
}
for( ; j < ((m + i)/2) + 1; j += 2)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
oresult0 += cos_odd[j] * pml_ptr_odd[j];
oresult1 += cos_odd[j + 1] * pml_ptr_odd[j + 1];
}
/* save the result */
result[i] = eresult0 + eresult1;
result[i + 1] = oresult0 + oresult1;
}
/* if there are an odd number of coefficients to compute
at this order, get that last coefficient! */
if( ( (bw - m) % 2 ) == 1)
{
pml_ptr_even = cos_pml_table + cosp3.NewTableOffset(m, bw - 1);
eresult0 = 0.0; eresult1 = 0.0;
eresult2 = 0.0; eresult3 = 0.0;
for(j = 0; j < (((bw - 1)/2) + 1) % 4; ++j)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
}
for( ; j < ((bw - 1)/2) + 1; j += 4)
{
eresult0 += cos_even[j] * pml_ptr_even[j];
eresult1 += cos_even[j + 1] * pml_ptr_even[j + 1];
eresult2 += cos_even[j + 2] * pml_ptr_even[j + 2];
eresult3 += cos_even[j + 3] * pml_ptr_even[j + 3];
}
result[bw - m - 1] = eresult0 + eresult1 + eresult2 + eresult3;
}
}
void seminaive::SemiNaive3(double *data, int bw, int m, int k, double *result, double *cos_pml_table, int timing, double *runtime, int loops, double *workspace)
{
int i, j, l, n, toggle;
const double *weights;
double *weighted_data, *eval_pts, *pml_ptr;
double *cos_data;
double *scratchpad;
double total_time;
double tstart, tstop;
double d_bw, tmp;
struct lowhigh *lowpoly, *highpoly;
n = 2*bw;
d_bw = (double) bw;
lowpoly = (struct lowhigh *) malloc(sizeof(struct lowhigh) * n);
highpoly = (struct lowhigh *) malloc(sizeof(struct lowhigh) * n);
/* assign workspace */
weighted_data = workspace;
eval_pts = workspace + (2*bw);
cos_data = eval_pts + (2*bw);
scratchpad = cos_data + (2*bw); /* scratchpad needs (4*bw) */
/* total workspace = (10 * bw) */
/*
need to apply quadrature weights to the data and compute
the cosine transform: if the order is even, get the regular
weights; if the order is odd, get the oddweights (those
where I've already multiplied the sin factor in
*/
if ( (m % 2) == 0)
weights = wei1.get_weights(bw);
else
weights = owei1.get_oddweights(bw);
/* start the timer */
// if (timing)
// tstart = csecond();
/* main timing loop */
for (l=0; l< loops; l++)
{
for (i=0; i<n; i++)
weighted_data[i] = data[i] * weights[i];
/*
smooth the weighted signal: use either kFCT
or KFCTX (which might be little faster)
*/
/* kFCT(weighted_data, cos_data, scratchpad, n, bw, 1); */
newf3.kFCTX(weighted_data, cos_data, scratchpad, n, bw, 1,
lowpoly, highpoly);
/* normalize the data for this problem */
if (m == 0)
{
for (i=0; i<bw; i++)
cos_data[i] *= (d_bw * 0.5);
}
else
{
for (i=0; i<bw; i++)
cos_data[i] *= d_bw;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -