📄 naive_synthesis.cpp
字号:
}
void naive_synthesis::Naive_Analysis_Timing(double *data, int bw, int m, double *result, int timing, double *runtime, int loops, double *workspace)
{
double *prev, *prevprev, *temp1, *temp2, *temp3, *temp4, *x_i, *eval_args;
double *wdata;
const double *weight_vec;
int i, j, l;
double total_time, tstart, tstop;
prevprev = workspace;
prev = prevprev + (2*bw);
temp1 = prev + (2*bw);
temp2 = temp1 + (2*bw);
temp3 = temp2 + (2*bw);
temp4 = temp3 + (2*bw);
x_i = temp4 + (2*bw);
eval_args = x_i + (2*bw);
wdata = eval_args + (2*bw);
/* get the evaluation nodes */
ns_EvalPts(2*bw,x_i);
ns_ArcCosEvalPts(2*bw,eval_args);
/* start the timer
if (timing)
gettimeofday(&ftstart,0); */
// if (timing)
// tstart = csecond();
/* main timing loop */
for (l=0; l< loops; l++)
{
/* set initial values of first two Pmls */
for (i=0; i<2*bw; i++)
prevprev[i] = 0.0;
if (m == 0) {
for (i=0; i<2*bw; i++) {
prev[i] = 0.5;
}
}
else
ns_Pmm_L2(m, eval_args, 2*bw, prev);
/* make sure result is zeroed out */
for (i=0; i<(bw-m); i++)
result[i] = 0.0;
/* apply quadrature weights */
weight_vec = wei.get_weights(bw);
for (i=0; i<(2*bw); i++)
wdata[i] = data[i] * weight_vec[i];
/* compute Pmm coefficient */
for (j=0; j<(2*bw); j++)
result[0] += wdata[j] * prev[j];
/* now generate remaining pmls while computing coefficients */
for (i=0; i<bw-m-1; i++) {
ns_vec_mul(ns_L2_cn(m,m+i),prevprev,temp1,2*bw);
ns_vec_pt_mul(prev, x_i, temp2, 2*bw);
ns_vec_mul(ns_L2_an(m,m+i), temp2, temp3, 2*bw);
ns_vec_add(temp3, temp1, temp4, 2*bw); /* temp4 now contains P(m,m+i+1) */
/* compute this coefficient */
for (j=0; j<(2*bw); j++)
result[i+1] += wdata[j] * temp4[j];
/* now update Pi and P(i+1) */
memcpy(prevprev, prev, sizeof(double) * 2 * bw);
memcpy(prev, temp4, sizeof(double) * 2 * bw);
}
} /* closes main timing loop */
/*
if (timing)
{
tstop = csecond();
total_time = tstop - tstart;
*runtime = total_time;
fprintf(stdout,"\n");
fprintf(stdout,"Program: Naive Legendre Transform \n");
fprintf(stdout,"m = %d\n", m);
fprintf(stdout,"Bandwidth = %d\n", bw);
#ifndef WALLCLOCK
fprintf(stdout,"Total elapsed cpu time: %f seconds.\n\n", total_time);
#else
fprintf(stdout,"Total elapsed wall time: %f seconds.\n\n", total_time);
#endif
}*/
// else
*runtime = 0.0;
}
void naive_synthesis::Naive_Analysis_TimingX(double *data, int bw, int m, double *result, int timming, double *runtime, int loops, double *workspace)
{
double *prev, *prevprev, *temp1, *temp2, *temp3, *temp4, *x_i, *eval_args;
double *wdata;
const double *weight_vec;
int i, j, l;
double *storeplm, *storeplm_ptr;
double result0, result1, result2, result3;
double total_time, tstart, tstop;
prevprev = workspace;
prev = prevprev + (2*bw);
temp1 = prev + (2*bw);
temp2 = temp1 + (2*bw);
temp3 = temp2 + (2*bw);
temp4 = temp3 + (2*bw);
x_i = temp4 + (2*bw);
eval_args = x_i + (2*bw);
wdata = eval_args + (2*bw);
storeplm = (double *) malloc(sizeof(double) * 2 * bw *
(bw - m));
storeplm_ptr = storeplm;
/* get the evaluation nodes */
ns_EvalPts(2*bw,x_i);
ns_ArcCosEvalPts(2*bw,eval_args);
/* set initial values of first two Pmls */
for (i=0; i<2*bw; i++)
prevprev[i] = 0.0;
if (m == 0)
for (i=0; i<2*bw; i++)
prev[i] = 0.5;
else
ns_Pmm_L2(m, eval_args, 2*bw, prev);
memcpy(storeplm, prev, (size_t) sizeof(double) * 2 * bw);
for(i = 0; i < bw - m - 1; i++)
{
ns_vec_mul(ns_L2_cn(m,m+i),prevprev,temp1,2*bw);
ns_vec_pt_mul(prev, x_i, temp2, 2*bw);
ns_vec_mul(ns_L2_an(m,m+i), temp2, temp3, 2*bw);
ns_vec_add(temp3, temp1, temp4, 2*bw); /* temp4 now contains P(m,m+i+1) */
storeplm += (2 * bw);
memcpy(storeplm, temp4, (size_t) sizeof(double) * 2 * bw);
memcpy(prevprev, prev, (size_t) sizeof(double) * 2 * bw);
memcpy(prev, temp4, (size_t) sizeof(double) * 2 * bw);
}
storeplm = storeplm_ptr;
/* start the timer */
// if (timing)
// tstart = csecond();
/* main timing loop */
for (l=0; l< loops; l++)
{
/* make sure result is zeroed out */
for (i=0; i<(bw-m); i++)
result[i] = 0.0;
/* apply quadrature weights */
weight_vec = wei.get_weights(bw);
ns_vec_pt_mul(data, weight_vec, wdata, 2 * bw);
storeplm = storeplm_ptr;
for (i = 0; i < bw - m; i++)
{
result0 = 0.0; result1 = 0.0;
result2 = 0.0; result3 = 0.0;
for(j = 0; j < (2 * bw) % 4; ++j)
result0 += wdata[j] * storeplm[j];
for( ; j < (2 * bw); j += 4)
{
result0 += wdata[j] * storeplm[j];
result1 += wdata[j + 1] * storeplm[j + 1];
result2 += wdata[j + 2] * storeplm[j + 2];
result3 += wdata[j + 3] * storeplm[j + 3];
}
result[i] = result0 + result1 + result2 + result3;
storeplm += (2 * bw);
}
} /* closes main timing loop */
/* if (timing)
{
tstop = csecond();
total_time = tstop - tstart;
*runtime = total_time;
fprintf(stdout,"\n");
fprintf(stdout,"Program: Naive Legendre Transform \n");
fprintf(stdout,"m = %d\n", m);
fprintf(stdout,"Bandwidth = %d\n", bw);
#ifndef WALLCLOCK
fprintf(stdout,"Total elapsed cpu time: %f seconds.\n\n", total_time);
#else
fprintf(stdout,"Total elapsed wall time: %f seconds.\n\n", total_time);
#endif
}
else*/
*runtime = 0.0;
storeplm = storeplm_ptr;
free(storeplm);
}
void naive_synthesis::Naive_AnalysisX(double *data, int bw, int m, double *result, double *plmtable, double *workspace)
{
int i, j;
const double *weight_vec;
double result0, result1, result2, result3;
register double *wdata;
wdata = workspace;
/* make sure result is zeroed out */
for (i=0; i<(bw-m); i++)
result[i] = 0.0;
/* apply quadrature weights */
weight_vec = wei.get_weights(bw);
/* ns_vec_pt_mul(data, weight_vec, wdata, 2 * bw); */
for(i = 0; i < 2 * bw; i++)
wdata[i] = data[i] * weight_vec[i];
for (i = 0; i < bw - m; i++)
{
result0 = 0.0; result1 = 0.0;
result2 = 0.0; result3 = 0.0;
for(j = 0; j < (2 * bw) % 4; ++j)
result0 += wdata[j] * plmtable[j];
for( ; j < (2 * bw); j += 4)
{
result0 += wdata[j] * plmtable[j];
result1 += wdata[j + 1] * plmtable[j + 1];
result2 += wdata[j + 2] * plmtable[j + 2];
result3 += wdata[j + 3] * plmtable[j + 3];
}
result[i] = result0 + result1 + result2 + result3;
plmtable += (2 * bw);
}
}
void naive_synthesis::Naive_SynthesizeX(double *coeffs, int bw, int m, double *result, double *plmtable)
{
int i, j;
double tmpcoef;
/* make sure result is zeroed out
for (i=0; i<(2*bw); i++)
result[i] = 0.0;
*/
/* add in Pmm contribution */
tmpcoef = coeffs[0];
if (tmpcoef != 0.0)
{
if (m == 0)
for (j=0; j<(2*bw); j++)
result[j] = tmpcoef;
else
for (j=0; j<(2*bw); j++)
result[j] = tmpcoef * plmtable[j];
}
plmtable += ( 2 * bw );
/* now generate remaining pmls while synthesizing function */
if (m == 0)
{
for (i=0; i<bw-m-1; i++) {
/* add in contribution */
tmpcoef = coeffs[i+1] * 2.0;
if (tmpcoef != 0.0)
{
for (j=0; j<(2*bw); j++)
result[j] += (tmpcoef * plmtable[j]);
plmtable += (2 * bw);
}
}
}
else
{
for (i=0; i<bw-m-1; i++) {
/* add in contribution */
tmpcoef = coeffs[i+1];
if (tmpcoef != 0.0)
{
for (j=0; j<(2*bw); j++)
result[j] += (tmpcoef * plmtable[j]);
plmtable += (2 * bw);
}
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -