📄 float.c
字号:
getcycle();
getcycle();
getcycle();
for (j = 0; j < NTIMES; ++j)
{
/* Call sin NCALLS times and see how long it took. */
cycles_sin[j] = getcycle();
for (i = 0; i < NCALLS; ++i)
{
foutput[i] = (float)sin((double)finput[i]);
}
cycles_sin[j] = getcycle() - cycles_sin[j];
cycles_cos[j] = getcycle();
for (i = 0; i < NCALLS; ++i)
{
foutput[i] = (float)cos((double)finput[i]);
}
cycles_cos[j] = getcycle() - cycles_cos[j];
cycles_fsin[j] = getcycle();
for (i = 0; i < NCALLS; ++i)
{
foutput[i] = fsin(finput[i]);
}
cycles_fsin[j] = getcycle() - cycles_fsin[j];
cycles_fcos[j] = getcycle();
for (i = 0; i < NCALLS; ++i)
{
foutput[i] = fcos(finput[i]);
}
cycles_fcos[j] = getcycle() - cycles_fcos[j];
}
for (j = 0; j < NTIMES; ++j)
{
sprintf(szMsg, "testtrig: %d: sin = %.1f cycles; fsin = %.1f cycles; cos = %.1f cycles; fcos = %.1f cycles\n",
j,
(float)cycles_sin[j]/(float)NCALLS,
(float)cycles_fsin[j]/(float)NCALLS,
(float)cycles_cos[j]/(float)NCALLS,
(float)cycles_fcos[j]/(float)NCALLS);
message(szMsg);
}
/* Check for maximum error. */
/* The increment in the angle going through the table is
(2 PI/TABLESIZE). The sin curve is steepest close to 0 (well, anywhere the
curve cuts the x axis) so an angle close to 0 will give the greatest error.
You might expect the greatest error at (2 PI/TABLESIZE - epsilon), but the
bias trick for floating point conversion rounds off to the nearest integer,
so the greatest error is actually at (2 PI/TABLESIZE * 0.5). And using the
fact that sin(a) ~= a for small angles, we can say the worst case error is
about PI/TABLESIZE.
In the test we've run here, this is obscured by the fact that we've chosen
angles that go way outside the range 0 to 2 PI. Uncomment the next line
to convince yourself this analysis is correct. */
finput[0] = 3.14159f / (float)SINTABLESIZE;
for (i = 0; i < NCALLS; ++i)
{
ferr = (float)fabs(fsin(finput[i]) - sin(finput[i]));
if (ferr > maxsinerr)
{
maxsinerr = ferr;
maxsintheta = finput[i];
}
ferr = (float)fabs(fcos(finput[i]) - cos(finput[i]));
if (ferr > maxcoserr)
{
maxcoserr = ferr;
maxcostheta = finput[i];
}
}
sprintf(szMsg, "max sin error = %.6f (@theta = %.4f)", maxsinerr, maxsintheta);
message(szMsg);
sprintf(szMsg, "max cos error = %.6f (@theta = %.4f)", maxcoserr, maxcostheta);
message(szMsg);
}
/*
 * Benchmark fsqrt() against the C library sqrt() and report its accuracy.
 *
 * Fills finput[] with NCALLS pseudo-random values in [0, 10000), times
 * NCALLS calls of each routine NTIMES times over, reports the average
 * cycles per call for every run through message(), and finally reports
 * the largest percentage error of fsqrt() relative to sqrt().
 *
 * Takes no arguments and returns nothing; all output goes through
 * message().
 */
void testsqrt(void)
{
    /* Count cycles for each function. */
    unsigned int cycles_sqrt[NTIMES];
    unsigned int cycles_fsqrt[NTIMES];
    float maxerr = 0.0f;
    /* Initialised so the final report never reads an indeterminate value
       when no input produces a non-zero error. */
    float maxval = 0.0f;
    float ferr;
    int j, i;

    sqrtinit();

    /* Set tables to zero, hopefully will get tables into cache. */
    memset(finput, 0, NCALLS * sizeof(float));
    memset(foutput, 0, NCALLS * sizeof(float));

    /* Generate some random values to take the square root of. */
    for (i = 0; i < NCALLS; ++i)
    {
        finput[i] = (((float)(rand() % 10000)) / 10000.0f) * 10000.0f;
    }

    /* Pentium rdtsc instruction may behave erratically on first couple calls,
       so make a couple extra calls initially. */
    getcycle();
    getcycle();
    getcycle();

    for (j = 0; j < NTIMES; ++j)
    {
        /* Call sqrt NCALLS times and see how long it took. */
        cycles_sqrt[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)sqrt((double)finput[i]);
        }
        cycles_sqrt[j] = getcycle() - cycles_sqrt[j];

        /* Same measurement for the fast version. */
        cycles_fsqrt[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = fsqrt(finput[i]);
        }
        cycles_fsqrt[j] = getcycle() - cycles_fsqrt[j];
    }

    /* Report the per-call average for each run. */
    for (j = 0; j < NTIMES; ++j)
    {
        sprintf(szMsg, "testsqrt: %d: sqrt = %.1f cycles; fsqrt = %.1f cycles\n",
                j,
                (float)cycles_sqrt[j]/(float)NCALLS,
                (float)cycles_fsqrt[j]/(float)NCALLS);
        message(szMsg);
    }

    /* Check for maximum percentage error. Maximum absolute error will increase with
       larger input values, since we only have 8 bits of precision. Percentage error
       should be reasonably constant. */
    for (i = 0; i < NCALLS; ++i)
    {
        double exact = sqrt((double)finput[i]);

        /* The random generator can produce exactly 0; a relative error is
           undefined there (division by zero would yield NaN or infinity
           and could pin maxerr at infinity), so skip such inputs. */
        if (exact == 0.0)
        {
            continue;
        }

        ferr = 100.0f * (float)(fabs(fsqrt(finput[i]) - exact)/exact);
        if (ferr > maxerr)
        {
            maxerr = ferr;
            maxval = finput[i];
        }
    }
    sprintf(szMsg, "max sqrt error = %.1f%% (@ %.6f)", maxerr, maxval);
    message(szMsg);
}
/*
Test sin() implemented with LUT API.
*/
/* mysin is just like sin, but with floats. */
float mysin(float fTheta)
{
return (float)sin((double)fTheta);
}
/*
 * Benchmark a LUT-based sine against the C library sin() and report its
 * accuracy.
 *
 * Builds a lookup table for mysin over [0, 2*pi) with LUT_init(), fills
 * finput[] with NCALLS pseudo-random angles, times NCALLS calls of sin()
 * and of LUT_compute() NTIMES times over, reports the average cycles per
 * call for every run through message(), and finally reports the largest
 * absolute difference between LUT_compute() and sin().
 *
 * Takes no arguments and returns nothing; all output goes through
 * message().
 */
void testlutsin(void)
{
    /* Count cycles for each function. */
    unsigned int cycles_sin[NTIMES];
    unsigned int cycles_fsin[NTIMES];   /* holds the LUT_compute timings */
    float maxsinerr = 0.0f;
    /* Initialised so the final report never reads an indeterminate value
       when no input produces a non-zero error. */
    float maxsintheta = 0.0f;
    float ferr;
    int j, i;
    LUT *lutsin;

    /* NOTE(review): the table is never released in this function and no
       release routine is visible from here - confirm whether the LUT API
       provides one. The result is also used without a failure check. */
    lutsin = LUT_init(mysin, 0.0f, 2.0f * 3.14159265f, 256);

    /* Set tables to zero, hopefully will get tables into cache. */
    memset(finput, 0, NCALLS * sizeof(float));
    memset(foutput, 0, NCALLS * sizeof(float));

    /* Generate some random angles. */
    for (i = 0; i < NCALLS; ++i)
    {
        finput[i] = (((float)(rand() % 10000)) / 10000.0f - 0.5f) * 6.28f * 1000.0f;
    }

    /* Pentium rdtsc instruction may behave erratically on first couple calls,
       so make a couple extra calls initially. */
    getcycle();
    getcycle();
    getcycle();

    for (j = 0; j < NTIMES; ++j)
    {
        /* Call sin NCALLS times and see how long it took. */
        cycles_sin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)sin((double)finput[i]);
        }
        cycles_sin[j] = getcycle() - cycles_sin[j];

        /* Same measurement for the table lookup. */
        cycles_fsin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = LUT_compute(finput[i], lutsin);
        }
        cycles_fsin[j] = getcycle() - cycles_fsin[j];
    }

    /* Report the per-call average for each run. */
    for (j = 0; j < NTIMES; ++j)
    {
        sprintf(szMsg, "testlutsin: %d: sin = %.1f cycles; lutsin = %.1f cycles\n",
                j,
                (float)cycles_sin[j]/(float)NCALLS,
                (float)cycles_fsin[j]/(float)NCALLS);
        message(szMsg);
    }

    /* Check for maximum error. */
    /* The increment in the angle going through the table is
       (2 PI/TABLESIZE). The sin curve is steepest close to 0 (well, anywhere the
       curve cuts the x axis) so an angle close to 0 will give the greatest error.
       You might expect the greatest error at (2 PI/TABLESIZE - epsilon), but the
       bias trick for floating point conversion rounds off to the nearest integer,
       so the greatest error is actually at (2 PI/TABLESIZE * 0.5). And using the
       fact that sin(a) ~= a for small angles, we can say the worst case error is
       about PI/TABLESIZE.
       In the test we've run here, this is obscured by the fact that we've chosen
       angles that go way outside the range 0 to 2 PI, so the assignment below
       plants that worst-case angle in finput[0] to make it part of the scan.
       NOTE(review): the angle is derived from SINTABLESIZE while the table
       above is built with 256 entries - confirm the two sizes agree. */
    finput[0] = 3.14159f / (float)SINTABLESIZE;
    for (i = 0; i < NCALLS; ++i)
    {
        ferr = (float)fabs(LUT_compute(finput[i], lutsin) - sin(finput[i]));
        if (ferr > maxsinerr)
        {
            maxsinerr = ferr;
            maxsintheta = finput[i];
        }
    }
    sprintf(szMsg, "max lutsin error = %.6f (@theta = %.4f)", maxsinerr, maxsintheta);
    message(szMsg);
}
/*
 * Time `count` float -> unsigned int conversions written as a plain C cast.
 *
 * count: number of conversions to perform inside the timed region.
 * Returns the difference between the getcycle() readings taken just before
 * and just after the loop.
 */
unsigned int testftoi(int count)
{
    /* volatile keeps every load and store of the loop in the generated code */
    volatile float source = 321.123f;
    volatile unsigned int converted;
    unsigned int start;
    int iter;

    /* Two throwaway readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    /* disassemble this to see nasty "call _ftol", which
       changes FPU rounding mode, converts, changes rounding
       mode back. Bad on Pentium, worse on P3/4. */
    for (iter = 0; iter < count; ++iter)
    {
        converted = (unsigned int)source;
    }

    return getcycle() - start;
}
/*
 * Time `count` unsigned int -> float conversions written as a plain C cast.
 *
 * count: number of conversions to perform inside the timed region.
 * Returns the difference between the getcycle() readings taken just before
 * and just after the loop.
 */
unsigned int testitof(int count)
{
    /* volatile keeps every load and store of the loop in the generated code */
    volatile unsigned int source = 123;
    volatile float converted;
    unsigned int start;
    int iter;

    /* Two throwaway readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    for (iter = 0; iter < count; ++iter)
    {
        converted = (float)source;
    }

    return getcycle() - start;
}
/*
 * Time `count` float -> int conversions done with the "add a bias and read
 * the mantissa bits" trick.
 *
 * count: number of conversions to perform inside the timed region.
 * Returns the difference between the getcycle() readings taken just before
 * and just after the loop.
 *
 * The float's bit pattern is read through a union rather than through a
 * cast pointer: accessing a float object through an unsigned int lvalue
 * (or the reverse) breaks the aliasing rules and is undefined behaviour.
 */
unsigned int testfbiasi(int count)
{
    // - define or variable takes same length of time
    // use 23-n to get n bits after the binary point
    // #define FTOIBIAS 8388608.0f
    // The initialiser sets the first member (bits): biased exponent 150 with
    // a zero mantissa, i.e. the float 2^23 = 8388608.0f.
    static union { unsigned int bits; float value; } ftoibias =
        { ((23-0 + 127) << 23) }; // add (1 << 22) to handle negative numbers
    union { float value; unsigned int bits; } ftmp;
    volatile unsigned int n;
    volatile float f = -321.123456789f;
    int i;
    unsigned int cycles;

    /* Two throwaway readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    cycles = getcycle();

    for (i = 0; i < count; ++i)
    {
        // ftmp = f + FTOIBIAS;
        ftmp.value = f + ftoibias.value;
        n = ftmp.bits & 0x003fffff; // mask off upper 10 bits of garbage in exponent and mantissa
        // Need to duplicate sign bit into upper bits to handle
        // negative numbers correctly. That is not done here, so only the
        // timing of this loop is meaningful for the negative test value.
    }

    cycles = getcycle() - cycles;
    return cycles;
}
/*
 * Time `count` bias-trick conversions done through the INTORFLOAT type
 * (declared elsewhere; this function uses its .i and .f members).
 *
 * The active branch converts float -> int: add the bias as a float, then
 * subtract the bias's bit pattern as an integer. The disabled branch shows
 * the reverse int -> float sequence.
 *
 * count: number of conversions to perform inside the timed region.
 * Returns the difference between the getcycle() readings taken just before
 * and just after the loop.
 */
unsigned int testfbiasi2(int count)
{
    // - define or variable takes same length of time
    // use 23-n to get n bits after the binary point
    // #define FTOIBIAS 8388608.0f
    INTORFLOAT bias = {((23-0 + 127) << 23) /*+ (1 << 22)*/}; // add (1 << 22) to handle negative numbers
    INTORFLOAT work;
    volatile int n = -321;
    volatile float f = 8388609.0f;
    unsigned int start;
    int iter;

    /* Two throwaway readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    for (iter = 0; iter < count; ++iter)
    {
#if 0 // int to float
        work.i = n;
        work.i += bias.i;
        work.f -= bias.f;
        f = work.f;
#else // float to int
        work.f = f;
        work.f += bias.f;
        work.i -= bias.i;
        n = work.i;
#endif
    }

    return getcycle() - start;
}
/*
 * Program entry point: runs the trig, sqrt and LUT-sin tests in that order.
 *
 * argc, argv: accepted for the standard signature but not used.
 * Returns 0 once all three tests have run.
 */
int main(int argc, char *argv[])
{
    /* Mark the parameters as deliberately unused. */
    (void)argc;
    (void)argv;

    testtrig();
    testsqrt();
    testlutsin();

    /* Explicit status: falling off the end of main only yields 0 from C99 on. */
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -