⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 float.c

📁 游戏编程精粹2第三章源码
💻 C
📖 第 1 页 / 共 2 页
字号:
    getcycle();
    getcycle();
    getcycle();
    
    for (j = 0; j < NTIMES; ++j)
    {
        /* Call sin NCALLS times and see how long it took. */
        cycles_sin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)sin((double)finput[i]);
        }
        cycles_sin[j] = getcycle() - cycles_sin[j];

        cycles_cos[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)cos((double)finput[i]);
        }
        cycles_cos[j] = getcycle() - cycles_cos[j];

        cycles_fsin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = fsin(finput[i]);
        }
        cycles_fsin[j] = getcycle() - cycles_fsin[j];

        cycles_fcos[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = fcos(finput[i]);
        }
        cycles_fcos[j] = getcycle() - cycles_fcos[j];
    }

    for (j = 0; j < NTIMES; ++j)
    {
        sprintf(szMsg, "testtrig: %d: sin = %.1f cycles; fsin = %.1f cycles; cos = %.1f cycles; fcos = %.1f cycles\n",
                        j,
                        (float)cycles_sin[j]/(float)NCALLS,
                        (float)cycles_fsin[j]/(float)NCALLS,
                        (float)cycles_cos[j]/(float)NCALLS,
                        (float)cycles_fcos[j]/(float)NCALLS);
        message(szMsg);
    }

    /* Check for maximum error. */
    /* The increment in the angle going through the table is
       (2 PI/TABLESIZE). The sin curve is steepest close to 0 (well, anywhere the
       curve cuts the x axis) so an angle close to 0 will give the greatest error.
       You might expect the greatest error at (2 PI/TABLESIZE - epsilon), but the
       bias trick for floating point conversion rounds off to the nearest integer,
       so the greatest error is actually at (2 PI/TABLESIZE * 0.5). And using the
       fact that sin(a) ~= a for small angles, we can say the worst case error is
       about PI/TABLESIZE.
       
       In the test we've run here, this is obscured by the fact that we've chosen
       angles that go way outside the range 0 to 2 PI. Uncomment the next line
       to convince yourself this analysis is correct. */
    finput[0] = 3.14159f / (float)SINTABLESIZE;
    for (i = 0; i < NCALLS; ++i)
    {
        ferr = (float)fabs(fsin(finput[i]) - sin(finput[i]));
        if (ferr > maxsinerr)
        {
            maxsinerr = ferr;
            maxsintheta = finput[i];
        }

        ferr = (float)fabs(fcos(finput[i]) - cos(finput[i]));
        if (ferr > maxcoserr)
        {
            maxcoserr = ferr;
            maxcostheta = finput[i];
        }
    }

    sprintf(szMsg, "max sin error = %.6f (@theta = %.4f)", maxsinerr, maxsintheta);
    message(szMsg);

    sprintf(szMsg, "max cos error = %.6f (@theta = %.4f)", maxcoserr, maxcostheta);
    message(szMsg);
}

/*
    Benchmark the C library sqrt() against the fsqrt() approximation and
    report the worst relative error of fsqrt().

    Fills the shared finput[] table with NCALLS random values in [0, 10000),
    times NTIMES passes of NCALLS calls to each routine using getcycle(), and
    reports the average cycles per call for each pass through message().
    Finally reports the largest percentage error of fsqrt() relative to sqrt()
    and the input value at which it occurred.
*/
void    testsqrt(void)
{
    /* Count cycles for each function. */
    unsigned int    cycles_sqrt[NTIMES];
    unsigned int    cycles_fsqrt[NTIMES];

    float   maxerr = 0.0f;
    float   maxval = 0.0f;  /* defined even if no sample ever beats maxerr */
    float   ferr;
    double  exact;          /* reference result from the C library */

    int             j, i;

    sqrtinit();

    /* Set tables to zero, hopefully will get tables into cache. */
    memset(finput,  0, NCALLS * sizeof(float));
    memset(foutput, 0, NCALLS * sizeof(float));

    /* Generate some random values to take the square root of. */
    for (i = 0; i < NCALLS; ++i)
    {
        finput[i] = (((float)(rand() % 10000)) / 10000.0f) * 10000.0f;
    }

    /* Pentium rdtsc instruction may behave erratically on first couple calls,
       so make a couple extra calls initially. */
    getcycle();
    getcycle();
    getcycle();
    
    for (j = 0; j < NTIMES; ++j)
    {
        /* Call sqrt NCALLS times and see how long it took. */
        cycles_sqrt[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)sqrt((double)finput[i]);
        }
        cycles_sqrt[j] = getcycle() - cycles_sqrt[j];

        /* Same again for the fast approximation. */
        cycles_fsqrt[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = fsqrt(finput[i]);
        }
        cycles_fsqrt[j] = getcycle() - cycles_fsqrt[j];
    }

    for (j = 0; j < NTIMES; ++j)
    {
        sprintf(szMsg, "testsqrt: %d: sqrt = %.1f cycles; fsqrt = %.1f cycles\n",
                        j,
                        (float)cycles_sqrt[j]/(float)NCALLS,
                        (float)cycles_fsqrt[j]/(float)NCALLS);
        message(szMsg);
    }

    /* Check for maximum percentage error. Maximum absolute error will increase with
       larger input values, since we only have 8 bits of precision. Percentage error
       should be reasonably constant. */
    for (i = 0; i < NCALLS; ++i)
    {
        exact = sqrt(finput[i]);

        /* The random inputs can be exactly 0; a relative error against a zero
           reference is a division by zero (NaN or inf), so skip those samples. */
        if (exact > 0.0)
        {
            ferr = 100.0f * (float)(fabs(fsqrt(finput[i]) - exact) / exact);
            if (ferr > maxerr)
            {
                maxerr = ferr;
                maxval = finput[i];
            }
        }
    }

    sprintf(szMsg, "max sqrt error = %.1f%% (@ %.6f)", maxerr, maxval);
    message(szMsg);
}

/*
    Test sin() implemented with LUT API.
*/

/* mysin is just like sin, but with floats. */
float   mysin(float fTheta)
{
    return (float)sin((double)fTheta);
}

/*
    Benchmark the C library sin() against a sine lookup table built with the
    LUT API, and report the table's worst absolute error.

    Builds a table of mysin over [0, 2 PI), fills the shared finput[] table
    with NCALLS random angles, times NTIMES passes of NCALLS calls to sin()
    and to LUT_compute() using getcycle(), and reports the average cycles per
    call for each pass through message(). Finally reports the largest absolute
    difference between LUT_compute() and sin() and the angle where it occurred.

    NOTE(review): the table returned by LUT_init is never released; no LUT
    teardown function is visible from this function.
*/
void    testlutsin(void)
{
    /* Entry count of the table built below. It also sets the worst-case
       probe angle, so the two cannot drift apart. */
    enum { LUTSIN_TABLESIZE = 256 };

    /* Count cycles for each function. */
    unsigned int    cycles_sin[NTIMES];
    unsigned int    cycles_fsin[NTIMES];

    float   maxsinerr = 0.0f;
    float   maxsintheta = 0.0f; /* defined even if no sample ever beats maxsinerr */
    float   ferr;

    int     j, i;
    LUT     *lutsin;

    lutsin = LUT_init(mysin, 0.0f, 2.0f * 3.14159265f, LUTSIN_TABLESIZE);
    if (lutsin == NULL)
    {
        /* NOTE(review): assumes LUT_init reports failure by returning NULL - confirm. */
        sprintf(szMsg, "testlutsin: LUT_init failed\n");
        message(szMsg);
        return;
    }

    /* Set tables to zero, hopefully will get tables into cache. */
    memset(finput,  0, NCALLS * sizeof(float));
    memset(foutput, 0, NCALLS * sizeof(float));

    /* Generate some random angles. */
    for (i = 0; i < NCALLS; ++i)
    {
        finput[i] = (((float)(rand() % 10000)) / 10000.0f - 0.5f) * 6.28f * 1000.0f;
    }

    /* Pentium rdtsc instruction may behave erratically on first couple calls,
       so make a couple extra calls initially. */
    getcycle();
    getcycle();
    getcycle();
    
    for (j = 0; j < NTIMES; ++j)
    {
        /* Call sin NCALLS times and see how long it took. */
        cycles_sin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = (float)sin((double)finput[i]);
        }
        cycles_sin[j] = getcycle() - cycles_sin[j];

        /* Same again for the table lookup. */
        cycles_fsin[j] = getcycle();
        for (i = 0; i < NCALLS; ++i)
        {
            foutput[i] = LUT_compute(finput[i], lutsin);
        }
        cycles_fsin[j] = getcycle() - cycles_fsin[j];
    }

    for (j = 0; j < NTIMES; ++j)
    {
        sprintf(szMsg, "testlutsin: %d: sin = %.1f cycles; lutsin = %.1f cycles\n",
                        j,
                        (float)cycles_sin[j]/(float)NCALLS,
                        (float)cycles_fsin[j]/(float)NCALLS);
        message(szMsg);
    }

    /* Check for maximum error. */
    /* The increment in the angle going through the table is
       (2 PI/TABLESIZE). The sin curve is steepest close to 0 (well, anywhere the
       curve cuts the x axis) so an angle close to 0 will give the greatest error.
       If the lookup rounds to the nearest table entry, the greatest error is at
       half an increment, (2 PI/TABLESIZE * 0.5), and since sin(a) ~= a for small
       angles the worst case error is about PI/TABLESIZE.
       NOTE(review): round-to-nearest is assumed for LUT_compute - confirm.

       The random angles above go way outside the range 0 to 2 PI, which tends
       to obscure this, so the first sample is overwritten with the worst-case
       angle for this table's size to make sure it is always measured. */
    finput[0] = 3.14159f / (float)LUTSIN_TABLESIZE;
    for (i = 0; i < NCALLS; ++i)
    {
        ferr = (float)fabs(LUT_compute(finput[i], lutsin) - sin(finput[i]));
        if (ferr > maxsinerr)
        {
            maxsinerr = ferr;
            maxsintheta = finput[i];
        }
    }

    sprintf(szMsg, "max lutsin error = %.6f (@theta = %.4f)", maxsinerr, maxsintheta);
    message(szMsg);
}

/*
    Time `count` float -> unsigned int conversions written as a plain C cast.
    Returns the difference between the getcycle() readings taken immediately
    before and after the loop.
*/
unsigned int    testftoi(int count)
{
    volatile float          src = 321.123f;
    volatile unsigned int   dst;
    unsigned int            start;
    int                     iter = 0;

    /* Two throw-away readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    /* Disassemble this loop to see the nasty "call _ftol", which changes
       the FPU rounding mode, converts, then changes the rounding mode back.
       Bad on Pentium, worse on P3/4. Both operands are volatile so the
       conversion is actually performed on every pass. */
    while (iter < count)
    {
        dst = (unsigned int)src;
        ++iter;
    }

    return getcycle() - start;
}

/*
    Time `count` unsigned int -> float conversions written as a plain C cast.
    Returns the difference between the getcycle() readings taken immediately
    before and after the loop.
*/
unsigned int    testitof(int count)
{
    volatile unsigned int   src = 123;
    volatile float          dst;
    unsigned int            start;
    int                     iter = 0;

    /* Two throw-away readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    /* Both operands are volatile so the conversion is performed every pass. */
    while (iter < count)
    {
        dst = (float)src;
        ++iter;
    }

    return getcycle() - start;
}

/*
    Time `count` float -> integer conversions done with the "add a bias, then
    read the mantissa bits" trick instead of a C cast.
    Returns the difference between the getcycle() readings taken immediately
    before and after the loop.

    The float/integer reinterpretation goes through a union rather than
    pointer casts: reading a float object through an unsigned int lvalue (or
    the reverse) breaks the strict aliasing rule and is undefined behavior.
    NOTE(review): relies on unsigned int and float having the same size.
*/
unsigned int    testfbiasi(int count)
{
    /* One storage location viewed either as raw bits or as a float. */
    union fbits
    {
        unsigned int    u;
        float           f;
    };

    /* Bias with biased exponent (23 - 0 + 127) and a zero mantissa, i.e. the
       float 8388608.0f (2^23). Use 23-n to get n bits after the binary point.
       A #define'd float constant takes the same length of time as this
       variable. Add (1 << 22) to handle negative numbers. */
    static union fbits      ftoibias = { (23-0 + 127) << 23 };
    volatile unsigned int   n;
    volatile float          f = -321.123456789f;
    union fbits             ftmp;
    int                     i;
    unsigned int            cycles;

    /* Two throw-away readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    cycles = getcycle();

    for (i = 0; i < count; ++i)
    {
        ftmp.f = f + ftoibias.f;

        /* Mask off upper 10 bits of garbage in exponent and mantissa.
           Need to duplicate sign bit into upper bits to handle negative
           numbers correctly; that is not done here, and the value stored in
           n is never read back. */
        n = ftmp.u & 0x003fffff;
    }

    cycles = getcycle() - cycles;

    return cycles;
}

/*
    Time `count` float -> int conversions done with the bias trick, using the
    INTORFLOAT union for the float/integer reinterpretation: add the bias as
    a float, then subtract the bias's bit pattern as an integer.
    Returns the difference between the getcycle() readings taken immediately
    before and after the loop.

    The reverse (int -> float) variant is kept under the disabled branch of
    the #if below.
*/
unsigned int    testfbiasi2(int count)
{
    /* Bias bit pattern: biased exponent (23 - 0 + 127), zero mantissa.
       Use 23-n to get n bits after the binary point; a #define'd float
       constant (8388608.0f) takes the same length of time as a variable.
       Add (1 << 22) to handle negative numbers. */
    INTORFLOAT      bias = {((23-0 + 127) << 23)};
    volatile int    ivalue = -321;
    volatile float  fvalue = 8388609.0f;
    INTORFLOAT      scratch;
    unsigned int    start;
    int             iter = 0;

    /* Two throw-away readings before the one that starts the measurement. */
    getcycle();
    getcycle();
    start = getcycle();

    while (iter < count)
    {
#if 1   // float to int
        scratch.f = fvalue;
        scratch.f += bias.f;
        scratch.i -= bias.i;
        ivalue = scratch.i;
#else   // int to float
        scratch.i = ivalue;
        scratch.i += bias.i;
        scratch.f -= bias.f;
        fvalue = scratch.f;
#endif
        ++iter;
    }

    return getcycle() - start;
}

/*
    Program entry point: runs the trig, square-root and LUT-sine benchmarks
    in that order. The command-line arguments are not used.
    Returns 0.
*/
int	main(int argc, char *argv[])
{
    /* Unused; referenced only to silence unused-parameter warnings. */
    (void)argc;
    (void)argv;

    testtrig();
    testsqrt();
    testlutsin();

    /* Explicit status: before C99, falling off the end of main leaves the
       value returned to the host environment undefined. */
    return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -