⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 float.c

📁 游戏编程精粹2第三章源码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright (C) Yossarian King, 2001. 
 * All rights reserved worldwide.
 *
 * This software is provided "as is" without express or implied
 * warranties. You may freely copy and compile this source into
 * applications you distribute provided that the copyright text
 * below is included in the resulting source code, for example:
 * "Portions Copyright (C) Yossarian King, 2001"
 */
/*
    float.c

    Sample code for "Floating-point Tricks -- Improving
    performance with IEEE floating-point", from Graphics
    Gems 2.

    Yossarian King / Electronic Arts Canada / April 2001
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>


/**************************************************************
    Portability

    Hopefully you can just modify stuff in this section to get
    this to work on other compilers and platforms. I've tried
    to make the code easily portable, but it's only been compiled
    with Microsoft Visual C++ and run on Windows PC's. 
 **************************************************************/

/* Message display. */
extern void     message(char *sz);
static char     szMsg[256];

/* Define this however your compiler likes it. */
#define INLINE      __inline 

/* Get cycle count -- this is extremely Pentium-specific. Any very precise and
   stable timer should work. If precision is limited you may get better results
   by increasing the NCALLS define below. */

INLINE  unsigned int    getcycle(void)
{
    __asm
    {
        cpuid
        rdtsc
    }
}

/**************************************************************
    Common structures and defines

     Stuff used throughout the code below.
 **************************************************************/

/* INTORFLOAT union for easy access to bits of a float. */
typedef union
{
    int     i;          // as integer
    float   f;          // as float
    struct              // as bit fields
    {
        unsigned int    sign:1;
        unsigned int    biasedexponent:8;
        unsigned int    significand;
    }
            bits;
}
INTORFLOAT;

/* Bias constant used for fast conversions between int and float. First element
   in INTORFLOAT union is integer -- we are storing biased exponent 23, mantissa .1, which
   is equivalent to 1.5 x 2^23. */
INTORFLOAT  bias = {((23 + 127) << 23) + (1 << 22)};

/**************************************************************
    Linear lookup for sine and cosine

    This section implements a lookup table version of sin
    and cos, both using the same table.
 **************************************************************/

#define SINTABLESIZE    256
#define TWOPISCALE      ((float)SINTABLESIZE / (2.0f * 3.14159265f))

static float    sintable[SINTABLESIZE];

/*
    Initialize the lookup table.
*/

void    triginit(void)
{
    int     i;
    double  theta;

    /* Integer range 0 to SINTABLESIZE is converted to floating
       point 0 to 2 pi range (radians for a full circle). */
    for (i = 0; i < SINTABLESIZE; ++i)
    {
        theta = (double)i/TWOPISCALE;
        sintable[i] = (float)sin(theta);
    }
}

/*
    fsin - sin calculation with lookup table.
*/

INLINE float    fsin(float theta)
{
    unsigned int    i;

#if 1   /* Use biasing trick for fast float to int conversion. */
    INTORFLOAT  fi;
    fi.f = theta * TWOPISCALE + bias.f; /* Scale to table index range and add conversion bias. */
    i = fi.i & (SINTABLESIZE - 1);         /* Mask off lower bits, assuming SINTABLESIZE is power of 2. */
#else   /* Use typecasting for slow float to int conversion. */
    i = ((unsigned int)(theta * TWOPISCALE)) & (SINTABLESIZE - 1);
#endif

    /* Return sine from table. */
    return sintable[i];
}

/*
    fcos - cos calculation with lookup table.
*/

INLINE float    fcos(float theta)
{
    unsigned int    i;

    INTORFLOAT  fi;
    fi.f = theta * TWOPISCALE + bias.f;             /* Scale to table index range and add conversion bias. */

    /* Note how adding SINTABLESIZE/4 lets us compute cos instead of sin,
       since cos(theta) = sin(theta + Pi/4). */
    i = (fi.i + (SINTABLESIZE/4)) & (SINTABLESIZE - 1);   /* Mask off lower bits, assuming SINTABLESIZE is power of 2. */

    return sintable[i];
}

/**************************************************************
    Logarithmic lookup for square root.
 **************************************************************/

#define SQRTTABLESIZE       256             /* Note: code below assumes this is 256. */
static unsigned int sqrttable[SQRTTABLESIZE];

void    sqrtinit(void)
{
    int             i;
    INTORFLOAT      fi;
    unsigned int    e0;

    /* These two tables are indexed into by the least significant bit (e0)
       of the biased exponent. exponent[] gives the biased exponent
       corresponding to e0, while biasrestore[] gives the amount to be
       added back to the exponent after dividing it by 2 in order to
       restore the bias to the correct value. */
    unsigned int    exponent[2] = { (1+127)<<23, (0+127)<<23 };
    unsigned int    biasrestore[2] = { 63<<23, 64<<23 };

    for (i = 0; i < SQRTTABLESIZE; ++i)
    {
        /* Build the floating point number for this table entry. Table index
           is 8 bits, which we use as e0m22m21...m16. To turn this into a
           floating point number we add in the correct exponent bias and set
           m15 to 1 (which is like adding 0.5, so we hit the middle of the
           range of numbers that we're estimating). */
        e0 = (i >> 7);
        fi.i = exponent[e0] | (i << 16) | (1 << 15);

        /* Compute square root. */
        fi.f = (float)sqrt((double)fi.f);

        /* Extract 23 bit mantissa of square root. */
        sqrttable[i] = fi.i & ((1 << 23) - 1);

        /* Exponent is calculated programmatically by fsqrt, so we don't
           include it in the table. However, the programmatic calculation
           divides the exponent bias by 2, so we put a bias restore into
           the table -- basically adding 63 or 64, depending on whether the
           exponent was odd or even, which we can determine from e0. */
        sqrttable[i] += biasrestore[e0];
    }
}

INLINE float    fsqrt(float f)
{
    INTORFLOAT      fi;
    unsigned int    e, n;

    /* Get raw bits of floating point f. */
    fi.f = f;
    n = fi.i;

    /* Divide exponent by 2 -- this is done in-place, no need to shift all
       the way down to 0 the least significant bits and then back up again.
       Note that we are also dividing the exponent bias (127) by 2, this
       gets corrected when we add in the sqrttable entry. */
    e = (n >> 1) & 0x3f800000;

    /* n is the table index -- we're using 1 bit from the original exponent
       (e0) plus 7 bits from the mantissa. */
    n = (n >> 16) & 0xff;

    /* Add calculated exponent to mantissa + re-biasing constant from table. */
    fi.i = e + sqrttable[n];

    /* Return floating point result. */
    return fi.f;
}

/**************************************************************
    API for general linear lookup tables.
 **************************************************************/

/* Following structure stores everything required for the lookup table
   for a particular function. */
typedef struct
{
    int     nSize;          /* Number of table entries. */
    float   indexScale;     /* For scaling table index to get float. */
    float   indexOffset;    /* And offset. */
    float   indexOffsetBias;/* Used for float to int conversion combined with offset. */
    float   *pTable;        /* Memory for table. */
}
LUT;

LUT *LUT_init(float (*pFunc)(float fArg), float fMin, float fMax, int nSize)
{
    LUT *lut;
    int i;

    lut = (LUT *)malloc(sizeof(LUT));
    lut->pTable = (float *)malloc(nSize * sizeof(float));

    lut->nSize = nSize - 1;     /* Will be used as a mask; assumes power of 2. */

    /* Given a float f, index into table is calculated as:
            i = (f - offset) * scale + bias
              = f * scale + (bias - offset * scale)
       where 'bias' is for converting float to int. */
    lut->indexScale = ((float)nSize / (fMax - fMin));
    lut->indexOffset = fMin;
    lut->indexOffsetBias = bias.f - lut->indexOffset * lut->indexScale;

    /* Fill in table. */
    for (i = 0; i < nSize; ++i)
    {
        lut->pTable[i] = pFunc((float)i / lut->indexScale + lut->indexOffset);
    }

    return lut;
}

__inline float  LUT_compute(float fArg, LUT *lut)
{
    unsigned int    i;
    INTORFLOAT      fi;

    fi.f = fArg * lut->indexScale + lut->indexOffsetBias;
    i = fi.i & lut->nSize;

    return lut->pTable[i];
}


/**************************************************************
    Other odds and ends.
 **************************************************************/

/* --------- clamping -------- */

/* Clamp with floats. */
__inline float  clampf0(float f)
{
    if (f < 0.0f)
        return 0.0f;

    return f;
}

/* Clamp with ints. */
__inline float  clampi0(float f)
{
// NOTE: here's a fast absolute value:
//    *(int *)&f &= 0x7fffffff;

#if 1   // no compares
    int s = (*(int *)&f) >> 31;
    s = ~s;
    *(int *)&f &= s;
#else
    if (*(int *)&f < 0)
        return 0.0f;
#endif

    return f;
}

/* Clampt to 1. */
__inline float  clampf1(float f)
{
    if (f > 1.0f)
        return 1.0f;

    return f;
}

__inline float  clampi1(float f)
{
    if (*(int *)&f > 1.0f)
        return 1.0f;

    return f;
}

/**************************************************************
    Benchmark code for all that stuff above.
 **************************************************************/

/* Run each test NTIMES times, make NCALLS calls to the function
   being tested. Running the test multiple times ensures the cache
   is warmed up, making many calls gives more accurate timing. */
#define NTIMES          3
#define NCALLS          1000

/* Tables to store input and output values. If values are stored in
   a scalar variable then compiler will typically optimize it into
   a register, or even optimize the calculation away entirely, which
   makes benchmarking difficult. */
float   finput[NCALLS];
float   foutput[NCALLS];

void    testtrig(void)
{
    /* Count cycles for each function. */
    unsigned int    cycles_sin[NTIMES];
    unsigned int    cycles_cos[NTIMES];
    unsigned int    cycles_fsin[NTIMES];
    unsigned int    cycles_fcos[NTIMES];

    float   maxsinerr = 0.0f;
    float   maxsintheta;
    float   maxcoserr = 0.0f;
    float   maxcostheta;
    float   ferr;

    int             j, i;

    triginit();

    /* Set tables to zero, hopefully will get tables into cache. */
    memset(finput,  0, NCALLS * sizeof(float));
    memset(foutput, 0, NCALLS * sizeof(float));

    /* Generate some random angles. */
    for (i = 0; i < NCALLS; ++i)
    {
        finput[i] = (((float)(rand() % 10000)) / 10000.0f - 0.5f) * 6.28f * 1000.0f;
    }

    /* Pentium rdtsc instruction may behave erratically on first couple calls,
       so make a couple extra calls initially. */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -