📄 imports.c
字号:
#endif}/** inv_sqrt - A single precision 1/sqrt routine for IEEE format floats. written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk and Vesa Karvonen.*/float_mesa_inv_sqrtf(float n){#if defined(USE_IEEE) && !defined(DEBUG) float r0, x0, y0; float r1, x1, y1; float r2, x2, y2;#if 0 /* not used, see below -BP */ float r3, x3, y3;#endif union { float f; unsigned int i; } u; unsigned int magic; /* Exponent part of the magic number - We want to: 1. subtract the bias from the exponent, 2. negate it 3. divide by two (rounding towards -inf) 4. add the bias back Which is the same as subtracting the exponent from 381 and dividing by 2. floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2) */ magic = 381 << 23; /* Significand part of magic number - With the current magic number, "(magic - u.i) >> 1" will give you: for 1 <= u.f <= 2: 1.25 - u.f / 4 for 2 <= u.f <= 4: 1.00 - u.f / 8 This isn't a bad approximation of 1/sqrt. The maximum difference from 1/sqrt will be around .06. After three Newton-Raphson iterations, the maximum difference is less than 4.5e-8. (Which is actually close enough to make the following bias academic...) To get a better approximation you can add a bias to the magic number. For example, if you subtract 1/2 of the maximum difference in the first approximation (.03), you will get the following function: for 1 <= u.f <= 2: 1.22 - u.f / 4 for 2 <= u.f <= 3.76: 0.97 - u.f / 8 for 3.76 <= u.f <= 4: 0.72 - u.f / 16 (The 3.76 to 4 range is where the result is < .5.) This is the closest possible initial approximation, but with a maximum error of 8e-11 after three NR iterations, it is still not perfect. If you subtract 0.0332281 instead of .03, the maximum error will be 2.5e-11 after three NR iterations, which should be about as close as is possible. for 1 <= u.f <= 2: 1.2167719 - u.f / 4 for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8 for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16 */ magic -= (int)(0.0332281 * (1 << 25)); u.f = n; u.i = (magic - u.i) >> 1; /* Instead of Newton-Raphson, we use Goldschmidt's algorithm, which allows more parallelism. From what I understand, the parallelism comes at the cost of less precision, because it lets error accumulate across iterations. */ x0 = 1.0f; y0 = 0.5f * n; r0 = u.f; x1 = x0 * r0; y1 = y0 * r0 * r0; r1 = 1.5f - y1; x2 = x1 * r1; y2 = y1 * r1 * r1; r2 = 1.5f - y2;#if 1 return x2 * r2; /* we can stop here, and be conformant -BP */#else x3 = x2 * r2; y3 = y2 * r2 * r2; r3 = 1.5f - y3; return x3 * r3;#endif#elif defined(XFree86LOADER) && defined(IN_MODULE) return 1.0F / xf86sqrt(n);#else return (float) (1.0 / sqrt(n));#endif}/** * Wrapper around either pow() or xf86pow(). */double_mesa_pow(double x, double y){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86pow(x, y);#else return pow(x, y);#endif}/** * Find the first bit set in a word. */int_mesa_ffs(int i){#if (defined(_WIN32) && !defined(__MINGW32__) ) || defined(__IBMC__) || defined(__IBMCPP__) register int bit = 0; if (i != 0) { if ((i & 0xffff) == 0) { bit += 16; i >>= 16; } if ((i & 0xff) == 0) { bit += 8; i >>= 8; } if ((i & 0xf) == 0) { bit += 4; i >>= 4; } while ((i & 1) == 0) { bit++; i >>= 1; } } return bit;#elif defined(XFree86LOADER) && defined(IN_MODULE) return xf86ffs(i);#else return ffs(i);#endif}/** * Return number of bits set in given GLuint. */unsigned int_mesa_bitcount(unsigned int n){ unsigned int bits; for (bits = 0; n > 0; n = n >> 1) { bits += (n & 1); } return bits;}/** * Convert a 4-byte float to a 2-byte half float. * Based on code from: * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html */GLhalfARB_mesa_float_to_half(float val){ const int flt = *((int *) (void *) &val); const int flt_m = flt & 0x7fffff; const int flt_e = (flt >> 23) & 0xff; const int flt_s = (flt >> 31) & 0x1; int s, e, m = 0; GLhalfARB result; /* sign bit */ s = flt_s; /* handle special cases */ if ((flt_e == 0) && (flt_m == 0)) { /* zero */ /* m = 0; - already set */ e = 0; } else if ((flt_e == 0) && (flt_m != 0)) { /* denorm -- denorm float maps to 0 half */ /* m = 0; - already set */ e = 0; } else if ((flt_e == 0xff) && (flt_m == 0)) { /* infinity */ /* m = 0; - already set */ e = 31; } else if ((flt_e == 0xff) && (flt_m != 0)) { /* NaN */ m = 1; e = 31; } else { /* regular number */ const int new_exp = flt_e - 127; if (new_exp < -24) { /* this maps to 0 */ /* m = 0; - already set */ e = 0; } else if (new_exp < -14) { /* this maps to a denorm */ unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/ e = 0; switch (exp_val) { case 0: _mesa_warning(NULL, "float_to_half: logical error in denorm creation!\n"); /* m = 0; - already set */ break; case 1: m = 512 + (flt_m >> 14); break; case 2: m = 256 + (flt_m >> 15); break; case 3: m = 128 + (flt_m >> 16); break; case 4: m = 64 + (flt_m >> 17); break; case 5: m = 32 + (flt_m >> 18); break; case 6: m = 16 + (flt_m >> 19); break; case 7: m = 8 + (flt_m >> 20); break; case 8: m = 4 + (flt_m >> 21); break; case 9: m = 2 + (flt_m >> 22); break; case 10: m = 1; break; } } else if (new_exp > 15) { /* map this value to infinity */ /* m = 0; - already set */ e = 31; } else { /* regular */ e = new_exp + 15; m = flt_m >> 13; } } result = (s << 15) | (e << 10) | m; return result;}/** * Convert a 2-byte half float to a 4-byte float. * Based on code from: * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html */float_mesa_half_to_float(GLhalfARB val){ /* XXX could also use a 64K-entry lookup table */ const int m = val & 0x3ff; const int e = (val >> 10) & 0x1f; const int s = (val >> 15) & 0x1; int flt_m, flt_e, flt_s, flt; float result; /* sign bit */ flt_s = s; /* handle special cases */ if ((e == 0) && (m == 0)) { /* zero */ flt_m = 0; flt_e = 0; } else if ((e == 0) && (m != 0)) { /* denorm -- denorm half will fit in non-denorm single */ const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */ float mantissa = ((float) (m)) / 1024.0f; float sign = s ? -1.0f : 1.0f; return sign * mantissa * half_denorm; } else if ((e == 31) && (m == 0)) { /* infinity */ flt_e = 0xff; flt_m = 0; } else if ((e == 31) && (m != 0)) { /* NaN */ flt_e = 0xff; flt_m = 1; } else { /* regular */ flt_e = e + 112; flt_m = m << 13; } flt = (flt_s << 31) | (flt_e << 23) | flt_m; result = *((float *) (void *) &flt); return result;}/*@}*//**********************************************************************//** \name Sort & Search *//*@{*//** * Wrapper for bsearch(). */void *_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *) ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86bsearch(key, base, nmemb, size, compar);#else return bsearch(key, base, nmemb, size, compar);#endif}/*@}*//**********************************************************************//** \name Environment vars *//*@{*//** * Wrapper for getenv(). */char *_mesa_getenv( const char *var ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86getenv(var);#elif defined(_XBOX) return NULL;#else return getenv(var);#endif}/*@}*//**********************************************************************//** \name String *//*@{*//** Wrapper around either strstr() or xf86strstr() */char *_mesa_strstr( const char *haystack, const char *needle ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strstr(haystack, needle);#else return strstr(haystack, needle);#endif}/** Wrapper around either strncat() or xf86strncat() */char *_mesa_strncat( char *dest, const char *src, size_t n ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strncat(dest, src, n);#else return strncat(dest, src, n);#endif}/** Wrapper around either strcpy() or xf86strcpy() */char *_mesa_strcpy( char *dest, const char *src ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strcpy(dest, src);#else return strcpy(dest, src);#endif}/** Wrapper around either strncpy() or xf86strncpy() */char *_mesa_strncpy( char *dest, const char *src, size_t n ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strncpy(dest, src, n);#else return strncpy(dest, src, n);#endif}/** Wrapper around either strlen() or xf86strlen() */size_t_mesa_strlen( const char *s ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strlen(s);#else return strlen(s);#endif}/** Wrapper around either strcmp() or xf86strcmp() */int_mesa_strcmp( const char *s1, const char *s2 ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strcmp(s1, s2);#else return strcmp(s1, s2);#endif}/** Wrapper around either strncmp() or xf86strncmp() */int_mesa_strncmp( const char *s1, const char *s2, size_t n ){#if defined(XFree86LOADER) && defined(IN_MODULE) return xf86strncmp(s1, s2, n);#else return strncmp(s1, s2, n);#endif}/** Implemented using _mesa_malloc() and _mesa_strcpy */char *_mesa_strdup( const char *s )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -