📄 math.h
字号:
/*
Copyright (C) 2008 Rouslan Dimitrov
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// math.cpp and math.h should be compiled with SSE optimizations enabled
// Matrices are column-major
#pragma once
#include <xmmintrin.h>
#include "memalloc.h"
const unsigned int FABS_MASK = 0x7fffffff;
const unsigned int INV_MASK = 0x80000000;
const float EPSILON = 0.001f;
const float PI = 3.141592653589793f;
const float HALF_PI = 0.5f * PI;
#define BIT(i) (1<<i)
#define CPUCAP_NOSSE 0
#define CPUCAP_SSE1 BIT(0)
#define CPUCAP_SSE2 BIT(1)
#define CPUCAP_SSE3 BIT(2)
enum {
P_ERR = -1,
P_FRONT,
P_BACK,
P_ON,
P_CROSS
};
extern int g_cpuCaps;
inline float fabs(float f) {
int i = *reinterpret_cast<int*>(&f);
i &= FABS_MASK;
return *reinterpret_cast<float*>(&i);
}
int __fastcall MathInit();
void __fastcall MatrixVectorBatchMultiply(float *dest, float *m, float *v, int byte_size); // defined in matrix.asm
// More function declarations are at the end of this file
#define sPlane vec4
class vec4 : public CMemObj
{
public:
vec4(){};
vec4(float x);
vec4(float x1, float y1, float z1, float w1=1.0f):x(x1),y(y1),z(z1),w(w1){};
//float* operator&();
vec4 operator+(vec4& v);
vec4& operator+=(vec4& v);
vec4 operator-(vec4& v);
vec4& operator-=(vec4& v);
int operator==(vec4& v);
vec4 operator*(float f);
vec4 operator*(vec4& v); // elementwise multiplication
friend
vec4 operator*(float f, vec4& v);
vec4& operator*=(float f);
vec4 operator/(vec4& v);
vec4& operator=(vec4& v);
float& operator[](int i);
vec4 cross3D(vec4& v); // Clears w
float dot(vec4& v);
float magnitude();
void normalize();
void clear();
int equal(vec4& v, float epsilon = EPSILON);
// Component-wise min/max, useful for bounds
vec4 min(vec4 &v);
void minSelf(vec4 &v);
vec4 max(vec4 &v);
void maxSelf(vec4 &v);
// Plane related
vec4& normalFast(); // note only x,y,z are valid
vec4 normal();
float dist();
int side(vec4 &p);
int side2(vec4 &p); // returns only FRONT and BACK (if point is ON then FRONT is returned)
vec4 lineIntersect(vec4 &start, vec4 &end);
int Hash();
union {
struct {
float x;
float y;
float z;
float w;
};
__m128 dataSSE;
float data[4];
};
};
inline vec4 operator*(float f, vec4& v) {
return v*f;
}
inline vec4::vec4(float x) {
dataSSE = _mm_set1_ps(x);
}
inline vec4 vec4::operator+(vec4& v) {
vec4 r;
r.dataSSE = _mm_add_ps(dataSSE, v.dataSSE);
return r;
}
inline vec4& vec4::operator+=(vec4& v) {
dataSSE = _mm_add_ps(dataSSE, v.dataSSE);
return *this;
}
inline vec4 vec4::operator-(vec4& v) {
vec4 r;
r.dataSSE = _mm_sub_ps(dataSSE, v.dataSSE);
return r;
}
inline vec4& vec4::operator-=(vec4& v) {
dataSSE = _mm_sub_ps(dataSSE, v.dataSSE);
return *this;
}
inline int vec4::operator==(vec4& v) {
return equal(v);
}
inline vec4 vec4::operator*(float f) {
vec4 r;
__m128 f4 = _mm_load_ss(&f);
f4 = _mm_shuffle_ps(f4, f4, 0x00);
r.dataSSE = _mm_mul_ps(dataSSE, f4);
return r;
}
inline vec4 vec4::operator*(vec4& v) {
vec4 r;
r.dataSSE = _mm_mul_ps(dataSSE, v.dataSSE);
return r;
}
inline vec4 vec4::operator/(vec4& v) {
vec4 r;
r.dataSSE = _mm_div_ps(dataSSE, v.dataSSE);
return r;
}
inline vec4& vec4::operator*=(float f) {
__m128 f4 = _mm_load_ss(&f);
f4 = _mm_shuffle_ps(f4, f4, 0x00);
dataSSE = _mm_mul_ps(dataSSE, f4);
return *this;
}
inline vec4& vec4::operator=(vec4& v) {
_mm_store_ps( data, _mm_load_ps((float*)&v) );
return *this;
}
inline float& vec4::operator[](int i) {
return data[i];
}
// Hash function for planes simply returns D
inline int vec4::Hash() {
return (int)w;
}
inline vec4& vec4::normalFast() {
return *this;
}
inline vec4 vec4::normal() {
vec4 r = *this;
r.w = 0;
return r;
}
inline float vec4::dist() {
return w;
}
inline int vec4::side(vec4 &p) {
float d = this->dot(p);
if(fabs(d) < EPSILON)
return P_ON;
//int r = _mm_movemask_ps(_mm_load_ss(&d));
unsigned int r = (*reinterpret_cast<unsigned int*>((float*)&d))>>31;
return r;
}
inline int vec4::side2(vec4 &p) {
float d = this->dot(p) + EPSILON;
unsigned int r = (*reinterpret_cast<unsigned int*>((float*)&d))>>31;
return r;
}
inline vec4 vec4::lineIntersect(vec4 &start, vec4 &end) {
float d_s = this->dot(start);
float d_e = this->dot(end);
return start + (d_s)/(d_s - d_e) * (end - start);
}
inline void vec4::clear() {
dataSSE = _mm_setzero_ps();
//dataSSE = _mm_xor_ps(dataSSE, dataSSE);
}
inline int vec4::equal(vec4& v, float epsilon) {
__m128 dif = _mm_sub_ps(dataSSE, v.dataSSE);
__m128 fabs_mask4 = _mm_set1_ps(*reinterpret_cast<float*>((unsigned int*)&FABS_MASK));
__m128 epsilon4 = _mm_set1_ps(epsilon);
dif = _mm_and_ps(dif, fabs_mask4);
dif = _mm_sub_ps(epsilon4, dif);
return !_mm_movemask_ps(dif);
}
inline float vec4::dot(vec4& v) {
#if 0
__m128 r1 = _mm_mul_ps(dataSSE, v.dataSSE);
__m128 r2 = _mm_shuffle_ps(r1, r1, 0x4E);
r1 = _mm_add_ps(r1, r2);
r2 = _mm_shuffle_ps(r1, r1, 0x11);
r1 = _mm_add_ps(r1, r2);
return r1.m128_f32[0];
#else
__m128 sq = _mm_mul_ps(this->dataSSE, v.dataSSE);
return sq.m128_f32[0] + sq.m128_f32[1]
+ sq.m128_f32[2] + sq.m128_f32[3];
#endif
}
// elementwise min & max
inline vec4 min4(vec4 &a, vec4 &b) {
vec4 r;
__m128 mask = _mm_cmplt_ps(a.dataSSE, b.dataSSE);
r.dataSSE = _mm_and_ps(mask, a.dataSSE);
__m128 t = _mm_andnot_ps(mask, b.dataSSE);
r.dataSSE = _mm_or_ps(r.dataSSE, t);
return r;
}
inline vec4 max4(vec4 &a, vec4 &b) {
vec4 r;
__m128 mask = _mm_cmpgt_ps(a.dataSSE, b.dataSSE);
r.dataSSE = _mm_and_ps(mask, a.dataSSE);
__m128 t = _mm_andnot_ps(mask, b.dataSSE);
r.dataSSE = _mm_or_ps(r.dataSSE, t);
return r;
}
inline vec4 vec4::min(vec4 &v) {
return min4(*this, v);
}
inline void vec4::minSelf(vec4 &v) {
*this = this->min(v);
}
inline vec4 vec4::max(vec4 &v) {
return max4(*this, v);
}
inline void vec4::maxSelf(vec4 &v) {
*this = this->max(v);
}
class mat4 : public CMemObj
{
public:
float* operator&();
vec4 operator*(vec4& v);
mat4& operator=(float* m);
mat4& operator=(mat4& m);
float& operator[](int i);
void VectorBatchMultiply(vec4 *v, const int num);
void MultiplySelf(mat4 &m);
void TransposeSelf();
void InverseSelf();
mat4 InverseModelView();
union
{
float data[16];
__m128 dataSSE[4];
};
};
inline float* mat4::operator&() {
return data;
}
inline float& mat4::operator[](int i) {
return data[i];
}
inline void mat4::VectorBatchMultiply(vec4 *v, const int num) {
MatrixVectorBatchMultiply(v->data, data, v->data, num<<4);
}
inline void mat4::MultiplySelf(mat4 &m) {
MatrixVectorBatchMultiply(data, data, m.data, 64);
}
inline mat4& mat4::operator=(float* m) {
_mm_store_ps(&data[0], _mm_load_ps(&m[0]));
_mm_store_ps(&data[4], _mm_load_ps(&m[4]));
_mm_store_ps(&data[8], _mm_load_ps(&m[8]));
_mm_store_ps(&data[12], _mm_load_ps(&m[12]));
return *this;
}
inline mat4& mat4::operator=(mat4& m) {
*this = m.data;
return *this;
}
// SinCos performs ( sin(angle), cos(angle), ..., ... )
vec4 __fastcall SinCos(float& angle); // defined in sincos.asm
// much faster version for small angles
vec4 __fastcall SinCos_360(float& angle);
// SinCos4 performs ( sin(angles.x), sin(angles.y), cos(angles.z), cos(angles.w) )
vec4 __fastcall SinCos4(vec4& angles);
vec4 __fastcall SinCos4_360(vec4& angles);
// Slightly faster than SinCos (legacy, but only 4 lines!)
#define SinCosFPU(s, c, a) \
__asm fld a \
__asm fsincos \
__asm fstp [s] \
__asm fstp [c]
inline vec4 fabs4(vec4 *v) {
__m128 mask = _mm_set1_ps(*reinterpret_cast<float*>((unsigned int*)&FABS_MASK));
vec4 r;
r.dataSSE = _mm_and_ps(v->dataSSE, mask);
return r;
}
inline vec4 reverse4(vec4 *v) {
__m128 mask = _mm_set1_ps(*reinterpret_cast<float*>((unsigned int*)&INV_MASK));
vec4 r;
r.dataSSE = _mm_xor_ps(v->dataSSE, mask);
return r;
}
#if 0
inline int abs(int x) {
int s = x>>31;
return x^s - s;
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -