📄 _matrix33_sse.h
字号:
#ifndef _MATRIX33_SSE_H
#define _MATRIX33_SSE_H
//------------------------------------------------------------------------------
/**
An SSE based matrix33 class.
@author
- RadonLabs GmbH
@since
- 2005.7.06
@remarks
- Added by Jiwan (original Korean note, apparently "지완 추가", was mis-decoded in this copy)
*/
#include "_vector3_sse.h"
#include "quaternion.h"
#include "euler.h"
#include "matrixdefs.h"
#include <string.h>
/// Identity matrix as 3 rows of 4 floats; the 4th float of every row is 0.
/// The default constructor of _matrix33_sse copies this table into m[3][4].
static float _matrix33_sse_ident[3 * 4] =
{
    1.0f, 0.0f, 0.0f,   0.0f,
    0.0f, 1.0f, 0.0f,   0.0f,
    0.0f, 0.0f, 1.0f,   0.0f
};
//------------------------------------------------------------------------------
/// 3x3 float matrix held in three __m128 values (m1, m2, m3). An anonymous
/// union exposes the same storage as m[3][4], i.e. four floats per row; the
/// nine-float constructor writes 0 into the fourth float of each row.
class _matrix33_sse
{
public:
/// constructor 1: initializes the matrix to identity
_matrix33_sse();
/// constructor 2: takes the m128 members of v1, v2, v3 as m1, m2, m3
_matrix33_sse(const _vector3_sse& v1, const _vector3_sse& v2, const _vector3_sse& v3);
/// constructor 3: copy constructor
_matrix33_sse(const _matrix33_sse& mx);
/// constructor 4: nine individual elements, given row by row
_matrix33_sse(float _m11, float _m12, float _m13, float _m21, float _m22, float _m23, float _m31, float _m32, float _m33);
/// constructor 5: builds the matrix from a quaternion
_matrix33_sse(const quaternion& q);
/// constructor 6: takes three __m128 values directly as m1, m2, m3
_matrix33_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3);
/// get as quaternion
quaternion get_quaternion() const;
/// get as euler representation (angles returned in the x, y, z members)
_vector3_sse to_euler() const;
/// set as euler (angles read from the x, y, z members of ea)
void from_euler(const _vector3_sse& ea);
/// unrestricted lookat
void lookat(const _vector3_sse& from, const _vector3_sse& to, const _vector3_sse& up);
/// restricted lookat (billboard)
void billboard(const _vector3_sse& from, const _vector3_sse& to, const _vector3_sse& up);
/// set 1: nine individual elements
void set(float m11, float m12, float m13, float m21, float m22, float m23, float m31, float m32, float m33);
/// set 2: three vectors
void set(const _vector3_sse& v1, const _vector3_sse& v2, const _vector3_sse& v3);
/// set 3: another matrix
void set(const _matrix33_sse& mx);
/// set to identity
void ident();
/// set to transpose
void transpose();
/// is orthonormal?
bool orthonorm(float limit);
/// scale
void scale(const _vector3_sse& s);
/// rotate about global x
void rotate_x(const float a);
/// rotates matrix about global y
void rotate_y(const float a);
/// rotate about global z
void rotate_z(const float a);
/// rotate about local x (not very fast)
void rotate_local_x(const float a);
/// rotate about local y (not very fast)
void rotate_local_y(const float a);
/// rotate about local z (not very fast)
void rotate_local_z(const float a);
/// rotate about any axis
void rotate(const _vector3_sse& vec, float a);
/// get x component
_vector3_sse x_component(void) const;
/// get y component
_vector3_sse y_component(void) const;
/// get z component
_vector3_sse z_component(void) const;
/// inplace matrix multiply
void operator *= (const _matrix33_sse& m1);
/// multiply source vector into target vector
void mult(const _vector3_sse& src, _vector3_sse& dst) const;
/// storage: the two anonymous structs below alias the same 48 bytes
union
{
/// SSE view: one __m128 per row
struct
{
__m128 m1;
__m128 m2;
__m128 m3;
};
/// scalar view: m[row][column], column 3 is the extra fourth float
struct
{
float m[3][4];
};
};
};
//------------------------------------------------------------------------------
/**
    Matrix product ma * mb.

    Each result row r is computed as
        ma[r].x * mb.m1 + ma[r].y * mb.m2 + ma[r].z * mb.m3
    where the scalar is broadcast to all four lanes with a shuffle.

    FIXME: OPTIMIZE -> KILL TEMPORARYS, SEE _MATRIX44_SSE
*/
static
inline
_matrix33_sse operator * (const _matrix33_sse& ma, const _matrix33_sse& mb)
{
    // broadcast x, y, z of the first row of ma
    const __m128 r1x = _mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0));
    const __m128 r1y = _mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1));
    const __m128 r1z = _mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2));
    const __m128 row1 = _mm_add_ps(
        _mm_add_ps(_mm_mul_ps(r1x, mb.m1), _mm_mul_ps(r1y, mb.m2)),
        _mm_mul_ps(r1z, mb.m3));

    // second row
    const __m128 r2x = _mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0));
    const __m128 r2y = _mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1));
    const __m128 r2z = _mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2));
    const __m128 row2 = _mm_add_ps(
        _mm_add_ps(_mm_mul_ps(r2x, mb.m1), _mm_mul_ps(r2y, mb.m2)),
        _mm_mul_ps(r2z, mb.m3));

    // third row
    const __m128 r3x = _mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0));
    const __m128 r3y = _mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1));
    const __m128 r3z = _mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2));
    const __m128 row3 = _mm_add_ps(
        _mm_add_ps(_mm_mul_ps(r3x, mb.m1), _mm_mul_ps(r3y, mb.m2)),
        _mm_mul_ps(r3z, mb.m3));

    return _matrix33_sse(row1, row2, row3);
}
//------------------------------------------------------------------------------
/**
    Transforms v by mx.

    The result is v.x * mx.m1 + v.y * mx.m2 + v.z * mx.m3, evaluated on all
    four lanes at once (each component of v is broadcast with a shuffle).

    FIXME: KILL TEMPORARY
*/
static
inline
_vector3_sse operator * (const _matrix33_sse& mx, const _vector3_sse& v)
{
    const __m128 vx = _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0));
    const __m128 vy = _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1));
    const __m128 vz = _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2));

    const __m128 xy = _mm_add_ps(_mm_mul_ps(vx, mx.m1), _mm_mul_ps(vy, mx.m2));
    return _vector3_sse(_mm_add_ps(xy, _mm_mul_ps(vz, mx.m3)));
}
//------------------------------------------------------------------------------
/**
    Default constructor: copies the identity table into the matrix storage.
*/
inline
_matrix33_sse::_matrix33_sse()
{
    // m is float[3][4], the same 12 floats as _matrix33_sse_ident
    memcpy(m, _matrix33_sse_ident, sizeof(m));
}
//------------------------------------------------------------------------------
/**
    Constructs the matrix from three vectors; their m128 members become
    m1, m2 and m3 in that order.
*/
inline
_matrix33_sse::_matrix33_sse(const _vector3_sse& v1, const _vector3_sse& v2, const _vector3_sse& v3)
{
    m1 = v1.m128;
    m2 = v2.m128;
    m3 = v3.m128;
}
//------------------------------------------------------------------------------
/**
    Copy constructor: duplicates all three __m128 rows of mx.
*/
inline
_matrix33_sse::_matrix33_sse(const _matrix33_sse& mx)
{
    m1 = mx.m1;
    m2 = mx.m2;
    m3 = mx.m3;
}
//------------------------------------------------------------------------------
/**
    Constructs the matrix from nine scalars, given row by row.
    The fourth lane of every row is set to 0.
*/
inline
_matrix33_sse::_matrix33_sse(float _m11, float _m12, float _m13,
                             float _m21, float _m22, float _m23,
                             float _m31, float _m32, float _m33)
{
    // _mm_setr_ps takes its arguments lowest lane first
    m1 = _mm_setr_ps(_m11, _m12, _m13, 0.0f);
    m2 = _mm_setr_ps(_m21, _m22, _m23, 0.0f);
    m3 = _mm_setr_ps(_m31, _m32, _m33, 0.0f);
}
//------------------------------------------------------------------------------
/**
    Constructs the matrix directly from three __m128 rows.
*/
inline
_matrix33_sse::_matrix33_sse(const __m128& _m1, const __m128& _m2, const __m128& _m3)
{
    m1 = _m1;
    m2 = _m2;
    m3 = _m3;
}
//------------------------------------------------------------------------------
/**
    Constructs the matrix from the x, y, z, w members of a quaternion.

    The nine 3x3 entries are computed in scalar code. The fourth float of
    every row is explicitly set to 0, matching the nine-float constructor
    and the identity table; otherwise uninitialized data would flow into
    every SSE operation that later touches m1/m2/m3.

    FIXME: SSE OPTIMIZATION!
*/
inline
_matrix33_sse::_matrix33_sse(const quaternion& q)
{
    const float xx = q.x*q.x; const float yy = q.y*q.y; const float zz = q.z*q.z;
    const float xy = q.x*q.y; const float xz = q.x*q.z; const float yz = q.y*q.z;
    const float wx = q.w*q.x; const float wy = q.w*q.y; const float wz = q.w*q.z;

    m[0][0] = 1.0f - 2.0f * (yy + zz);
    m[1][0] = 2.0f * (xy - wz);
    m[2][0] = 2.0f * (xz + wy);

    m[0][1] = 2.0f * (xy + wz);
    m[1][1] = 1.0f - 2.0f * (xx + zz);
    m[2][1] = 2.0f * (yz - wx);

    m[0][2] = 2.0f * (xz - wy);
    m[1][2] = 2.0f * (yz + wx);
    m[2][2] = 1.0f - 2.0f * (xx + yy);

    // clear the unused fourth lane of each row
    m[0][3] = 0.0f;
    m[1][3] = 0.0f;
    m[2][3] = 0.0f;
}
//------------------------------------------------------------------------------
/**
    Converts the matrix to a quaternion.

    If the trace m[0][0] + m[1][1] + m[2][2] is positive, the w component is
    derived from it directly. Otherwise the largest diagonal element selects
    which of x, y, z is computed first and the remaining components follow
    from the off-diagonal sums and differences.

    FIXME: SSE OPTIMIZATION!
*/
inline
quaternion
_matrix33_sse::get_quaternion() const
{
    float comp[4];      // x, y, z, w
    const float trace = m[0][0] + m[1][1] + m[2][2];

    if (trace > 0.0f)
    {
        float root = n_sqrt(trace + 1.0f);
        comp[3] = root * 0.5f;
        root = 0.5f / root;
        comp[0] = (m[1][2] - m[2][1]) * root;
        comp[1] = (m[2][0] - m[0][2]) * root;
        comp[2] = (m[0][1] - m[1][0]) * root;
    }
    else
    {
        // cyclic successor of each axis index
        const int succ[3] = { 1, 2, 0 };

        // a = index of the largest diagonal element
        int a = 0;
        if (m[1][1] > m[0][0])
        {
            a = 1;
        }
        if (m[2][2] > m[a][a])
        {
            a = 2;
        }
        const int b = succ[a];
        const int c = succ[b];

        float root = n_sqrt((m[a][a] - (m[b][b] + m[c][c])) + 1.0f);
        comp[a] = root * 0.5f;
        root = 0.5f / root;
        comp[3] = (m[b][c] - m[c][b]) * root;
        comp[b] = (m[a][b] + m[b][a]) * root;
        comp[c] = (m[a][c] + m[c][a]) * root;
    }

    return quaternion(comp[0], comp[1], comp[2], comp[3]);
}
//------------------------------------------------------------------------------
/**
Extracts euler angles from the matrix and returns them in the x, y, z
members of a _vector3_sse.

The angle order is selected by the EulOrdXYZs code passed to EulGetOrd.
EulGetOrd, EulRepYes, EulParOdd, EulFrmR and the M11..M33 accessors are
declared elsewhere (presumably euler.h / matrixdefs.h -- TODO confirm).

FIXME: SSE OPTIMIZATION!
*/
inline
_vector3_sse
_matrix33_sse::to_euler() const
{
_vector3_sse ea;
// work on matrix with flipped row/columns
_matrix33_sse tmp(*this);
tmp.transpose();
// i, j, k, h are filled by EulGetOrd but only n, s, f are read below
int i,j,k,h,n,s,f;
EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
if (s==EulRepYes)
{
// note: the square root is narrowed to float before being stored as double
double sy = (float) sqrt(tmp.M12 * tmp.M12 + tmp.M13 * tmp.M13);
if (sy > 16*FLT_EPSILON)
{
ea.x = (float) atan2(tmp.M12, tmp.M13);
ea.y = (float) atan2(sy, tmp.M11);
ea.z = (float) atan2(tmp.M21, -tmp.M31);
} else {
// sy is (nearly) zero: z is fixed to 0 and x is taken from M23/M22
ea.x = (float) atan2(-tmp.M23, tmp.M22);
ea.y = (float) atan2(sy, tmp.M11);
ea.z = 0;
}
}
else
{
double cy = sqrt(tmp.M11 * tmp.M11 + tmp.M21 * tmp.M21);
if (cy > 16*FLT_EPSILON)
{
ea.x = (float) atan2(tmp.M32, tmp.M33);
ea.y = (float) atan2(-tmp.M31, cy);
ea.z = (float) atan2(tmp.M21, tmp.M11);
}
else
{
// cy is (nearly) zero: z is fixed to 0 and x is taken from M23/M22
ea.x = (float) atan2(-tmp.M23, tmp.M22);
ea.y = (float) atan2(-tmp.M31, cy);
ea.z = 0;
}
}
// negate all three angles when n equals EulParOdd
if (n==EulParOdd) {ea.x = -ea.x; ea.y = - ea.y; ea.z = -ea.z;}
// swap the x and z angles when f equals EulFrmR
if (f==EulFrmR) {float t = ea.x; ea.x = ea.z; ea.z = t;}
return ea;
}
//------------------------------------------------------------------------------
/**
FIXME: SSE OPTIMIZATION!
*/
inline
void
_matrix33_sse::from_euler(const _vector3_sse& ea)
{
_vector3_sse tea = ea;
double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
int i,j,k,h,n,s,f;
EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
if (f==EulFrmR) {float t = ea.x; tea.x = ea.z; tea.z = t;}
if (n==EulParOdd) {tea.x = -ea.x; tea.y = -ea.y; tea.z = -ea.z;}
ti = tea.x; tj = tea.y; th = tea.z;
ci = cos(ti); cj = cos(tj); ch = cos(th);
si = sin(ti); sj = sin(tj); sh = sin(th);
cc = ci*ch; cs = ci*sh; sc = si*ch; ss = si*sh;
if (s==EulRepYes)
{
M11 = (float)(cj); M12 = (float)(sj*si); M13 = (float)(sj*ci);
M21 = (float)(sj*sh); M22 = (float)(-cj*ss+cc); M23 = (float)(-cj*cs-sc);
M31 = (float)(-sj*ch); M23 = (float)( cj*sc+cs); M33 = (float)( cj*cc-ss);
}
else
{
M11 = (float)(cj*ch); M12 = (float)(sj*sc-cs); M13 = (float)(sj*cc+ss);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -