⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 _vector4_sse.h

📁 奇迹世界公用文件源代码,研究网络游戏的朋友可以研究下
💻 H
字号:
#ifndef _VECTOR4_SSE_H
#define _VECTOR4_SSE_H
//------------------------------------------------------------------------------
/**
    A vector4 class using SSE.

    @author
    - RadonLabs GmbH 

    @since
    - 2005.7.06
    @remarks
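    - Added by Jiwan (recovered from a mis-encoded Korean note, presumably "지완 추가"; verify against the original file)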
*/
#include <math.h>
#include <xmmintrin.h>

//------------------------------------------------------------------------------
class _vector4_sse
{
public:
    //--- construction -----------------------------------------------------
    /// default constructor
    _vector4_sse();
    /// construct from four scalar components
    _vector4_sse(const float _x, const float _y, const float _z, const float _w);
    /// copy constructor
    _vector4_sse(const _vector4_sse& vec);
    /// construct directly from a packed __m128 value
    /// (meant for internal use, but declared in the public section)
    _vector4_sse(const __m128& m);

    //--- assignment -------------------------------------------------------
    /// overwrite all four components from scalars
    void set(const float _x, const float _y, const float _z, const float _w);
    /// overwrite all four components from another vector
    void set(const _vector4_sse& v);

    //--- in-place arithmetic ----------------------------------------------
    /// normalize in place
    void norm();
    /// component-wise in-place addition
    void operator +=(const _vector4_sse& v);
    /// component-wise in-place subtraction
    void operator -=(const _vector4_sse& v);
    /// in-place multiplication of every component by a scalar
    void operator *=(const float s);
    /// keep, per component, the smaller of own value and v's value
    void minimum(const _vector4_sse& v);
    /// keep, per component, the larger of own value and v's value
    void maximum(const _vector4_sse& v);
    /// in-place linear interpolation against v0, weighted by lerpVal
    void lerp(const _vector4_sse& v0, float lerpVal);

    //--- queries ----------------------------------------------------------
    /// return the length of the vector
    float len() const;
    /// fuzzy equality test with tolerance tol
    bool isequal(const _vector4_sse& v, float tol) const;
    /// fuzzy three-way comparison with tolerance tol, returns -1, 0 or +1
    int compare(const _vector4_sse& v, float tol) const;

    //--- helpers ----------------------------------------------------------
    /// add the components of a packed value
    /// (only declared here; no definition is visible in this header)
    float addhorizontal(const __m128 &a);

    //--- storage ----------------------------------------------------------
    /// The same 16 bytes viewed either as one packed SSE value or as four
    /// named floats; x is the lowest element of m128.
    union
    {
        __m128 m128;
        struct
        {
            float x, y, z, w;
        };
    };
};

//------------------------------------------------------------------------------
/**
    Default constructor: all four components start out as zero.
*/
inline
_vector4_sse::_vector4_sse() :
    m128(_mm_setzero_ps())
{
    // empty
}

//------------------------------------------------------------------------------
/**
    Construct from four scalars.

    _mm_setr_ps takes its arguments lowest element first, so _x lands in
    element 0 (aliased by x) and _w in element 3 (aliased by w).
*/
inline
_vector4_sse::_vector4_sse(const float _x, const float _y, const float _z, const float _w) :
    m128(_mm_setr_ps(_x, _y, _z, _w))
{
    // empty
}

//------------------------------------------------------------------------------
/**
    Copy constructor: takes over the packed value of v.
*/
inline
_vector4_sse::_vector4_sse(const _vector4_sse& v) :
    m128(v.m128)
{
    // empty
}

//------------------------------------------------------------------------------
/**
    Construct from an already packed __m128 value. Used by the free
    arithmetic operators in this header to wrap their results.
*/
inline
_vector4_sse::_vector4_sse(const __m128& m)
{
    m128 = m;
}

//------------------------------------------------------------------------------
/**
    Overwrite all four components from scalars.

    _mm_setr_ps takes its arguments lowest element first, so the argument
    order here matches the x, y, z, w member order.
*/
inline
void
_vector4_sse::set(const float _x, const float _y, const float _z, const float _w)
{
    const __m128 packed = _mm_setr_ps(_x, _y, _z, _w);
    m128 = packed;
}

//------------------------------------------------------------------------------
/**
*/
inline
void
_vector4_sse::set(const _vector4_sse& v)
{
    m128 = v.m128;
}

//------------------------------------------------------------------------------
/**
    Return the Euclidean length of the vector,
    sqrt(x*x + y*y + z*z + w*w).

    The previous horizontal add summed lane 0 twice and never added lane 3,
    which produced sqrt(2*x*x + y*y + z*z). All four squared components are
    now accumulated exactly once.

    @return     length of the vector
*/
inline
float
_vector4_sse::len() const
{
    // per-component squares: (x*x, y*y, z*z, w*w)
    const __m128 squares = _mm_mul_ps(m128, m128);

    // broadcast y*y, z*z and w*w so each can be added into lane 0
    const __m128 yy = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 zz = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 ww = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(3, 3, 3, 3));

    // lane 0 = (x*x + y*y) + (z*z + w*w); the upper lanes are not used
    const __m128 sumSquares = _mm_add_ss(_mm_add_ss(squares, yy), _mm_add_ss(zz, ww));

    // extract lane 0 with a store instead of the MSVC-only m128_f32 member
    float length;
    _mm_store_ss(&length, _mm_sqrt_ss(sumSquares));
    return length;
}

//------------------------------------------------------------------------------
/**
    Normalize the vector in place by scaling every component with
    1 / sqrt(x*x + y*y + z*z + w*w).

    The previous horizontal add summed lane 0 twice and never added lane 3,
    so the scale factor was derived from 2*x*x + y*y + z*z instead of the
    squared length.

    The reciprocal square root comes from _mm_rsqrt_ss, which is a fast
    approximation, so the resulting length is only approximately 1.
    A zero-length vector is not special-cased.
*/
inline
void
_vector4_sse::norm()
{
    // per-component squares: (x*x, y*y, z*z, w*w)
    const __m128 squares = _mm_mul_ps(m128, m128);

    // broadcast y*y, z*z and w*w so each can be added into lane 0
    const __m128 yy = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 zz = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 ww = _mm_shuffle_ps(squares, squares, _MM_SHUFFLE(3, 3, 3, 3));

    // lane 0 = squared length
    const __m128 sumSquares = _mm_add_ss(_mm_add_ss(squares, yy), _mm_add_ss(zz, ww));

    // approximate 1/length in lane 0, then replicate it to all four lanes
    const __m128 rsqrt = _mm_rsqrt_ss(sumSquares);
    const __m128 oneDivLen = _mm_shuffle_ps(rsqrt, rsqrt, _MM_SHUFFLE(0, 0, 0, 0));

    m128 = _mm_mul_ps(m128, oneDivLen);
}

//------------------------------------------------------------------------------
/**
    Add v to this vector, component by component.
*/
inline
void
_vector4_sse::operator +=(const _vector4_sse& v)
{
    const __m128 sum = _mm_add_ps(m128, v.m128);
    m128 = sum;
}

//------------------------------------------------------------------------------
/**
    Subtract v from this vector, component by component.
*/
inline
void
_vector4_sse::operator -=(const _vector4_sse& v)
{
    const __m128 difference = _mm_sub_ps(m128, v.m128);
    m128 = difference;
}

//------------------------------------------------------------------------------
/**
    Multiply every component of this vector by the scalar s.
*/
inline
void
_vector4_sse::operator *=(const float s)
{
    // replicate s into all four lanes, then multiply lane-wise
    m128 = _mm_mul_ps(m128, _mm_set_ps1(s));
}

//------------------------------------------------------------------------------
/**
    Fuzzy equality test.

    Uses fabsf from <math.h>, so the check stays in single precision and the
    header no longer depends on some other file having declared fabs.

    @param  v       vector to compare against
    @param  tol     largest per-component absolute difference still
                    regarded as equal
    @return         true if no component differs from v by more than tol
*/
inline
bool
_vector4_sse::isequal(const _vector4_sse& v, float tol) const
{
    // Each test is written as "not greater than" rather than "<=" so that a
    // NaN difference or tolerance keeps counting as "equal", exactly as
    // before.
    return !(fabsf(v.x - x) > tol)
        && !(fabsf(v.y - y) > tol)
        && !(fabsf(v.z - z) > tol)
        && !(fabsf(v.w - w) > tol);
}

//------------------------------------------------------------------------------
/**
    Fuzzy three-way comparison.

    Components are examined in x, y, z, w order; the first one that differs
    from v by more than tol decides the result.

    Uses fabsf from <math.h>, so the check stays in single precision and the
    header no longer depends on some other file having declared fabs.

    @param  v       vector to compare against
    @param  tol     largest per-component absolute difference still
                    regarded as equal
    @return         +1 if the deciding component of v is greater than the
                    own one, -1 if it is not, 0 if all components are
                    within tol
*/
inline
int
_vector4_sse::compare(const _vector4_sse& v, float tol) const
{
    if (fabsf(v.x - x) > tol)
    {
        return (v.x > x) ? +1 : -1;
    }
    if (fabsf(v.y - y) > tol)
    {
        return (v.y > y) ? +1 : -1;
    }
    if (fabsf(v.z - z) > tol)
    {
        return (v.z > z) ? +1 : -1;
    }
    if (fabsf(v.w - w) > tol)
    {
        return (v.w > w) ? +1 : -1;
    }
    return 0;
}

//------------------------------------------------------------------------------
/**
*/
inline
void
_vector4_sse::minimum(const _vector4_sse& v)
{
    if (v.x < x) x = v.x;
    if (v.y < y) y = v.y;
    if (v.z < z) z = v.z;
    if (v.w < w) w = v.w;
}

//------------------------------------------------------------------------------
/**
*/
inline
void
_vector4_sse::maximum(const _vector4_sse& v)
{
    if (v.x > x) x = v.x;
    if (v.y > y) y = v.y;
    if (v.z > z) z = v.z;
    if (v.w > w) w = v.w;
}

//------------------------------------------------------------------------------
/**
    Component-wise sum of two vectors, returned as a new vector.
*/
static
inline
_vector4_sse operator +(const _vector4_sse& v0, const _vector4_sse& v1)
{
    const __m128 sum = _mm_add_ps(v0.m128, v1.m128);
    return _vector4_sse(sum);
}

//------------------------------------------------------------------------------
/**
    Component-wise difference v0 - v1, returned as a new vector.
*/
static
inline
_vector4_sse operator -(const _vector4_sse& v0, const _vector4_sse& v1)
{
    const __m128 difference = _mm_sub_ps(v0.m128, v1.m128);
    return _vector4_sse(difference);
}

//------------------------------------------------------------------------------
/**
    Vector scaled by the scalar s, returned as a new vector.
*/
static
inline
_vector4_sse operator *(const _vector4_sse& v0, const float& s)
{
    // replicate s into all four lanes, then multiply lane-wise
    return _vector4_sse(_mm_mul_ps(v0.m128, _mm_set_ps1(s)));
}

//------------------------------------------------------------------------------
/**
    Unary minus: returns the negated vector, computed as (0 - v) per
    component.
*/
static
inline
_vector4_sse operator -(const _vector4_sse& v)
{
    const __m128 negated = _mm_sub_ps(_mm_setzero_ps(), v.m128);
    return _vector4_sse(negated);
}

//------------------------------------------------------------------------------
/**
    In-place linear interpolation: every component becomes
    v0 + (own - v0) * lerpVal. A lerpVal of 0 therefore yields v0.

    @param  v0          vector the interpolation starts from
    @param  lerpVal     interpolation weight
*/
inline
void
_vector4_sse::lerp(const _vector4_sse& v0, float lerpVal)
{
    // distance from v0 to the current value, per component
    const float deltaX = x - v0.x;
    const float deltaY = y - v0.y;
    const float deltaZ = z - v0.z;
    const float deltaW = w - v0.w;

    x = v0.x + (deltaX * lerpVal);
    y = v0.y + (deltaY * lerpVal);
    z = v0.z + (deltaZ * lerpVal);
    w = v0.w + (deltaW * lerpVal);
}

//------------------------------------------------------------------------------
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -