📄 dvec.h
字号:
{ return _mm_add_##opsize( a,b); } \
inline I##vect##vec##element operator- (const I##vect##vec##element &a, const I##vect##vec##element &b) \
{ return _mm_sub_##opsize( a,b); }
/* Instantiate operator+ / operator- for every 128-bit integer vector
 * class: 16x8-bit, 8x16-bit, 4x32-bit and 2x64-bit element variants
 * (plain, unsigned 'u' and signed 's' where applicable). Add/sub of
 * same-width elements is signedness-agnostic, so all variants of a
 * width map onto the same epiN intrinsic. */
IVEC128_ADD_SUB(8,16, epi8)
IVEC128_ADD_SUB(u8,16, epi8)
IVEC128_ADD_SUB(s8,16, epi8)
IVEC128_ADD_SUB(16,8, epi16)
IVEC128_ADD_SUB(u16,8, epi16)
IVEC128_ADD_SUB(s16,8, epi16)
IVEC128_ADD_SUB(32,4, epi32)
IVEC128_ADD_SUB(u32,4, epi32)
IVEC128_ADD_SUB(s32,4, epi32)
IVEC128_ADD_SUB(64,2, epi64)
/* Macro is local to this section; remove it to avoid leaking into user code */
#undef IVEC128_ADD_SUB
/********************************* Conditional Select ****************************************/
/* version of: retval = (a OP b)? c : d; *
* Where OP is one of the possible comparison operators. *
* Example: r = select_eq(a,b,c,d); *
* if "member at position x of the vector a" == "member at position x of vector b" *
* assign the corresponding member in r from c, else assign from d. *
********************************* Conditional Select ****************************************/
/* IVEC128_SELECT(vect12, vect34, element, selop, arg1, arg2)
 * Defines select_<selop>(a, b, c, d): compares a and b with
 * cmp<selop> to get an all-ones/all-zeros per-element mask, then
 * blends c (where the mask is set) with d (where it is clear) via
 * (mask & c) | andnot(mask, d). a/b use the vect12 type, c/d and the
 * result use the vect34 type; arg1/arg2 are always passed as c,d. */
#define IVEC128_SELECT(vect12,vect34,element,selop,arg1,arg2) \
inline I##vect34##vec##element select_##selop (const I##vect12##vec##element &a, const I##vect12##vec##element &b, const I##vect34##vec##element &c, const I##vect34##vec##element &d) \
{ \
I##vect12##vec##element mask = cmp##selop(a,b); \
return( I##vect34##vec##element ((mask & arg1 ) | I##vect12##vec##element ((_mm_andnot_si128(mask, arg2 ))))); \
}
/* eq/neq selects on 16x8-bit vectors (equality is signedness-agnostic;
 * the result type follows the c/d arguments) */
IVEC128_SELECT(8,s8,16,eq,c,d)
IVEC128_SELECT(8,u8,16,eq,c,d)
IVEC128_SELECT(8,8,16,eq,c,d)
IVEC128_SELECT(8,s8,16,neq,c,d)
IVEC128_SELECT(8,u8,16,neq,c,d)
IVEC128_SELECT(8,8,16,neq,c,d)
/* eq/neq selects on 8x16-bit vectors */
IVEC128_SELECT(16,s16,8,eq,c,d)
IVEC128_SELECT(16,u16,8,eq,c,d)
IVEC128_SELECT(16,16,8,eq,c,d)
IVEC128_SELECT(16,s16,8,neq,c,d)
IVEC128_SELECT(16,u16,8,neq,c,d)
IVEC128_SELECT(16,16,8,neq,c,d)
/* eq/neq selects on 4x32-bit vectors */
IVEC128_SELECT(32,s32,4,eq,c,d)
IVEC128_SELECT(32,u32,4,eq,c,d)
IVEC128_SELECT(32,32,4,eq,c,d)
IVEC128_SELECT(32,s32,4,neq,c,d)
IVEC128_SELECT(32,u32,4,neq,c,d)
IVEC128_SELECT(32,32,4,neq,c,d)
/* gt/lt selects: a and b must be SIGNED vectors (SSE2 only has signed
 * integer compares); c/d may be any same-width variant */
IVEC128_SELECT(s8,s8,16,gt,c,d)
IVEC128_SELECT(s8,u8,16,gt,c,d)
IVEC128_SELECT(s8,8,16,gt,c,d)
IVEC128_SELECT(s8,s8,16,lt,c,d)
IVEC128_SELECT(s8,u8,16,lt,c,d)
IVEC128_SELECT(s8,8,16,lt,c,d)
IVEC128_SELECT(s16,s16,8,gt,c,d)
IVEC128_SELECT(s16,u16,8,gt,c,d)
IVEC128_SELECT(s16,16,8,gt,c,d)
IVEC128_SELECT(s16,s16,8,lt,c,d)
IVEC128_SELECT(s16,u16,8,lt,c,d)
IVEC128_SELECT(s16,16,8,lt,c,d)
/* Macro is local to this section; remove it to avoid leaking into user code */
#undef IVEC128_SELECT
/* F64vec2: class wrapping the __m128d type -- a packed vector of two
 * double-precision floating-point values with natural operator syntax
 * over the SSE2 intrinsics. Element 0 is the low double, element 1
 * the high double. */
class F64vec2
{
protected:
    __m128d vec;  /* underlying 128-bit register (2 doubles) */
public:
    /* Constructors: __m128d, 2 doubles */
    F64vec2() {}
    /* initialize 2 DP FP with __m128d data type */
    F64vec2(__m128d m) { vec = m;}
    /* initialize 2 DP FPs with 2 doubles: d0 becomes element 0 (low),
     * d1 becomes element 1 (high) -- note the reversed argument order */
    F64vec2(double d1, double d0) { vec= _mm_set_pd(d1,d0); }
    /* Explicitly initialize each of 2 DP FPs with same double */
    explicit F64vec2(double d) { vec = _mm_set1_pd(d); }
    /* Conversion functions */
    operator __m128d() const { return vec; } /* Convert to __m128d */
    /* Logical Operators (bitwise, applied to the raw 128 bits) */
    friend F64vec2 operator &(const F64vec2 &a, const F64vec2 &b) { return _mm_and_pd(a,b); }
    friend F64vec2 operator |(const F64vec2 &a, const F64vec2 &b) { return _mm_or_pd(a,b); }
    friend F64vec2 operator ^(const F64vec2 &a, const F64vec2 &b) { return _mm_xor_pd(a,b); }
    /* Arithmetic Operators (element-wise) */
    friend F64vec2 operator +(const F64vec2 &a, const F64vec2 &b) { return _mm_add_pd(a,b); }
    friend F64vec2 operator -(const F64vec2 &a, const F64vec2 &b) { return _mm_sub_pd(a,b); }
    friend F64vec2 operator *(const F64vec2 &a, const F64vec2 &b) { return _mm_mul_pd(a,b); }
    friend F64vec2 operator /(const F64vec2 &a, const F64vec2 &b) { return _mm_div_pd(a,b); }
    /* Compound assignment.
     * Fix: parameters are now const references so const objects and
     * temporaries (e.g. x += a*b) are accepted; previously they were
     * non-const references, which rejected such arguments. */
    F64vec2& operator +=(const F64vec2 &a) { return *this = _mm_add_pd(vec,a); }
    F64vec2& operator -=(const F64vec2 &a) { return *this = _mm_sub_pd(vec,a); }
    F64vec2& operator *=(const F64vec2 &a) { return *this = _mm_mul_pd(vec,a); }
    F64vec2& operator /=(const F64vec2 &a) { return *this = _mm_div_pd(vec,a); }
    F64vec2& operator &=(const F64vec2 &a) { return *this = _mm_and_pd(vec,a); }
    F64vec2& operator |=(const F64vec2 &a) { return *this = _mm_or_pd(vec,a); }
    F64vec2& operator ^=(const F64vec2 &a) { return *this = _mm_xor_pd(vec,a); }
    /* Horizontal Add: returns a[0] + a[1].
     * Fix: takes a const reference (was non-const) -- a is only read,
     * so const and temporary arguments are now accepted. */
    friend double add_horizontal(const F64vec2 &a)
    {
        /* shuffle(a,a,1) moves the high element into the low slot;
         * add_sd then sums the two low slots */
        F64vec2 ftemp = _mm_add_sd(a,_mm_shuffle_pd(a, a, 1));
        return ftemp[0];
    }
    /* And Not: returns (~a) & b, matching the andnot intrinsic's
     * operand order */
    friend F64vec2 andnot(const F64vec2 &a, const F64vec2 &b) { return _mm_andnot_pd(a,b); }
    /* Square Root (element-wise) */
    friend F64vec2 sqrt(const F64vec2 &a) { return _mm_sqrt_pd(a); }
    /* Compares: Mask is returned (all-ones in each element where the
     * comparison holds, all-zeros where it does not) */
    /* Macro expands to all compare intrinsics. Example:
    friend F64vec2 cmpeq(const F64vec2 &a, const F64vec2 &b)
    { return _mm_cmpeq_pd(a,b);}       (comment corrected: was _ps) */
#define F64vec2_COMP(op) \
friend F64vec2 cmp##op (const F64vec2 &a, const F64vec2 &b) { return _mm_cmp##op##_pd(a,b); }
    F64vec2_COMP(eq)  /* expanded to cmpeq(a,b) */
    F64vec2_COMP(lt)  /* expanded to cmplt(a,b) */
    F64vec2_COMP(le)  /* expanded to cmple(a,b) */
    F64vec2_COMP(gt)  /* expanded to cmpgt(a,b) */
    F64vec2_COMP(ge)  /* expanded to cmpge(a,b) */
    F64vec2_COMP(ngt) /* expanded to cmpngt(a,b) */
    F64vec2_COMP(nge) /* expanded to cmpnge(a,b) */
    F64vec2_COMP(neq) /* expanded to cmpneq(a,b) */
    F64vec2_COMP(nlt) /* expanded to cmpnlt(a,b) */
    F64vec2_COMP(nle) /* expanded to cmpnle(a,b) */
#undef F64vec2_COMP
    /* Min and Max (element-wise) */
    friend F64vec2 simd_min(const F64vec2 &a, const F64vec2 &b) { return _mm_min_pd(a,b); }
    friend F64vec2 simd_max(const F64vec2 &a, const F64vec2 &b) { return _mm_max_pd(a,b); }
    /* Compare lower DP FP values only; returns int 0/1 (comisd) */
#define F64vec2_COMI(op) \
friend int comi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_comi##op##_sd(a,b); }
    F64vec2_COMI(eq)  /* expanded to comieq(a,b) */
    F64vec2_COMI(lt)  /* expanded to comilt(a,b) */
    F64vec2_COMI(le)  /* expanded to comile(a,b) */
    F64vec2_COMI(gt)  /* expanded to comigt(a,b) */
    F64vec2_COMI(ge)  /* expanded to comige(a,b) */
    F64vec2_COMI(neq) /* expanded to comineq(a,b) */
#undef F64vec2_COMI
    /* Compare lower DP FP values only; ucomisd-based variants */
#define F64vec2_UCOMI(op) \
friend int ucomi##op (const F64vec2 &a, const F64vec2 &b) { return _mm_ucomi##op##_sd(a,b); }
    F64vec2_UCOMI(eq)  /* expanded to ucomieq(a,b) */
    F64vec2_UCOMI(lt)  /* expanded to ucomilt(a,b) */
    F64vec2_UCOMI(le)  /* expanded to ucomile(a,b) */
    F64vec2_UCOMI(gt)  /* expanded to ucomigt(a,b) */
    F64vec2_UCOMI(ge)  /* expanded to ucomige(a,b) */
    F64vec2_UCOMI(neq) /* expanded to ucomineq(a,b) */
#undef F64vec2_UCOMI
    /* Debug Features */
#if defined (_ENABLE_VEC_DEBUG)
    /* Output: prints elements high-to-low */
    friend std::ostream & operator<<(std::ostream & os, const F64vec2 &a)
    {
        /* To use: cout << "Elements of F64vec2 fvec are: " << fvec; */
        double *dp = (double*)&a;
        os << " [1]:" << *(dp+1)
           << " [0]:" << *dp;
        return os;
    }
#endif /* defined (_ENABLE_VEC_DEBUG) */
    /* Element Access Only, no modifications to elements */
    const double& operator[](int i) const
    {
        /* Assert enabled only during debug /DDEBUG */
        _VEC_ASSERT((0 <= i) && (i <= 1)); /* User should only access elements 0-1 */
        double *dp = (double*)&vec;
        return *(dp+i);
    }
    /* Element Access and Modification */
    double& operator[](int i)
    {
        /* Assert enabled only during debug /DDEBUG */
        _VEC_ASSERT((0 <= i) && (i <= 1)); /* User should only access elements 0-1 */
        double *dp = (double*)&vec;
        return *(dp+i);
    }
};
/* Miscellaneous */
/* Interleave the low-order elements of a and b: result = {a[0], b[0]} */
inline F64vec2 unpack_low(const F64vec2 &a, const F64vec2 &b)
{
    return F64vec2(_mm_unpacklo_pd(a, b));
}
/* Interleave the high-order elements of a and b: result = {a[1], b[1]} */
inline F64vec2 unpack_high(const F64vec2 &a, const F64vec2 &b)
{
    return F64vec2(_mm_unpackhi_pd(a, b));
}
/* Move Mask to Integer: returns a 2-bit mask formed from the most
 * significant (sign) bits of the two elements of a.
 * (Original comment said "4 bit mask"; _mm_movemask_pd on two doubles
 * produces only bits 0-1.) */
inline int move_mask(const F64vec2 &a)
{
    return _mm_movemask_pd(a);
}
/* Data Motion Functions */
/* Load two doubles into a from an address with no alignment requirement (loadu_pd) */
inline void loadu(F64vec2 &a, double *p)
{
    a = F64vec2(_mm_loadu_pd(p));
}
/* Store Temporal storeu_pd: Unaligned */
inline void storeu(double *p, const F64vec2 &a)
{ _mm_storeu_pd(p, a); }
/* Cacheability Support */
/* Non-Temporal Store */
inline void store_nta(double *p, F64vec2 &a)
{ _mm_stream_pd(p,a);}
/* Conditional select for F64vec2:
 * F64vec2_SELECT(op) defines select_<op>(a,b,c,d), which returns, per
 * element, c where (a <op> b) holds and d where it does not, using
 * mask = _mm_cmp<op>_pd(a,b) and (mask & c) | andnot(mask, d). */
#define F64vec2_SELECT(op) \
inline F64vec2 select_##op (const F64vec2 &a, const F64vec2 &b, const F64vec2 &c, const F64vec2 &d) \
{ \
F64vec2 mask = _mm_cmp##op##_pd(a,b); \
return( (mask & c) | F64vec2((_mm_andnot_pd(mask,d)))); \
}
F64vec2_SELECT(eq)  /* generates select_eq(a,b,c,d) */
F64vec2_SELECT(lt)  /* generates select_lt(a,b,c,d) */
F64vec2_SELECT(le)  /* generates select_le(a,b,c,d) */
F64vec2_SELECT(gt)  /* generates select_gt(a,b,c,d) */
F64vec2_SELECT(ge)  /* generates select_ge(a,b,c,d) */
F64vec2_SELECT(neq) /* generates select_neq(a,b,c,d) */
F64vec2_SELECT(nlt) /* generates select_nlt(a,b,c,d) */
F64vec2_SELECT(nle) /* generates select_nle(a,b,c,d) */
/* Macro is local to this section; remove it to avoid leaking into user code */
#undef F64vec2_SELECT
/* Convert the lower DP FP value of a to a 32 bit signed integer using Truncate*/
inline int F64vec2ToInt(const F64vec2 &a)
{
return _mm_cvttsd_si32(a);
}
/* Convert the two LOWER SP FP values of a to DP FP values.
 * (Original comment said all 4 floats are converted; _mm_cvtps_pd
 * converts only the low two.) */
inline F64vec2 F32vec4ToF64vec2(const F32vec4 &a)
{
    return F64vec2(_mm_cvtps_pd(a));
}
/* Convert the 2 DP FP values of a to SP FP values in the low half of
 * the result (cvtpd2ps) */
inline F32vec4 F64vec2ToF32vec4(const F64vec2 &a)
{
    return F32vec4(_mm_cvtpd_ps(a));
}
/* Convert the signed 32-bit integer b to a DP FP value in the lower
 * element; the upper DP FP element of a is passed through unchanged */
inline F64vec2 IntToF64vec2(const F64vec2 &a, int b)
{
    __m128d passthrough = a;
    return F64vec2(_mm_cvtsi32_sd(passthrough, b));
}
#pragma pack(pop) /* 16-B aligned */
#ifdef _MSC_VER
#pragma pack(pop)
#endif /* _MSC_VER */
#endif /* defined (_M_CEE_PURE) */
#endif /* RC_INVOKED */
#endif /* _DVEC_H_INCLUDED */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -