📄 dvec.h
字号:
/***
*** Copyright (C) 1985-1999 Intel Corporation. All rights reserved.
***
*** The information and source code contained herein is the exclusive
*** property of Intel Corporation and may not be disclosed, examined
*** or reproduced in whole or in part without explicit written authorization
*** from the company.
***
****/
/*
* Definition of a C++ class interface to Willamette New Instruction intrinsics.
*
* File name : dvec.h class definitions
*
* Concept: A C++ abstraction of Willamette new instructions designed to improve
* programmer productivity. Speed and accuracy are sacrificed for utility.
* Facilitates an easy transition to compiler intrinsics
* or assembly language.
*
*/
#ifndef _DVEC_H_INCLUDED
#define _DVEC_H_INCLUDED
#ifndef RC_INVOKED
#if !defined __cplusplus
#error ERROR: This file is only supported in C++ compilations!
#endif /* !defined __cplusplus */
#if defined (_M_CEE_PURE)
#error ERROR: This file is not supported in the pure mode!
#else /* defined (_M_CEE_PURE) */
#include <emmintrin.h> /* Willamette New Instructions Intrinsics include file */
#include <fvec.h>
#include <crtdefs.h>
/*
 * _VEC_ASSERT(expr): debug-only check used by the element accessors in this
 * header to validate element indices.
 *   - If the includer has already defined _VEC_ASSERT, that definition is kept.
 *   - With NDEBUG defined, the macro expands to a no-op and expr is not evaluated.
 *   - Otherwise, when expr is false, _wassert is called with the stringized
 *     expression, the current file name and the current line number.
 */
#ifndef _VEC_ASSERT
#ifdef NDEBUG
#define _VEC_ASSERT(_Expression) ((void)0)
#else /* NDEBUG */
/* Declare _wassert locally with C linkage so the macro below can call it. */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
_CRTIMP void __cdecl _wassert(_In_z_ const wchar_t * _Message, _In_z_ const wchar_t *_File, _In_ unsigned _Line);
#ifdef __cplusplus
}
#endif /* __cplusplus */
/* "a || (b, 0)" form: _wassert is only reached when the expression is false. */
#define _VEC_ASSERT(_Expression) (void)( (!!(_Expression)) || (_wassert(_CRT_WIDE(#_Expression), _CRT_WIDE(__FILE__), __LINE__), 0) )
#endif /* NDEBUG */
#endif /* _VEC_ASSERT */
#ifdef _MSC_VER
#pragma pack(push,_CRT_PACKING)
#endif /* _MSC_VER */
/* Define _ENABLE_VEC_DEBUG to enable std::ostream inserters for debug output */
#if defined (_ENABLE_VEC_DEBUG)
#include <iostream>
#endif /* defined (_ENABLE_VEC_DEBUG) */
#pragma pack(push,16) /* Must ensure class & union 16-B aligned */
/* EMM Functionality Intrinsics */
/* Forward declarations of the 128-bit integer vector wrapper classes. */
class I8vec16; /* 16 elements, each element a signed or unsigned char data type */
class Is8vec16; /* 16 elements, each element a signed char data type */
class Iu8vec16; /* 16 elements, each element an unsigned char data type */
class I16vec8; /* 8 elements, each element a signed or unsigned short */
class Is16vec8; /* 8 elements, each element a signed short */
class Iu16vec8; /* 8 elements, each element an unsigned short */
class I32vec4; /* 4 elements, each element a signed or unsigned int */
class Is32vec4; /* 4 elements, each element a signed int */
class Iu32vec4; /* 4 elements, each element an unsigned int */
class I64vec2; /* 2 elements, each element a signed or unsigned 64-bit integer */
class I128vec1; /* 1 element, a __m128i data type */
/*
 * Element-access helpers: reinterpret the storage of `vector` as an array of
 * the named element type and yield an lvalue for element number `element`.
 *
 *   _MM_16UB / _MM_16B  - unsigned / signed char elements
 *   _MM_8UW  / _MM_8W   - unsigned / signed short elements
 *   _MM_4UDW / _MM_4DW  - unsigned / signed int elements
 *   _MM_2QW             - __int64 elements
 *
 * No bounds checking is performed here; callers validate the index.
 *
 * The arguments are substituted directly (no `##`): pasting `&` or `+` onto
 * an identifier or number does not produce a single valid preprocessing token
 * and is rejected by conforming preprocessors. Both arguments are
 * parenthesized so that expression arguments such as `i + 1` index correctly.
 */
#define _MM_16UB(element,vector) (*((unsigned char*)&(vector) + (element)))
#define _MM_16B(element,vector) (*((signed char*)&(vector) + (element)))
#define _MM_8UW(element,vector) (*((unsigned short*)&(vector) + (element)))
#define _MM_8W(element,vector) (*((short*)&(vector) + (element)))
#define _MM_4UDW(element,vector) (*((unsigned int*)&(vector) + (element)))
#define _MM_4DW(element,vector) (*((int*)&(vector) + (element)))
#define _MM_2QW(element,vector) (*((__int64*)&(vector) + (element)))
/*
 * get_mask128: returns an __m128i whose two 64-bit halves are both
 * 0xffffffffffffffff, i.e. every bit set. The value is held in a
 * function-local static so it is constructed only once; warning 4640 is
 * disabled around the function because of that local static.
 */
#pragma warning(push)
#pragma warning(disable : 4640)
inline const __m128i get_mask128()
{
    static const __m128i all_bits_set = _mm_set1_epi64(M64(0xffffffffffffffffi64));
    return all_bits_set;
}
#pragma warning(pop)
//DEVDIV Remove alias created in public\sdk\inc\winnt.h
#ifdef M128
#undef M128
#endif /* M128 */
#ifdef PM128
#undef PM128
#endif /* PM128 */
//end DEVDIV
/* M128 Class:
 * 1 element, a __m128i data type
 * Constructors & Logical Operations
 *
 * Thin wrapper around a single __m128i. It converts implicitly to and from
 * __m128i, so instances can be passed straight to the SSE2 intrinsics.
 */
class M128
{
protected:
    __m128i vec; /* the wrapped 128-bit value */

public:
    /* Default construction leaves vec uninitialized. */
    M128() { }

    /* Wrap a raw __m128i value. */
    M128(__m128i mm) : vec(mm) { }

    /* Implicit conversion back to the raw __m128i. */
    operator __m128i() const { return vec; }

    /* Logical Operations: bitwise AND / OR / XOR with a, stored in place. */
    M128& operator&=(const M128 &a)
    {
        vec = _mm_and_si128(vec, a);
        return *this;
    }

    M128& operator|=(const M128 &a)
    {
        vec = _mm_or_si128(vec, a);
        return *this;
    }

    M128& operator^=(const M128 &a)
    {
        vec = _mm_xor_si128(vec, a);
        return *this;
    }
};
/* Bitwise AND of all 128 bits of a and b. */
inline M128 operator&(const M128 &a, const M128 &b)
{
    return M128(_mm_and_si128(a, b));
}
/* Bitwise OR of all 128 bits of a and b. */
inline M128 operator|(const M128 &a, const M128 &b)
{
    return M128(_mm_or_si128(a, b));
}
/* Bitwise XOR of all 128 bits of a and b. */
inline M128 operator^(const M128 &a, const M128 &b)
{
    return M128(_mm_xor_si128(a, b));
}
/* AND-NOT via _mm_andnot_si128: the FIRST operand is the one complemented,
 * i.e. the result is (~a) & b. */
inline M128 andnot(const M128 &a, const M128 &b)
{
    return M128(_mm_andnot_si128(a, b));
}
/* I128vec1 Class:
 * 1 element, a __m128i data type
 * Contains Operations which can operate on any __m128i data type
 *
 * Adds nothing beyond M128 except assignment and logical-assignment
 * operators that return I128vec1& instead of M128&.
 */
class I128vec1 : public M128
{
public:
    /* Default construction leaves the value uninitialized. */
    I128vec1() { }

    /* Wrap a raw __m128i value. */
    I128vec1(__m128i mm) : M128(mm) { }

    /* Assignment from any M128-derived vector: copies the 128-bit value. */
    I128vec1& operator= (const M128 &a)
    {
        vec = a; /* goes through M128::operator __m128i() */
        return *this;
    }

    /* Logical assignment operators: bitwise AND / OR / XOR with a, in place. */
    I128vec1& operator&=(const M128 &a)
    {
        vec = _mm_and_si128(vec, a);
        return *this;
    }

    I128vec1& operator|=(const M128 &a)
    {
        vec = _mm_or_si128(vec, a);
        return *this;
    }

    I128vec1& operator^=(const M128 &a)
    {
        vec = _mm_xor_si128(vec, a);
        return *this;
    }
};
/* I64vec2 Class:
* 2 elements, each element signed or unsigned 64-bit integer
*/
class I64vec2 : public M128
{
public:
I64vec2() { }
I64vec2(__m128i mm) : M128(mm) { }
I64vec2(__m64 q1, __m64 q0)
{
_MM_2QW(0,vec) = *(__int64*)&q0;
_MM_2QW(1,vec) = *(__int64*)&q1;
}
/* Assignment Operator */
I64vec2& operator= (const M128 &a) { return *this = (I64vec2) a; }
/* Logical Assignment Operators */
I64vec2& operator&=(const M128 &a) { return *this = (I64vec2) _mm_and_si128(vec,a); }
I64vec2& operator|=(const M128 &a) { return *this = (I64vec2) _mm_or_si128(vec,a); }
I64vec2& operator^=(const M128 &a) { return *this = (I64vec2) _mm_xor_si128(vec,a); }
/* Addition & Subtraction Assignment Operators */
I64vec2& operator +=(const I64vec2 &a) { return *this = (I64vec2) _mm_add_epi64(vec,a); }
I64vec2& operator -=(const I64vec2 &a) { return *this = (I64vec2) _mm_sub_epi64(vec,a); }
/* Shift Logical Operators */
I64vec2 operator<<(const I64vec2 &a) { return _mm_sll_epi64(vec,a); }
I64vec2 operator<<(int count) { return _mm_slli_epi64(vec,count); }
I64vec2& operator<<=(const I64vec2 &a) { return *this = (I64vec2) _mm_sll_epi64(vec,a); }
I64vec2& operator<<=(int count) { return *this = (I64vec2) _mm_slli_epi64(vec,count); }
I64vec2 operator>>(const I64vec2 &a) { return _mm_srl_epi64(vec,a); }
I64vec2 operator>>(int count) { return _mm_srli_epi64(vec,count); }
I64vec2& operator>>=(const I64vec2 &a) { return *this = (I64vec2) _mm_srl_epi64(vec,a); }
I64vec2& operator>>=(int count) { return *this = (I64vec2) _mm_srli_epi64(vec,count); }
/* Element Access for Debug, No data modified */
const __int64& operator[](int i)const
{
_VEC_ASSERT(static_cast<unsigned int>(i) < 2); /* Only 2 elements to access */
return _MM_2QW(i,vec);
}
/* Element Access and Assignment for Debug */
__int64& operator[](int i)
{
_VEC_ASSERT(static_cast<unsigned int>(i) < 2); /* Only 2 elements to access */
return _MM_2QW(i,vec);
}
};
/* Unpacks */
/* Result element 0 is element 0 of a; result element 1 is element 0 of b. */
inline I64vec2 unpack_low(const I64vec2 &a, const I64vec2 &b)
{
    return I64vec2(_mm_unpacklo_epi64(a, b));
}
/* Result element 0 is element 1 of a; result element 1 is element 1 of b. */
inline I64vec2 unpack_high(const I64vec2 &a, const I64vec2 &b)
{
    return I64vec2(_mm_unpackhi_epi64(a, b));
}
/* I32vec4 Class:
 * 4 elements, each element either a signed or unsigned int
 *
 * Provides the operations that do not depend on signedness: assignment,
 * bitwise logic, element-wise add/subtract and left shifts. No right shift
 * is declared in this class.
 *
 * The non-assigning shift operators are const-qualified: they do not modify
 * *this, so they can be applied to const objects and const references.
 */
class I32vec4 : public M128
{
public:
    /* Default construction leaves the value uninitialized. */
    I32vec4() { }

    /* Wrap a raw __m128i value. */
    I32vec4(__m128i mm) : M128(mm) { }

    /* Assignment Operator: copies the 128-bit value of any M128-derived vector. */
    I32vec4& operator= (const M128 &a)
    {
        vec = a; /* goes through M128::operator __m128i() */
        return *this;
    }

    /* Logical Operators: bitwise AND / OR / XOR with a, in place. */
    I32vec4& operator&=(const M128 &a)
    {
        vec = _mm_and_si128(vec, a);
        return *this;
    }
    I32vec4& operator|=(const M128 &a)
    {
        vec = _mm_or_si128(vec, a);
        return *this;
    }
    I32vec4& operator^=(const M128 &a)
    {
        vec = _mm_xor_si128(vec, a);
        return *this;
    }

    /* Addition & Subtraction Assignment Operators (per 32-bit element). */
    I32vec4& operator +=(const I32vec4 &a)
    {
        vec = _mm_add_epi32(vec, a);
        return *this;
    }
    I32vec4& operator -=(const I32vec4 &a)
    {
        vec = _mm_sub_epi32(vec, a);
        return *this;
    }

    /* Shift Logical Operators.
     * The I32vec4 overloads pass a to _mm_sll_epi32 as the shift-count
     * operand; the int overloads use the immediate-count form _mm_slli_epi32. */
    I32vec4 operator<<(const I32vec4 &a) const { return _mm_sll_epi32(vec, a); }
    I32vec4 operator<<(int count) const { return _mm_slli_epi32(vec, count); }
    I32vec4& operator<<=(const I32vec4 &a)
    {
        vec = _mm_sll_epi32(vec, a);
        return *this;
    }
    I32vec4& operator<<=(int count)
    {
        vec = _mm_slli_epi32(vec, count);
        return *this;
    }
};
/* Per-element equality compare via _mm_cmpeq_epi32: each 32-bit element of
 * the result is all ones where a and b are equal, and zero otherwise. */
inline I32vec4 cmpeq(const I32vec4 &a, const I32vec4 &b)
{
    return I32vec4(_mm_cmpeq_epi32(a, b));
}
/* Per-element inequality compare: the bitwise complement of the cmpeq
 * result, formed as (~equal) & mask where mask comes from get_mask128(). */
inline I32vec4 cmpneq(const I32vec4 &a, const I32vec4 &b)
{
    const __m128i equal_lanes = _mm_cmpeq_epi32(a, b);
    return _mm_andnot_si128(equal_lanes, get_mask128());
}
/* Interleaves the two low 32-bit elements of a and b: a0, b0, a1, b1. */
inline I32vec4 unpack_low(const I32vec4 &a, const I32vec4 &b)
{
    return I32vec4(_mm_unpacklo_epi32(a, b));
}
/* Interleaves the two high 32-bit elements of a and b: a2, b2, a3, b3. */
inline I32vec4 unpack_high(const I32vec4 &a, const I32vec4 &b)
{
    return I32vec4(_mm_unpackhi_epi32(a, b));
}
/* Is32vec4 Class:
* 4 elements, each element signed integer
*/
class Is32vec4 : public I32vec4
{
public:
Is32vec4() { }
Is32vec4(__m128i mm) : I32vec4(mm) { }
Is32vec4(int i3, int i2, int i1, int i0)
{
_MM_4DW(0,vec) = i0;
_MM_4DW(1,vec) = i1;
_MM_4DW(2,vec) = i2;
_MM_4DW(3,vec) = i3;
}
/* Assignment Operator */
Is32vec4& operator= (const M128 &a) { return *this = (Is32vec4) a; }
/* Logical Operators */
Is32vec4& operator&=(const M128 &a) { return *this = (Is32vec4) _mm_and_si128(vec,a); }
Is32vec4& operator|=(const M128 &a) { return *this = (Is32vec4) _mm_or_si128(vec,a); }
Is32vec4& operator^=(const M128 &a) { return *this = (Is32vec4) _mm_xor_si128(vec,a); }
/* Addition & Subtraction Assignment Operators */
Is32vec4& operator +=(const I32vec4 &a) { return *this = (Is32vec4)_mm_add_epi32(vec,a); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -