📄 xmmintrin.h
字号:
/* Hand A and B, both viewed as __v4sf, to the `comile' builtin and return
   its int result.  NOTE(review): presumably the COMISS-based "A <= B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_comile_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_comile (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `comigt' builtin and return
   its int result.  NOTE(review): presumably the COMISS-based "A > B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_comigt_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_comigt (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `comige' builtin and return
   its int result.  NOTE(review): presumably the COMISS-based "A >= B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_comige_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_comige (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `comineq' builtin and return
   its int result.  NOTE(review): presumably the COMISS-based "A != B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_comineq_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_comineq (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomieq' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A == B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomieq_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomieq (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomilt' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A < B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomilt_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomilt (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomile' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A <= B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomile_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomile (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomigt' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A > B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomigt_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomigt (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomige' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A >= B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomige_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomige (__lhs, __rhs);
}
/* Hand A and B, both viewed as __v4sf, to the `ucomineq' builtin and return
   its int result.  NOTE(review): presumably the UCOMISS-based "A != B" test
   on the lowest SPFP elements -- confirm against the ISA reference.  */
static __inline int
_mm_ucomineq_ss (__m128 __A, __m128 __B)
{
  __v4sf __lhs = (__v4sf) __A;
  __v4sf __rhs = (__v4sf) __B;

  return __builtin_ia32_ucomineq (__lhs, __rhs);
}
/* Return the lower SPFP value of A converted to a 32-bit integer, rounded
   according to the current rounding mode.  */
static __inline int
_mm_cvtss_si32 (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return __builtin_ia32_cvtss2si (__src);
}
/* Alternate spelling of _mm_cvtss_si32: forwards A and returns that
   function's result unchanged.  */
static __inline int
_mm_cvt_ss2si (__m128 __A)
{
  int __converted = _mm_cvtss_si32 (__A);

  return __converted;
}
#ifdef __x86_64__
/* Return the lower SPFP value of A converted to a 64-bit (long long)
   integer, rounded according to the current rounding mode.  Only defined
   when compiling for x86-64.  */
static __inline long long
_mm_cvtss_si64x (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return __builtin_ia32_cvtss2si64 (__src);
}
#endif
/* Convert the two lower SPFP values of A to 32-bit integers, rounding
   according to the current rounding mode, and return the pair packed in
   an __m64.  */
static __inline __m64
_mm_cvtps_pi32 (__m128 __A)
{
  __v2si __pair = __builtin_ia32_cvtps2pi ((__v4sf) __A);

  return (__m64) __pair;
}
/* Alternate spelling of _mm_cvtps_pi32: forwards A and returns that
   function's result unchanged.  */
static __inline __m64
_mm_cvt_ps2pi (__m128 __A)
{
  __m64 __converted = _mm_cvtps_pi32 (__A);

  return __converted;
}
/* Return the lower SPFP value of A truncated to a 32-bit integer.  */
static __inline int
_mm_cvttss_si32 (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return __builtin_ia32_cvttss2si (__src);
}
/* Alternate spelling of _mm_cvttss_si32: forwards A and returns that
   function's result unchanged.  */
static __inline int
_mm_cvtt_ss2si (__m128 __A)
{
  int __truncated = _mm_cvttss_si32 (__A);

  return __truncated;
}
#ifdef __x86_64__
/* Return the lower SPFP value of A truncated to a 64-bit (long long)
   integer.  Only defined when compiling for x86-64.  */
static __inline long long
_mm_cvttss_si64x (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return __builtin_ia32_cvttss2si64 (__src);
}
#endif
/* Truncate the two lower SPFP values of A to 32-bit integers and return
   the pair packed in an __m64.  */
static __inline __m64
_mm_cvttps_pi32 (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return (__m64) __builtin_ia32_cvttps2pi (__src);
}
/* Alternate spelling of _mm_cvttps_pi32: forwards A and returns that
   function's result unchanged.  */
static __inline __m64
_mm_cvtt_ps2pi (__m128 __A)
{
  __m64 __truncated = _mm_cvttps_pi32 (__A);

  return __truncated;
}
/* Return A with element zero replaced by the int B converted to SPFP
   form.  */
static __inline __m128
_mm_cvtsi32_ss (__m128 __A, int __B)
{
  __v4sf __base = (__v4sf) __A;

  return (__m128) __builtin_ia32_cvtsi2ss (__base, __B);
}
/* Alternate spelling of _mm_cvtsi32_ss: forwards A and B and returns that
   function's result unchanged.  */
static __inline __m128
_mm_cvt_si2ss (__m128 __A, int __B)
{
  __m128 __merged = _mm_cvtsi32_ss (__A, __B);

  return __merged;
}
#ifdef __x86_64__
/* Return A with element zero replaced by the long long B converted to
   SPFP form.  Only defined when compiling for x86-64.  */
static __inline __m128
_mm_cvtsi64x_ss (__m128 __A, long long __B)
{
  __v4sf __base = (__v4sf) __A;

  return (__m128) __builtin_ia32_cvtsi642ss (__base, __B);
}
#endif
/* Convert the two 32-bit values held in B to SPFP form and return A with
   them inserted as its two lower elements.  */
static __inline __m128
_mm_cvtpi32_ps (__m128 __A, __m64 __B)
{
  __v4sf __base = (__v4sf) __A;
  __v2si __ints = (__v2si) __B;

  return (__m128) __builtin_ia32_cvtpi2ps (__base, __ints);
}
/* Alternate spelling of _mm_cvtpi32_ps: forwards A and B and returns that
   function's result unchanged.  */
static __inline __m128
_mm_cvt_pi2ps (__m128 __A, __m64 __B)
{
  __m128 __merged = _mm_cvtpi32_ps (__A, __B);

  return __merged;
}
/* Convert the four signed 16-bit values in A to SPFP form.  */
static __inline __m128
_mm_cvtpi16_ps (__m64 __A)
{
  __v4hi __words = (__v4hi) __A;
  /* Comparing zero against the words yields a mask that supplies the
     missing sign bits when the words are widened by the unpacks below.  */
  __v4hi __signmask
    = __builtin_ia32_pcmpgtw ((__v4hi) __builtin_ia32_mmx_zero (), __words);
  /* Widen the four words to doublewords, two per vector.  */
  __v2si __upper = (__v2si) __builtin_ia32_punpckhwd (__words, __signmask);
  __v2si __lower = (__v2si) __builtin_ia32_punpcklwd (__words, __signmask);
  /* Start from a zeroed vector and convert the upper pair into its two
     lower elements.  */
  __v4sf __acc
    = __builtin_ia32_cvtpi2ps ((__v4sf) __builtin_ia32_setzerops (), __upper);

  /* Copy that converted pair into the two upper elements, then overwrite
     the two lower elements with the converted lower pair.  */
  __acc = __builtin_ia32_movlhps (__acc, __acc);
  __acc = __builtin_ia32_cvtpi2ps (__acc, __lower);

  return (__m128) __acc;
}
/* Convert the four unsigned 16-bit values in A to SPFP form.  */
static __inline __m128
_mm_cvtpu16_ps (__m64 __A)
{
  __v4hi __words = (__v4hi) __A;
  __v4hi __fill = (__v4hi) __builtin_ia32_mmx_zero ();
  /* Widen the four words to doublewords by interleaving them with zero,
     two per vector.  */
  __v2si __upper = (__v2si) __builtin_ia32_punpckhwd (__words, __fill);
  __v2si __lower = (__v2si) __builtin_ia32_punpcklwd (__words, __fill);
  /* Start from a zeroed vector and convert the upper pair into its two
     lower elements.  */
  __v4sf __acc
    = __builtin_ia32_cvtpi2ps ((__v4sf) __builtin_ia32_setzerops (), __upper);

  /* Copy that converted pair into the two upper elements, then overwrite
     the two lower elements with the converted lower pair.  */
  __acc = __builtin_ia32_movlhps (__acc, __acc);
  __acc = __builtin_ia32_cvtpi2ps (__acc, __lower);

  return (__m128) __acc;
}
/* Convert the low four signed 8-bit values in A to SPFP form.  */
static __inline __m128
_mm_cvtpi8_ps (__m64 __A)
{
  __v8qi __bytes = (__v8qi) __A;
  /* Comparing zero against the bytes yields a mask that supplies the
     missing sign bits when the bytes are widened by the unpack below.  */
  __v8qi __signmask
    = __builtin_ia32_pcmpgtb ((__v8qi) __builtin_ia32_mmx_zero (), __bytes);
  /* Widen the four low bytes to words.  */
  __m64 __words = (__m64) __builtin_ia32_punpcklbw (__bytes, __signmask);

  /* The signed 16-bit conversion finishes the job.  */
  return _mm_cvtpi16_ps (__words);
}
/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
static __inline __m128
_mm_cvtpu8_ps(__m64 __A)
{
  __v8qi __bytes = (__v8qi) __A;
  __v8qi __fill = (__v8qi) __builtin_ia32_mmx_zero ();
  /* Widen the four low bytes to words by interleaving them with zero.  */
  __m64 __words = (__m64) __builtin_ia32_punpcklbw (__bytes, __fill);

  /* The unsigned 16-bit conversion finishes the job.  */
  return _mm_cvtpu16_ps (__words);
}
/* Convert the four signed 32-bit values in A and B to SPFP form: the two
   from A become the lower two elements of the result and the two from B
   the upper two.  */
static __inline __m128
_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
{
  __v4sf __blank = (__v4sf) __builtin_ia32_setzerops ();
  /* Each conversion lands in the two lower elements of its own vector.  */
  __v4sf __from_a = __builtin_ia32_cvtpi2ps (__blank, (__v2si) __A);
  __v4sf __from_b = __builtin_ia32_cvtpi2ps (__blank, (__v2si) __B);
  /* Move B's converted pair into the upper two elements alongside A's.  */
  __v4sf __joined = __builtin_ia32_movlhps (__from_a, __from_b);

  return (__m128) __joined;
}
/* Convert the four SPFP values in A to four signed 16-bit integers.  */
static __inline __m64
_mm_cvtps_pi16(__m128 __A)
{
  __v4sf __all = (__v4sf) __A;
  /* Bring the upper two values of A down into the two lower elements so
     that the two-element conversion below can reach them.  */
  __v4sf __upper_down = __builtin_ia32_movhlps (__all, __all);
  /* Convert two values at a time: first A's lower pair, then its upper
     pair.  */
  __v2si __first_pair = __builtin_ia32_cvtps2pi (__all);
  __v2si __second_pair = __builtin_ia32_cvtps2pi (__upper_down);

  /* Pack the four doublewords into four words.  NOTE(review): packssdw
     presumably applies signed saturation -- confirm against the ISA
     reference.  */
  return (__m64) __builtin_ia32_packssdw (__first_pair, __second_pair);
}
/* Convert the four SPFP values in A to four signed 8-bit integers.  The
   values are first converted to 16-bit words by _mm_cvtps_pi16 and then
   packed to bytes against a zero vector as the second pack operand.  */
static __inline __m64
_mm_cvtps_pi8(__m128 __A)
{
  __v4hi __words = (__v4hi) _mm_cvtps_pi16 (__A);
  __v4hi __fill = (__v4hi) __builtin_ia32_mmx_zero ();
  __m64 __packed = (__m64) __builtin_ia32_packsswb (__words, __fill);

  return __packed;
}
/* Selects four specific SPFP values from A and B based on MASK.

   The inline-function form below is compiled out by the #if 0; the macro
   in the #else branch is the definition actually in effect.  The macro
   parenthesizes each argument and the whole expansion, and evaluates each
   argument exactly once.  NOTE(review): presumably the macro form is used
   because the shufps builtin needs MASK to be a compile-time constant,
   which a function parameter cannot guarantee -- confirm against the
   compiler documentation.  */
#if 0
static __inline __m128
_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask)
{
return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
}
#else
#define _mm_shuffle_ps(A, B, MASK) \
((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK)))
#endif
/* Select the upper two SPFP values from each of A and B and return them
   interleaved.  */
static __inline __m128
_mm_unpackhi_ps (__m128 __A, __m128 __B)
{
  __v4sf __first = (__v4sf) __A;
  __v4sf __second = (__v4sf) __B;

  return (__m128) __builtin_ia32_unpckhps (__first, __second);
}
/* Select the lower two SPFP values from each of A and B and return them
   interleaved.  */
static __inline __m128
_mm_unpacklo_ps (__m128 __A, __m128 __B)
{
  __v4sf __first = (__v4sf) __A;
  __v4sf __second = (__v4sf) __B;

  return (__m128) __builtin_ia32_unpcklps (__first, __second);
}
/* Return A with its upper two SPFP values replaced by the 64 bits of data
   loaded from P; the lower two values pass through from A.  The cast to
   __v2si * drops P's const qualifier, presumably because the builtin's
   prototype takes a non-const pointer.  */
static __inline __m128
_mm_loadh_pi (__m128 __A, __m64 const *__P)
{
  __v2si *__src = (__v2si *) __P;
  __v4sf __base = (__v4sf) __A;

  return (__m128) __builtin_ia32_loadhps (__base, __src);
}
/* Store the upper two SPFP values of A into the 64-bit location P.  */
static __inline void
_mm_storeh_pi (__m64 *__P, __m128 __A)
{
  __v2si *__dst = (__v2si *) __P;
  __v4sf __src = (__v4sf) __A;

  __builtin_ia32_storehps (__dst, __src);
}
/* Return A with its lower two values replaced by the upper two values
   of B.  */
static __inline __m128
_mm_movehl_ps (__m128 __A, __m128 __B)
{
  __v4sf __dst = (__v4sf) __A;
  __v4sf __src = (__v4sf) __B;

  return (__m128) __builtin_ia32_movhlps (__dst, __src);
}
/* Return A with its upper two values replaced by the lower two values
   of B.  */
static __inline __m128
_mm_movelh_ps (__m128 __A, __m128 __B)
{
  __v4sf __dst = (__v4sf) __A;
  __v4sf __src = (__v4sf) __B;

  return (__m128) __builtin_ia32_movlhps (__dst, __src);
}
/* Return A with its lower two SPFP values replaced by the 64 bits of data
   loaded from P; the upper two values pass through from A.  The cast to
   __v2si * drops P's const qualifier, presumably because the builtin's
   prototype takes a non-const pointer.  */
static __inline __m128
_mm_loadl_pi (__m128 __A, __m64 const *__P)
{
  __v2si *__src = (__v2si *) __P;
  __v4sf __base = (__v4sf) __A;

  return (__m128) __builtin_ia32_loadlps (__base, __src);
}
/* Store the lower two SPFP values of A into the 64-bit location P.  */
static __inline void
_mm_storel_pi (__m64 *__P, __m128 __A)
{
  __v2si *__dst = (__v2si *) __P;
  __v4sf __src = (__v4sf) __A;

  __builtin_ia32_storelps (__dst, __src);
}
/* Return a 4-bit mask built from the most significant bits of the four
   SPFP values in A.  */
static __inline int
_mm_movemask_ps (__m128 __A)
{
  __v4sf __src = (__v4sf) __A;

  return __builtin_ia32_movmskps (__src);
}
/* Read the control register through the stmxcsr builtin and return its
   contents.  */
static __inline unsigned int
_mm_getcsr (void)
{
  unsigned int __csr = __builtin_ia32_stmxcsr ();

  return __csr;
}
/* Return the exception bits of the control register, i.e. its contents
   masked with _MM_EXCEPT_MASK.  */
static __inline unsigned int
_MM_GET_EXCEPTION_STATE (void)
{
  unsigned int __csr = _mm_getcsr ();

  return __csr & _MM_EXCEPT_MASK;
}
/* Return the control-register bits selected by _MM_MASK_MASK.  */
static __inline unsigned int
_MM_GET_EXCEPTION_MASK (void)
{
  unsigned int __csr = _mm_getcsr ();

  return __csr & _MM_MASK_MASK;
}
/* Return the control-register bits selected by _MM_ROUND_MASK.  */
static __inline unsigned int
_MM_GET_ROUNDING_MODE (void)
{
  unsigned int __csr = _mm_getcsr ();

  return __csr & _MM_ROUND_MASK;
}
/* Return the control-register bits selected by _MM_FLUSH_ZERO_MASK.  */
static __inline unsigned int
_MM_GET_FLUSH_ZERO_MODE (void)
{
  unsigned int __csr = _mm_getcsr ();

  return __csr & _MM_FLUSH_ZERO_MASK;
}
/* Load the value I into the control register through the ldmxcsr
   builtin.  */
static __inline void
_mm_setcsr (unsigned int __I)
{
  unsigned int __newcsr = __I;

  __builtin_ia32_ldmxcsr (__newcsr);
}
/* Set the exception bits in the control register: clear the bits covered
   by _MM_EXCEPT_MASK, OR in MASK, and write the result back.  MASK is not
   filtered, so any bits it carries outside _MM_EXCEPT_MASK are set as
   well.  */
static __inline void
_MM_SET_EXCEPTION_STATE(unsigned int __mask)
{
  unsigned int __csr = _mm_getcsr ();

  __csr &= ~_MM_EXCEPT_MASK;
  __csr |= __mask;
  _mm_setcsr (__csr);
}
/* Update the control register: clear the bits covered by _MM_MASK_MASK,
   OR in MASK, and write the result back.  MASK is not filtered, so any
   bits it carries outside _MM_MASK_MASK are set as well.  */
static __inline void
_MM_SET_EXCEPTION_MASK (unsigned int __mask)
{
  unsigned int __csr = _mm_getcsr ();

  __csr &= ~_MM_MASK_MASK;
  __csr |= __mask;
  _mm_setcsr (__csr);
}
static __inline void
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -