📄 xmmintrin.h
字号:
/*
 * Scalar compare intrinsics with an int result. Every prototype in this
 * group takes two __m128 operands (_A, _B) and returns int.
 * NOTE(review): by Intel naming convention the comi/ucomi spellings
 * presumably correspond to the COMISS/UCOMISS instructions acting on the
 * lowest float element only -- confirm against the Intel intrinsics
 * reference before relying on NaN/exception behaviour.
 */
extern int _mm_comineq_ss(__m128 _A, __m128 _B);
extern int _mm_ucomieq_ss(__m128 _A, __m128 _B);
extern int _mm_ucomilt_ss(__m128 _A, __m128 _B);
extern int _mm_ucomile_ss(__m128 _A, __m128 _B);
extern int _mm_ucomigt_ss(__m128 _A, __m128 _B);
extern int _mm_ucomige_ss(__m128 _A, __m128 _B);
extern int _mm_ucomineq_ss(__m128 _A, __m128 _B);
/*
 * FP, conversions
 *
 * Conversions between single-precision data held in an __m128 and integer
 * data held in an int or an __m64. The _mm_cvt_* / _mm_cvtt_* spellings
 * declared here are the primary names; this header also provides
 * _mm_cvt*32* macro aliases for them in its "Alternate intrinsic names"
 * section.
 * NOTE(review): the cvtt forms are presumably the truncating variants
 * (CVTTSS2SI / CVTTPS2PI) -- confirm against the Intel intrinsics reference.
 */
extern int _mm_cvt_ss2si(__m128 _A);
extern __m64 _mm_cvt_ps2pi(__m128 _A);
extern int _mm_cvtt_ss2si(__m128 _A);
extern __m64 _mm_cvtt_ps2pi(__m128 _A);
/* The two *2ss / *2ps forms take an __m128 first operand and return an __m128. */
extern __m128 _mm_cvt_si2ss(__m128, int);
extern __m128 _mm_cvt_pi2ps(__m128, __m64);
/* Returns a plain C float taken from the __m128 argument. */
extern float _mm_cvtss_f32(__m128 _A);
/*
 * Support for 64-bit extension intrinsics
 *
 * __int64 counterparts of the scalar float <-> integer conversions.
 * They are declared only when _M_AMD64 is defined, so code that must also
 * build for other targets cannot use them unconditionally.
 */
#if defined (_M_AMD64)
extern __int64 _mm_cvtss_si64(__m128 _A);
extern __int64 _mm_cvttss_si64(__m128 _A);
extern __m128 _mm_cvtsi64_ss(__m128 _A, __int64 _B);
#endif /* defined (_M_AMD64) */
/*
 * FP, misc
 *
 * Element rearrangement (shuffle / unpack / move-half), half-register
 * loads and stores through __m64 pointers, and the movemask query.
 */
/* _Imm8 selects the shuffle pattern. NOTE(review): it is presumably
 * required to be a compile-time constant -- confirm in the compiler docs. */
extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8);
extern __m128 _mm_unpackhi_ps(__m128 _A, __m128 _B);
extern __m128 _mm_unpacklo_ps(__m128 _A, __m128 _B);
/* Half loads take the existing __m128 plus a pointer to a const __m64 and
 * return the resulting __m128; half stores write one __m64 through the
 * pointer and return nothing. */
extern __m128 _mm_loadh_pi(__m128, __m64 const*);
extern __m128 _mm_movehl_ps(__m128, __m128);
extern __m128 _mm_movelh_ps(__m128, __m128);
extern void _mm_storeh_pi(__m64 *, __m128);
extern __m128 _mm_loadl_pi(__m128, __m64 const*);
extern void _mm_storel_pi(__m64 *, __m128);
/* Returns an int derived from the __m128 argument. */
extern int _mm_movemask_ps(__m128 _A);
/*
 * Integer extensions
 *
 * Integer operations on __m64 operands, declared under their _m_*
 * (instruction-mnemonic style) names. This header also defines _mm_*
 * macro aliases for each of them in its "Alternate intrinsic names"
 * section (for example _mm_extract_pi16 for _m_pextrw).
 */
/* Word extract / insert; the int arguments carry the value and/or the
 * element selector. NOTE(review): selector is presumably an immediate --
 * confirm in the compiler docs. */
extern int _m_pextrw(__m64, int);
extern __m64 _m_pinsrw(__m64, int, int);
/* Element-wise max / min (sw and ub variants). */
extern __m64 _m_pmaxsw(__m64, __m64);
extern __m64 _m_pmaxub(__m64, __m64);
extern __m64 _m_pminsw(__m64, __m64);
extern __m64 _m_pminub(__m64, __m64);
extern int _m_pmovmskb(__m64);
extern __m64 _m_pmulhuw(__m64, __m64);
extern __m64 _m_pshufw(__m64, int);
/* Store-type operation: takes two __m64 values and a char* destination,
 * returns nothing. */
extern void _m_maskmovq(__m64, __m64, char *);
/* Averages (byte and word) and sum of absolute differences. */
extern __m64 _m_pavgb(__m64, __m64);
extern __m64 _m_pavgw(__m64, __m64);
extern __m64 _m_psadbw(__m64, __m64);
/*
 * memory & initialization
 *
 * Construction of __m128 values from scalars, loads and stores through
 * float pointers, cache/ordering hints, and control-register access.
 */
/* Build an __m128 from one or four float arguments, or from nothing. */
extern __m128 _mm_set_ss(float _A);
extern __m128 _mm_set_ps1(float _A);
extern __m128 _mm_set_ps(float _A, float _B, float _C, float _D);
extern __m128 _mm_setr_ps(float _A, float _B, float _C, float _D);
extern __m128 _mm_setzero_ps(void);
/* Loads: read through a pointer to const float and return an __m128.
 * NOTE(review): alignment requirements differ between the plain and the
 * "u" (presumably unaligned) forms -- confirm in the intrinsics reference. */
extern __m128 _mm_load_ss(float const*_A);
extern __m128 _mm_load_ps1(float const*_A);
extern __m128 _mm_load_ps(float const*_A);
extern __m128 _mm_loadr_ps(float const*_A);
extern __m128 _mm_loadu_ps(float const*_A);
/* Stores: write the __m128 _A through the float pointer _V. */
extern void _mm_store_ss(float *_V, __m128 _A);
extern void _mm_store_ps1(float *_V, __m128 _A);
extern void _mm_store_ps(float *_V, __m128 _A);
extern void _mm_storer_ps(float *_V, __m128 _A);
extern void _mm_storeu_ps(float *_V, __m128 _A);
/* _Sel chooses the prefetch variant; _A is the address to prefetch. */
extern void _mm_prefetch(char const*_A, int _Sel);
extern void _mm_stream_pi(__m64 *, __m64);
extern void _mm_stream_ps(float *, __m128);
extern __m128 _mm_move_ss(__m128 _A, __m128 _B);
extern void _mm_sfence(void);
/* Read / write a control-status value as an unsigned int. */
extern unsigned int _mm_getcsr(void);
extern void _mm_setcsr(unsigned int);
/* Allocation helpers are declared here only when __ICL is defined.
 * They use size_t, so a definition of size_t must already be visible. */
#ifdef __ICL
extern void* __cdecl _mm_malloc(size_t _Siz, size_t _Al);
extern void __cdecl _mm_free(void *_P);
#endif /* __ICL */
/* Alternate intrinsic names definition */
#define _mm_cvtss_si32 _mm_cvt_ss2si
#define _mm_cvtps_pi32 _mm_cvt_ps2pi
#define _mm_cvttss_si32 _mm_cvtt_ss2si
#define _mm_cvttps_pi32 _mm_cvtt_ps2pi
#define _mm_cvtsi32_ss _mm_cvt_si2ss
#define _mm_cvtpi32_ps _mm_cvt_pi2ps
#define _mm_extract_pi16 _m_pextrw
#define _mm_insert_pi16 _m_pinsrw
#define _mm_max_pi16 _m_pmaxsw
#define _mm_max_pu8 _m_pmaxub
#define _mm_min_pi16 _m_pminsw
#define _mm_min_pu8 _m_pminub
#define _mm_movemask_pi8 _m_pmovmskb
#define _mm_mulhi_pu16 _m_pmulhuw
#define _mm_shuffle_pi16 _m_pshufw
#define _mm_maskmove_si64 _m_maskmovq
#define _mm_avg_pu8 _m_pavgb
#define _mm_avg_pu16 _m_pavgw
#define _mm_sad_pu8 _m_psadbw
#define _mm_set1_ps _mm_set_ps1
#define _mm_load1_ps _mm_load_ps1
#define _mm_store1_ps _mm_store_ps1
/******************************************************/
/* UTILITY INTRINSICS FUNCTION DEFINITIONS START HERE */
/******************************************************/
/*********************************************************/
/* NAME        : _mm_cvtpi16_ps                          */
/* DESCRIPTION : Widen the four signed 16-bit values in  */
/*               a to 32 bits, then convert them to four */
/*               single-precision float values           */
/* IN          : __m64 a : four signed 16-bit integers   */
/* OUT         : none                                    */
/* RETURN      : __m128 : (float)a                       */
/*********************************************************/
__inline __m128 _mm_cvtpi16_ps(__m64 a)
{
  /* Per-word result of (0 > a); interleaved below as the upper half of
     each widened element, i.e. the sign extension. */
  __m64 sign_words = _mm_cmpgt_pi16(_mm_setzero_si64(), a);
  /* Widen each half of a to a pair of 32-bit integers. */
  __m64 hi_pair = _mm_unpackhi_pi16(a, sign_words);
  __m64 lo_pair = _mm_unpacklo_pi16(a, sign_words);
  /* Convert the high pair first, then move it into the upper half of the
     register so the low pair can be converted into the lower half. */
  __m128 result = _mm_cvtpi32_ps(_mm_setzero_ps(), hi_pair);

  result = _mm_movelh_ps(result, result);
  return _mm_cvtpi32_ps(result, lo_pair);
}
/***********************************************************/
/* NAME        : _mm_cvtpu16_ps                            */
/* DESCRIPTION : Widen the four unsigned 16-bit values in  */
/*               a to 32 bits, then convert them to four   */
/*               single-precision float values             */
/* IN          : __m64 a : four unsigned 16-bit integers   */
/* OUT         : none                                      */
/* RETURN      : __m128 : (float)a                         */
/***********************************************************/
__inline __m128 _mm_cvtpu16_ps(__m64 a)
{
  /* Unsigned input: the upper half of every widened element is zero. */
  __m64 zero_words = _mm_setzero_si64();
  /* Widen each half of a to a pair of 32-bit integers. */
  __m64 hi_pair = _mm_unpackhi_pi16(a, zero_words);
  __m64 lo_pair = _mm_unpacklo_pi16(a, zero_words);
  /* Convert the high pair first, then move it into the upper half of the
     register so the low pair can be converted into the lower half. */
  __m128 result = _mm_cvtpi32_ps(_mm_setzero_ps(), hi_pair);

  result = _mm_movelh_ps(result, result);
  return _mm_cvtpi32_ps(result, lo_pair);
}
/******************************************************/
/* NAME        : _mm_cvtps_pi16                       */
/* DESCRIPTION : Convert four single-precision float  */
/*               values to 32-bit integers, two at a  */
/*               time, and pack the results into four */
/*               16-bit integer values                */
/* IN          : __m128 a                             */
/* OUT         : none                                 */
/* RETURN      : __m64 : (short)a                     */
/******************************************************/
__inline __m64 _mm_cvtps_pi16(__m128 a)
{
  /* Lower two floats of a. */
  __m64 lo_ints = _mm_cvtps_pi32(a);
  /* Upper two floats of a, first moved down into the lower half. */
  __m64 hi_ints = _mm_cvtps_pi32(_mm_movehl_ps(a, a));

  /* Narrow both 32-bit pairs into one register of four 16-bit values. */
  return _mm_packs_pi32(lo_ints, hi_ints);
}
/******************************************************/
/* NAME        : _mm_cvtpi8_ps                        */
/* DESCRIPTION : Widen four signed 8-bit values of a  */
/*               to 16 bits and convert them to four  */
/*               single-precision float values via    */
/*               _mm_cvtpi16_ps                       */
/* IN          : __m64 a                              */
/* OUT         : none                                 */
/* RETURN      : __m128 : (float)a                    */
/******************************************************/
__inline __m128 _mm_cvtpi8_ps(__m64 a)
{
  /* Per-byte result of (0 > a); interleaved below as the upper byte of
     each widened element, i.e. the sign extension. */
  __m64 sign_bytes = _mm_cmpgt_pi8(_mm_setzero_si64(), a);
  /* Only the bytes selected by the "lo" unpack take part. */
  __m64 as_words = _mm_unpacklo_pi8(a, sign_bytes);

  return _mm_cvtpi16_ps(as_words);
}
/******************************************************/
/* NAME        : _mm_cvtpu8_ps                        */
/* DESCRIPTION : Widen four unsigned 8-bit values of  */
/*               a to 16 bits and convert them to     */
/*               four single-precision float values   */
/*               via _mm_cvtpu16_ps                   */
/* IN          : __m64 a                              */
/* OUT         : none                                 */
/* RETURN      : __m128 : (float)a                    */
/******************************************************/
__inline __m128 _mm_cvtpu8_ps(__m64 a)
{
  /* Unsigned input: the upper byte of every widened element is zero. */
  __m64 zero_bytes = _mm_setzero_si64();
  /* Only the bytes selected by the "lo" unpack take part. */
  __m64 as_words = _mm_unpacklo_pi8(a, zero_bytes);

  return _mm_cvtpu16_ps(as_words);
}
/******************************************************/
/* NAME        : _mm_cvtps_pi8                        */
/* DESCRIPTION : Convert four single-precision float  */
/*               values to 16-bit integers with       */
/*               _mm_cvtps_pi16, then pack them into  */
/*               four 8-bit integer values            */
/* IN          : __m128 a                             */
/* OUT         : none                                 */
/* RETURN      : __m64 : (char)a                      */
/******************************************************/
__inline __m64 _mm_cvtps_pi8(__m128 a)
{
  __m64 as_words = _mm_cvtps_pi16(a);

  /* The second pack operand is all zero, so only the four converted
     values contribute data to the result. */
  return _mm_packs_pi16(as_words, _mm_setzero_si64());
}
/******************************************************/
/* NAME        : _mm_cvtpi32x2_ps                     */
/* DESCRIPTION : Convert two pairs of 32-bit integer  */
/*               values to four single-precision      */
/*               float values                         */
/* IN          : __m64 a : operand 1                  */
/*               __m64 b : operand 2                  */
/* OUT         : none                                 */
/* RETURN      : __m128 : (float)a,(float)b           */
/******************************************************/
__inline __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b)
{
  /* Convert each operand separately, starting from a zeroed register. */
  __m128 from_a = _mm_cvt_pi2ps(_mm_setzero_ps(), a);
  __m128 from_b = _mm_cvt_pi2ps(_mm_setzero_ps(), b);

  /* Combine the two converted pairs into a single result. */
  return _mm_movelh_ps(from_a, from_b);
}
#if defined __cplusplus
}; /* End "C" */
#endif /* defined __cplusplus */
#endif /* defined (_M_CEE_PURE) */
#endif /* _INCLUDED_MM2 */
#endif /* __midl */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -