📄 xmmintrin.h
{
  return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
}

/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
static __inline __m64
_mm_avg_pu16 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
}

/* Compute the sum of the absolute differences of the unsigned 8-bit
   values in A and B.  Return the value in the lower 16-bit word; the
   upper words are cleared.  */
static __inline __m64
_mm_sad_pu8 (__m64 __A, __m64 __B)
{
  return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
}

/* Loads one cache line from address P to a location "closer" to the
   processor.  The selector I specifies the type of prefetch operation.  */
#if 0
static __inline void
_mm_prefetch (void *__P, enum _mm_hint __I)
{
  __builtin_prefetch (__P, 0, __I);
}
#else
#define _mm_prefetch(P, I) \
  __builtin_prefetch ((P), 0, (I))
#endif

/* Stores the data in A to the address P without polluting the caches.  */
static __inline void
_mm_stream_pi (__m64 *__P, __m64 __A)
{
  __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
}

/* Likewise.  The address must be 16-byte aligned.  */
static __inline void
_mm_stream_ps (float *__P, __m128 __A)
{
  __builtin_ia32_movntps (__P, (__v4sf)__A);
}

/* Guarantees that every preceding store is globally visible before
   any subsequent store.  */
static __inline void
_mm_sfence (void)
{
  __builtin_ia32_sfence ();
}

/* The execution of the next instruction is delayed by an implementation
   specific amount of time.  The instruction does not modify the
   architectural state.  */
static __inline void
_mm_pause (void)
{
  __asm__ __volatile__ ("rep; nop" : : );
}

/* Transpose the 4x4 matrix composed of row[0-3].  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)                       \
do {                                                                    \
  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);    \
  __v4sf __t0 = __builtin_ia32_shufps (__r0, __r1, 0x44);               \
  __v4sf __t2 = __builtin_ia32_shufps (__r0, __r1, 0xEE);               \
  __v4sf __t1 = __builtin_ia32_shufps (__r2, __r3, 0x44);               \
  __v4sf __t3 = __builtin_ia32_shufps (__r2, __r3, 0xEE);               \
  (row0) = __builtin_ia32_shufps (__t0, __t1, 0x88);                    \
  (row1) = __builtin_ia32_shufps (__t0, __t1, 0xDD);                    \
  (row2) = __builtin_ia32_shufps (__t2, __t3, 0x88);                    \
  (row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD);                    \
} while (0)
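/* Illustrative sketch (not part of the original header): transposing a
   4x4 float matrix in place with the _MM_TRANSPOSE4_PS macro above,
   assuming the _mm_loadu_ps/_mm_storeu_ps intrinsics declared earlier in
   this header.  Hypothetical name; guarded by #if 0 so it does not
   affect compilation.  */
#if 0
static __inline void
transpose4x4_example (float *__m)
{
  __m128 __row0 = _mm_loadu_ps (__m + 0);
  __m128 __row1 = _mm_loadu_ps (__m + 4);
  __m128 __row2 = _mm_loadu_ps (__m + 8);
  __m128 __row3 = _mm_loadu_ps (__m + 12);

  _MM_TRANSPOSE4_PS (__row0, __row1, __row2, __row3); /* rows become columns */

  _mm_storeu_ps (__m + 0,  __row0);
  _mm_storeu_ps (__m + 4,  __row1);
  _mm_storeu_ps (__m + 8,  __row2);
  _mm_storeu_ps (__m + 12, __row3);
}
#endif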
#ifdef __SSE2__
/* SSE2 */
typedef int __v2df __attribute__ ((mode (V2DF)));
typedef int __v2di __attribute__ ((mode (V2DI)));
typedef int __v4si __attribute__ ((mode (V4SI)));
typedef int __v8hi __attribute__ ((mode (V8HI)));
typedef int __v16qi __attribute__ ((mode (V16QI)));

/* Create a selector for use with the SHUFPD instruction.  */
#define _MM_SHUFFLE2(fp1,fp0) \
  (((fp1) << 1) | (fp0))

#define __m128i __v2di
#define __m128d __v2df

/* Create a vector with element 0 as *P and the rest zero.  */
static __inline __m128d
_mm_load_sd (double const *__P)
{
  return (__m128d) __builtin_ia32_loadsd (__P);
}

/* Create a vector with all two elements equal to *P.  */
static __inline __m128d
_mm_load1_pd (double const *__P)
{
  __v2df __tmp = __builtin_ia32_loadsd (__P);
  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
}

static __inline __m128d
_mm_load_pd1 (double const *__P)
{
  return _mm_load1_pd (__P);
}

/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
static __inline __m128d
_mm_load_pd (double const *__P)
{
  return (__m128d) __builtin_ia32_loadapd (__P);
}

/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
static __inline __m128d
_mm_loadu_pd (double const *__P)
{
  return (__m128d) __builtin_ia32_loadupd (__P);
}

/* Load two DPFP values in reverse order.  The address must be aligned.  */
static __inline __m128d
_mm_loadr_pd (double const *__P)
{
  __v2df __tmp = __builtin_ia32_loadapd (__P);
  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
}

/* Create a vector with element 0 as F and the rest zero.  */
static __inline __m128d
_mm_set_sd (double __F)
{
  return (__m128d) __builtin_ia32_loadsd (&__F);
}

/* Create a vector with all two elements equal to F.  */
static __inline __m128d
_mm_set1_pd (double __F)
{
  __v2df __tmp = __builtin_ia32_loadsd (&__F);
  return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
}

static __inline __m128d
_mm_set_pd1 (double __F)
{
  return _mm_set1_pd (__F);
}

/* Create the vector [Z Y].  */
static __inline __m128d
_mm_set_pd (double __Z, double __Y)
{
  union { double __a[2]; __m128d __v; } __u;

  __u.__a[0] = __Y;
  __u.__a[1] = __Z;

  return __u.__v;
}

/* Create the vector [Y Z].  */
static __inline __m128d
_mm_setr_pd (double __Z, double __Y)
{
  return _mm_set_pd (__Y, __Z);
}

/* Create a vector of zeros.  */
static __inline __m128d
_mm_setzero_pd (void)
{
  return (__m128d) __builtin_ia32_setzeropd ();
}

/* Stores the lower DPFP value.  */
static __inline void
_mm_store_sd (double *__P, __m128d __A)
{
  __builtin_ia32_storesd (__P, (__v2df)__A);
}

/* Store the lower DPFP value across two words.  */
static __inline void
_mm_store1_pd (double *__P, __m128d __A)
{
  __v2df __va = (__v2df)__A;
  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0));
  __builtin_ia32_storeapd (__P, __tmp);
}

static __inline void
_mm_store_pd1 (double *__P, __m128d __A)
{
  _mm_store1_pd (__P, __A);
}

/* Store two DPFP values.  The address must be 16-byte aligned.  */
static __inline void
_mm_store_pd (double *__P, __m128d __A)
{
  __builtin_ia32_storeapd (__P, (__v2df)__A);
}

/* Store two DPFP values.  The address need not be 16-byte aligned.  */
static __inline void
_mm_storeu_pd (double *__P, __m128d __A)
{
  __builtin_ia32_storeupd (__P, (__v2df)__A);
}

/* Store two DPFP values in reverse order.  The address must be aligned.  */
static __inline void
_mm_storer_pd (double *__P, __m128d __A)
{
  __v2df __va = (__v2df)__A;
  __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1));
  __builtin_ia32_storeapd (__P, __tmp);
}
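/* Illustrative sketch (not part of the original header): element order for
   the set/store operations above.  _mm_set_pd (Z, Y) places Y in element 0
   (the low half), so a plain store writes { Y, Z } to memory while
   _mm_storer_pd writes { Z, Y }.  Hypothetical name; guarded by #if 0 so
   it does not affect compilation.  */
#if 0
static __inline void
set_store_order_example (void)
{
  double __out[2] __attribute__ ((aligned (16)));
  __m128d __v = _mm_set_pd (3.0, 1.0);  /* Z = 3.0, Y = 1.0 */

  _mm_store_pd (__out, __v);            /* __out[0] == 1.0, __out[1] == 3.0 */
  _mm_storer_pd (__out, __v);           /* __out[0] == 3.0, __out[1] == 1.0 */
}
#endif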
/* Sets the low DPFP value of A from the low value of B.  */
static __inline __m128d
_mm_move_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_add_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_add_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_sub_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_sub_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_mul_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_mul_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_div_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_div_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_sqrt_pd (__m128d __A)
{
  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}

/* Return pair {sqrt (B[0]), A[1]}.  */
static __inline __m128d
_mm_sqrt_sd (__m128d __A, __m128d __B)
{
  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
}

static __inline __m128d
_mm_min_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_min_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_max_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_max_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_and_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_andnot_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_or_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_xor_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
}
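/* Illustrative sketch (not part of the original header): a hypothetical
   helper combining the packed arithmetic above, computing
   dst[i] = sqrt (a[i]*a[i] + b[i]*b[i]) over 16-byte-aligned arrays whose
   length N is a multiple of 2.  Guarded by #if 0 so it does not affect
   compilation.  */
#if 0
static __inline void
hypot2_pd_example (double *__dst, double const *__a, double const *__b,
                   int __n)
{
  int __i;
  for (__i = 0; __i < __n; __i += 2)
    {
      __m128d __va = _mm_load_pd (__a + __i);          /* aligned loads */
      __m128d __vb = _mm_load_pd (__b + __i);
      __m128d __sum = _mm_add_pd (_mm_mul_pd (__va, __va),
                                  _mm_mul_pd (__vb, __vb));
      _mm_store_pd (__dst + __i, _mm_sqrt_pd (__sum)); /* aligned store */
    }
}
#endif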
static __inline __m128d
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmplt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmple_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpgt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpneq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpnle_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpngt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpnge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpunord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpeq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmplt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmple_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpgt_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
    (__v2df) __builtin_ia32_cmpltsd ((__v2df) __B, (__v2df) __A));
}

static __inline __m128d
_mm_cmpge_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
    (__v2df) __builtin_ia32_cmplesd ((__v2df) __B, (__v2df) __A));
}

static __inline __m128d
_mm_cmpneq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpnle_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpngt_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
    (__v2df) __builtin_ia32_cmpnltsd ((__v2df) __B, (__v2df) __A));
}

static __inline __m128d
_mm_cmpnge_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
    (__v2df) __builtin_ia32_cmpnlesd ((__v2df) __B, (__v2df) __A));
}

static __inline __m128d
_mm_cmpord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
}

static __inline __m128d
_mm_cmpunord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
}

static __inline int
_mm_comieq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
}
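/* Illustrative sketch (not part of the original header): the cmp*_pd
   intrinsics above return per-element masks of all-one or all-zero bits,
   which combine with _mm_and_pd/_mm_andnot_pd/_mm_or_pd into a branchless
   per-element select.  For NaN-free inputs this picks the larger element
   of each pair, matching _mm_max_pd.  Hypothetical name; guarded by #if 0
   so it does not affect compilation.  */
#if 0
static __inline __m128d
select_max_pd_example (__m128d __A, __m128d __B)
{
  __m128d __mask = _mm_cmpgt_pd (__A, __B);       /* lanes where A > B */
  return _mm_or_pd (_mm_and_pd (__mask, __A),     /* keep A where A > B */
                    _mm_andnot_pd (__mask, __B)); /* keep B elsewhere */
}
#endif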