📄 immintrin.h

📁 C语言库函数的原型,有用的拿去
💻 H
📖 第 1 页 / 共 4 页
字号:
 * **** VCVTPD2DQ xmm1, ymm2/m256
 * Converts four packed double-precision floating-point values in the source
 * operand to four packed signed doubleword integers in the destination
 */
extern __m128i __cdecl _mm256_cvtpd_epi32(__m256d m1);

/*
 * Convert with Truncation Packed Single Precision Floating-Point Values to
 * Packed Singed Doubleword Integer Values
 * **** VCVTTPS2DQ ymm1, ymm2/m256
 * Converts eight packed single-precision floating-point values in the source
 * operand to eight signed doubleword integers in the destination.
 * When a conversion is inexact, a truncated (round toward zero) value is
 * returned. If a converted result is larger than the maximum signed doubleword
 * integer, the floating-point invalid exception is raised, and if this
 * exception is masked, the indefinite integer value (80000000H) is returned
 */
extern __m256i __cdecl _mm256_cvttps_epi32(__m256 m1);

/*
 * Extract packed floating-point values
 * **** VEXTRACTF128 xmm1/m128, ymm2, imm8
 * Extracts 128-bits of packed floating-point values from the source operand
 * at an 128-bit offset from imm8[0] into the destination
 */
extern __m128  __cdecl _mm256_extractf128_ps(__m256 m1, const int offset);
extern __m128d __cdecl _mm256_extractf128_pd(__m256d m1, const int offset);
extern __m128i __cdecl _mm256_extractf128_si256(__m256i m1, const int offset);

/*
 * Zero All YMM registers
 * **** VZEROALL
 * Zeros contents of all YMM registers
 */
extern void __cdecl _mm256_zeroall(void);

/*
 * Zero Upper bits of YMM registers
 * **** VZEROUPPER
 * Zeros the upper 128 bits of all YMM registers. The lower 128-bits of the
 * registers (the corresponding XMM registers) are unmodified
 */
extern void __cdecl _mm256_zeroupper(void);

/*
 * Permute Single-Precision Floating-Point Values
 * **** VPERMILPS ymm1, ymm2, ymm3/m256
 * **** VPERMILPS xmm1, xmm2, xmm3/m128
 * Permute Single-Precision Floating-Point values in the first source operand
 * using 8-bit control fields in the low bytes of corresponding elements the
 * shuffle control and store results in the destination
 */
extern __m256  __cdecl _mm256_permutevar_ps(__m256 m1, __m256i control);
extern __m128  __cdecl _mm_permutevar_ps(__m128 a, __m128i control);

/*
 * Permute Single-Precision Floating-Point Values
 * **** VPERMILPS ymm1, ymm2/m256, imm8
 * **** VPERMILPS xmm1, xmm2/m128, imm8
 * Permute Single-Precision Floating-Point values in the first source operand
 * using four 2-bit control fields in the 8-bit immediate and store results
 * in the destination
 */
extern __m256  __cdecl _mm256_permute_ps(__m256 m1, int control);
extern __m128  __cdecl _mm_permute_ps(__m128 a, int control);

/*
 * Permute Double-Precision Floating-Point Values
 * **** VPERMILPD ymm1, ymm2, ymm3/m256
 * **** VPERMILPD xmm1, xmm2, xmm3/m128
 * Permute Double-Precision Floating-Point values in the first source operand
 * using 8-bit control fields in the low bytes of the second source operand
 * and store results in the destination
 */
extern __m256d __cdecl _mm256_permutevar_pd(__m256d m1, __m256i control);
extern __m128d __cdecl _mm_permutevar_pd(__m128d a, __m128i control);

/*
 * Permute Double-Precision Floating-Point Values
 * **** VPERMILPD ymm1, ymm2/m256, imm8
 * **** VPERMILPD xmm1, xmm2/m128, imm8
 * Permute Double-Precision Floating-Point values in the first source operand
 * using two, 1-bit control fields in the low 2 bits of the 8-bit immediate
 * and store results in the destination
 */
extern __m256d __cdecl _mm256_permute_pd(__m256d m1, int control);
extern __m128d __cdecl _mm_permute_pd(__m128d a, int control);

/*
 * Permute Floating-Point Values
 * **** VPERM2F128 ymm1, ymm2, ymm3/m256, imm8
 * Permute 128 bit floating-point-containing fields from the first source
 * operand and second source operand using bits in the 8-bit immediate and
 * store results in the destination
 */
extern __m256  __cdecl _mm256_permute2f128_ps(__m256 m1, __m256 m2, int control);
extern __m256d __cdecl _mm256_permute2f128_pd(__m256d m1, __m256d m2, int control);
extern __m256i __cdecl _mm256_permute2f128_si256(__m256i m1, __m256i m2, int control);

/*
 * Load with Broadcast
 * **** VBROADCASTSS ymm1, m32
 * **** VBROADCASTSS xmm1, m32
 * Load floating point values from the source operand and broadcast to all
 * elements of the destination
 */
extern __m256  __cdecl _mm256_broadcast_ss(float const *a);
extern __m128  __cdecl _mm_broadcast_ss(float const *a);

/*
 * Load with Broadcast
 * **** VBROADCASTSD ymm1, m64
 * Load floating point values from the source operand and broadcast to all
 * elements of the destination
 */
extern __m256d __cdecl _mm256_broadcast_sd(double const *a);

/*
 * Load with Broadcast
 * **** VBROADCASTF128 ymm1, m128
 * Load floating point values from the source operand and broadcast to all
 * elements of the destination
 */
extern __m256  __cdecl _mm256_broadcast_ps(__m128 const *a);
extern __m256d __cdecl _mm256_broadcast_pd(__m128d const *a);

/*
 * Insert packed floating-point values
 * **** VINSERTF128 ymm1, ymm2, xmm3/m128, imm8
 * Performs an insertion of 128-bits of packed floating-point values from the
 * second source operand into an the destination at an 128-bit offset from
 * imm8[0]. The remaining portions of the destination are written by the
 * corresponding fields of the first source operand
 */
extern __m256  __cdecl _mm256_insertf128_ps(__m256, __m128 a, int offset);
extern __m256d __cdecl _mm256_insertf128_pd(__m256d, __m128d a, int offset);
extern __m256i __cdecl _mm256_insertf128_si256(__m256i, __m128i a, int offset);

/*
 * Move Aligned Packed Double-Precision Floating-Point Values
 * **** VMOVAPD ymm1, m256
 * **** VMOVAPD m256, ymm1
 * Moves 4 double-precision floating-point values from the source operand to
 * the destination
 */
extern __m256d __cdecl _mm256_load_pd(double const *a);
extern void    __cdecl _mm256_store_pd(double *a, __m256d b);

/*
 * Move Aligned Packed Single-Precision Floating-Point Values
 * **** VMOVAPS ymm1, m256
 * **** VMOVAPS m256, ymm1
 * Moves 8 single-precision floating-point values from the source operand to
 * the destination
 */
extern __m256  __cdecl _mm256_load_ps(float const *a);
extern void    __cdecl _mm256_store_ps(float *a, __m256 b);

/*
 * Move Unaligned Packed Double-Precision Floating-Point Values
 * **** VMOVUPD ymm1, m256
 * **** VMOVUPD m256, ymm1
 * Moves 256 bits of packed double-precision floating-point values from the
 * source operand to the destination
 */
extern __m256d __cdecl _mm256_loadu_pd(double const *a);
extern void    __cdecl _mm256_storeu_pd(double *a, __m256d b);

/*
 * Move Unaligned Packed Single-Precision Floating-Point Values
 * **** VMOVUPS ymm1, m256
 * **** VMOVUPS m256, ymm1
 * Moves 256 bits of packed single-precision floating-point values from the
 * source operand to the destination
 */
extern __m256  __cdecl _mm256_loadu_ps(float const *a);
extern void    __cdecl _mm256_storeu_ps(float *a, __m256 b);

/*
 * Move Aligned Packed Integer Values
 * **** VMOVDQA ymm1, m256
 * **** VMOVDQA m256, ymm1
 * Moves 256 bits of packed integer values from the source operand to the
 * destination
 */
extern __m256i __cdecl _mm256_load_si256(__m256i const *a);
extern void    __cdecl _mm256_store_si256(__m256i *a, __m256i b);

/*
 * Move Unaligned Packed Integer Values
 * **** VMOVDQU ymm1, m256
 * **** VMOVDQU m256, ymm1
 * Moves 256 bits of packed integer values from the source operand to the
 * destination
 */
extern __m256i __cdecl _mm256_loadu_si256(__m256i const *a);
extern void    __cdecl _mm256_storeu_si256(__m256i *a, __m256i b);

/*
 * Conditional SIMD Packed Loads and Stores
 * **** VMASKMOVPD xmm1, xmm2, m128
 * **** VMASKMOVPD ymm1, ymm2, m256
 * **** VMASKMOVPD m128, xmm1, xmm2
 * **** VMASKMOVPD m256, ymm1, ymm2
 *
 * Load forms:
 * Load packed values from the 128-bit (XMM forms) or 256-bit (YMM forms)
 * memory location (third operand) into the destination XMM or YMM register
 * (first operand) using a mask in the first source operand (second operand).
 *
 * Store forms:
 * Stores packed values from the XMM or YMM register in the second source
 * operand (third operand) into the 128-bit (XMM forms) or 256-bit (YMM forms)
 * memory location using a mask in first source operand (second operand).
 * Stores are atomic.
 */
extern __m256d __cdecl _mm256_maskload_pd(double const *a, __m256i mask);
extern void    __cdecl _mm256_maskstore_pd(double *a, __m256i mask, __m256d b);
extern __m128d __cdecl _mm_maskload_pd(double const *a, __m128i mask);
extern void    __cdecl _mm_maskstore_pd(double *a, __m128i mask, __m128d b);

/*
 * Conditional SIMD Packed Loads and Stores
 * **** VMASKMOVPS xmm1, xmm2, m128
 * **** VMASKMOVPS ymm1, ymm2, m256
 * **** VMASKMOVPS m128, xmm1, xmm2
 * **** VMASKMOVPS m256, ymm1, ymm2
 *
 * Load forms:
 * Load packed values from the 128-bit (XMM forms) or 256-bit (YMM forms)
 * memory location (third operand) into the destination XMM or YMM register
 * (first operand) using a mask in the first source operand (second operand).
 *
 * Store forms:
 * Stores packed values from the XMM or YMM register in the second source
 * operand (third operand) into the 128-bit (XMM forms) or 256-bit (YMM forms)
 * memory location using a mask in first source operand (second operand).
 * Stores are atomic.
 */
extern __m256  __cdecl _mm256_maskload_ps(float const *a, __m256i mask);
extern void    __cdecl _mm256_maskstore_ps(float *a, __m256i mask, __m256 b);
extern __m128  __cdecl _mm_maskload_ps(float const *a, __m128i mask);
extern void    __cdecl _mm_maskstore_ps(float *a, __m128i mask, __m128 b);

/*
 * Replicate Single-Precision Floating-Point Values
 * **** VMOVSHDUP ymm1, ymm2/m256
 * Duplicates odd-indexed single-precision floating-point values from the
 * source operand
 */
extern __m256  __cdecl _mm256_movehdup_ps(__m256 a);

/*
 * Replicate Single-Precision Floating-Point Values
 * **** VMOVSLDUP ymm1, ymm2/m256
 * Duplicates even-indexed single-precision floating-point values from the
 * source operand
 */
extern __m256  __cdecl _mm256_moveldup_ps(__m256 a);

/*
 * Replicate Double-Precision Floating-Point Values
 * **** VMOVDDUP ymm1, ymm2/m256
 * Duplicates even-indexed double-precision floating-point values from the
 * source operand
 */
extern __m256d __cdecl _mm256_movedup_pd(__m256d a);

/*
 * Move Unaligned Integer
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -