📄 immintrin.h
字号:
* in the destination
*/
extern __m256d __cdecl _mm256_hsub_pd(__m256d m1, __m256d m2);
/*
* Subtract Horizontal Single Precision Floating-Point Values
* **** VHSUBPS ymm1, ymm2, ymm3/m256
* Subtracts pairs of adjacent single-precision floating-point values in
* the first source operand and in the second source operand, and stores
* the results in the destination.
*/
extern __m256 __cdecl _mm256_hsub_ps(__m256 m1, __m256 m2);
/*
* Maximum of Packed Double Precision Floating-Point Values
* **** VMAXPD ymm1, ymm2, ymm3/m256
* Performs a SIMD compare of the packed double-precision floating-point
* values in the first source operand and the second source operand, and
* returns the maximum value of each pair of values to the destination.
*/
extern __m256d __cdecl _mm256_max_pd(__m256d m1, __m256d m2);
/*
* Maximum of Packed Single Precision Floating-Point Values
* **** VMAXPS ymm1, ymm2, ymm3/m256
* Performs a SIMD compare of the packed single-precision floating-point
* values in the first source operand and the second source operand, and
* returns the maximum value of each pair of values to the destination.
*/
extern __m256 __cdecl _mm256_max_ps(__m256 m1, __m256 m2);
/*
* Minimum of Packed Double Precision Floating-Point Values
* **** VMINPD ymm1, ymm2, ymm3/m256
* Performs a SIMD compare of the packed double-precision floating-point
* values in the first source operand and the second source operand, and
* returns the minimum value of each pair of values to the destination.
*/
extern __m256d __cdecl _mm256_min_pd(__m256d m1, __m256d m2);
/*
* Minimum of Packed Single Precision Floating-Point Values
* **** VMINPS ymm1, ymm2, ymm3/m256
* Performs a SIMD compare of the packed single-precision floating-point
* values in the first source operand and the second source operand, and
* returns the minimum value of each pair of values to the destination.
*/
extern __m256 __cdecl _mm256_min_ps(__m256 m1, __m256 m2);
/*
* Multiply Packed Double Precision Floating-Point Values
* **** VMULPD ymm1, ymm2, ymm3/m256
* Performs a SIMD multiply of the four packed double-precision floating-point
* values in the first source operand by the corresponding values in the
* second source operand, and stores the packed double-precision
* floating-point results in the destination.
*/
extern __m256d __cdecl _mm256_mul_pd(__m256d m1, __m256d m2);
/*
* Multiply Packed Single Precision Floating-Point Values
* **** VMULPS ymm1, ymm2, ymm3/m256
* Performs a SIMD multiply of the eight packed single-precision
* floating-point values in the first source operand by the corresponding
* values in the second source operand, and stores the packed
* single-precision floating-point results in the destination.
*/
extern __m256 __cdecl _mm256_mul_ps(__m256 m1, __m256 m2);
/*
* Bitwise Logical OR of Packed Double Precision Floating-Point Values
* **** VORPD ymm1, ymm2, ymm3/m256
* Performs a bitwise logical OR of the four packed double-precision
* floating-point values from the first source operand and the second
* source operand, and stores the result in the destination.
*/
extern __m256d __cdecl _mm256_or_pd(__m256d m1, __m256d m2);
/*
* Bitwise Logical OR of Packed Single Precision Floating-Point Values
* **** VORPS ymm1, ymm2, ymm3/m256
* Performs a bitwise logical OR of the eight packed single-precision
* floating-point values from the first source operand and the second
* source operand, and stores the result in the destination.
*/
extern __m256 __cdecl _mm256_or_ps(__m256 m1, __m256 m2);
/*
* Shuffle Packed Double Precision Floating-Point Values
* **** VSHUFPD ymm1, ymm2, ymm3/m256, imm8
* Moves either of the two packed double-precision floating-point values from
* each double quadword in the first source operand into the low quadword
* of each double quadword of the destination; moves either of the two packed
* double-precision floating-point values from each double quadword in the
* second source operand into the high quadword of each double quadword of
* the destination.
* The selector operand (select, encoded as the instruction's imm8)
* determines which values are moved to the destination.
*/
extern __m256d __cdecl _mm256_shuffle_pd(__m256d m1, __m256d m2, const int select);
/*
* Shuffle Packed Single Precision Floating-Point Values
* **** VSHUFPS ymm1, ymm2, ymm3/m256, imm8
* Moves two of the four packed single-precision floating-point values
* from each double quadword of the first source operand into the low
* quadword of each double quadword of the destination; moves two of the
* four packed single-precision floating-point values from each double
* quadword of the second source operand into the high quadword of each
* double quadword of the destination.
* The selector operand (select, encoded as the instruction's imm8)
* determines which values are moved to the destination.
*/
extern __m256 __cdecl _mm256_shuffle_ps(__m256 m1, __m256 m2, const int select);
/*
* Subtract Packed Double Precision Floating-Point Values
* **** VSUBPD ymm1, ymm2, ymm3/m256
* Performs a SIMD subtract of the four packed double-precision floating-point
* values of the second source operand from the first source operand, and
* stores the packed double-precision floating-point results in the
* destination.
*/
extern __m256d __cdecl _mm256_sub_pd(__m256d m1, __m256d m2);
/*
* Subtract Packed Single Precision Floating-Point Values
* **** VSUBPS ymm1, ymm2, ymm3/m256
* Performs a SIMD subtract of the eight packed single-precision
* floating-point values in the second source operand from the first source
* operand, and stores the packed single-precision floating-point results in
* the destination.
*/
extern __m256 __cdecl _mm256_sub_ps(__m256 m1, __m256 m2);
/*
* Bitwise Logical XOR of Packed Double Precision Floating-Point Values
* **** VXORPD ymm1, ymm2, ymm3/m256
* Performs a bitwise logical XOR of the four packed double-precision
* floating-point values from the first source operand and the second
* source operand, and stores the result in the destination.
*/
extern __m256d __cdecl _mm256_xor_pd(__m256d m1, __m256d m2);
/*
* Bitwise Logical XOR of Packed Single Precision Floating-Point Values
* **** VXORPS ymm1, ymm2, ymm3/m256
* Performs a bitwise logical XOR of the eight packed single-precision
* floating-point values from the first source operand and the second
* source operand, and stores the result in the destination.
*/
extern __m256 __cdecl _mm256_xor_ps(__m256 m1, __m256 m2);
/*
* Compare Packed Double-Precision Floating-Point Values
* **** VCMPPD xmm1, xmm2, xmm3/m128, imm8
* **** VCMPPD ymm1, ymm2, ymm3/m256, imm8
* Performs a SIMD compare of the packed double-precision floating-point
* values (two in the 128-bit form, four in the 256-bit form) in the second
* source operand (third operand) and the first source operand (second
* operand) and returns the results of the comparison to the destination
* operand (first operand). The comparison predicate operand (immediate)
* specifies the type of comparison performed on each of the pairs of packed
* values.
* For the 128-bit intrinsic with compare predicate values in the range 0-7,
* the compiler may generate SSE2 instructions if it is warranted for
* performance reasons.
*/
extern __m128d __cdecl _mm_cmp_pd(__m128d m1, __m128d m2, const int predicate);
extern __m256d __cdecl _mm256_cmp_pd(__m256d m1, __m256d m2, const int predicate);
/*
* Compare Packed Single-Precision Floating-Point Values
* **** VCMPPS xmm1, xmm2, xmm3/m128, imm8
* **** VCMPPS ymm1, ymm2, ymm3/m256, imm8
* Performs a SIMD compare of the packed single-precision floating-point values
* in the second source operand (third operand) and the first source operand
* (second operand) and returns the results of the comparison to the destination
* operand (first operand). The comparison predicate operand (immediate)
* specifies the type of comparison performed on each of the pairs of packed
* values.
* For the 128-bit intrinsic with compare predicate values in the range 0-7,
* the compiler may generate SSE instructions if it is warranted for
* performance reasons.
* NOTE(review): the original text said "SSE2" here; the legacy packed-single
* compare (CMPPS) is an SSE instruction - confirm the intended wording.
*/
extern __m128 __cdecl _mm_cmp_ps(__m128 m1, __m128 m2, const int predicate);
extern __m256 __cdecl _mm256_cmp_ps(__m256 m1, __m256 m2, const int predicate);
/*
* Compare Scalar Double-Precision Floating-Point Values
* **** VCMPSD xmm1, xmm2, xmm3/m64, imm8
* Compares the low double-precision floating-point values in the second source
* operand (third operand) and the first source operand (second operand) and
* returns the results of the comparison to the destination operand (first
* operand). The comparison predicate operand (immediate operand) specifies the
* type of comparison performed.
* For compare predicate values in the range 0-7, the compiler may generate
* SSE2 instructions if it is warranted for performance reasons.
*/
extern __m128d __cdecl _mm_cmp_sd(__m128d m1, __m128d m2, const int predicate);
/*
* Compare Scalar Single-Precision Floating-Point Values
* **** VCMPSS xmm1, xmm2, xmm3/m32, imm8
* Compares the low single-precision floating-point values in the second source
* operand (third operand) and the first source operand (second operand) and
* returns the results of the comparison to the destination operand (first
* operand). The comparison predicate operand (immediate operand) specifies
* the type of comparison performed.
* For compare predicate values in the range 0-7, the compiler may generate
* SSE instructions if it is warranted for performance reasons.
* NOTE(review): the original text said "SSE2" here; the legacy scalar-single
* compare (CMPSS) is an SSE instruction - confirm the intended wording.
*/
extern __m128 __cdecl _mm_cmp_ss(__m128 m1, __m128 m2, const int predicate);
/*
* Convert Packed Doubleword Integers to
* Packed Double-Precision Floating-Point Values
* **** VCVTDQ2PD ymm1, xmm2/m128
* Converts four packed signed doubleword integers in the source operand to
* four packed double-precision floating-point values in the destination.
*/
extern __m256d __cdecl _mm256_cvtepi32_pd(__m128i m1);
/*
* Convert Packed Doubleword Integers to
* Packed Single-Precision Floating-Point Values
* **** VCVTDQ2PS ymm1, ymm2/m256
* Converts eight packed signed doubleword integers in the source operand to
* eight packed single-precision floating-point values in the destination.
*/
extern __m256 __cdecl _mm256_cvtepi32_ps(__m256i m1);
/*
* Convert Packed Double-Precision Floating-Point Values to
* Packed Single-Precision Floating-Point Values
* **** VCVTPD2PS xmm1, ymm2/m256
* Converts four packed double-precision floating-point values in the source
* operand to four packed single-precision floating-point values in the
* destination.
*/
extern __m128 __cdecl _mm256_cvtpd_ps(__m256d m1);
/*
* Convert Packed Single-Precision Floating-Point Values to
* Packed Signed Doubleword Integer Values
* **** VCVTPS2DQ ymm1, ymm2/m256
* Converts eight packed single-precision floating-point values in the source
* operand to eight signed doubleword integers in the destination.
*/
extern __m256i __cdecl _mm256_cvtps_epi32(__m256 m1);
/*
* Convert Packed Single-Precision Floating-Point Values to
* Packed Double-Precision Floating-Point Values
* **** VCVTPS2PD ymm1, xmm2/m128
* Converts four packed single-precision floating-point values in the source
* operand to four packed double-precision floating-point values in the
* destination.
*/
extern __m256d __cdecl _mm256_cvtps_pd(__m128 m1);
/*
* Convert with Truncation Packed Double-Precision Floating-Point Values to
* Packed Doubleword Integers
* **** VCVTTPD2DQ xmm1, ymm2/m256
* Converts four packed double-precision floating-point values in the source
* operand to four packed signed doubleword integers in the destination.
* When a conversion is inexact, a truncated (round toward zero) value is
* returned. If a converted result is larger than the maximum signed doubleword
* integer, the floating-point invalid exception is raised, and if this
* exception is masked, the indefinite integer value (80000000H) is returned.
*/
extern __m128i __cdecl _mm256_cvttpd_epi32(__m256d m1);
/*
* Convert Packed Double-Precision Floating-point values to
* Packed Doubleword Integers
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -