immintrin.h

来自「C语言库函数的原型,有用的拿去」· C头文件代码 · 共 1,080 行 · 第 1/4 页
1,080 行
/***
*** Copyright (C) 1985-2008 Intel Corporation.  All rights reserved.
***
*** The information and source code contained herein is the exclusive
*** property of Intel Corporation and may not be disclosed, examined
*** or reproduced in whole or in part without explicit written authorization
*** from the company.
***
****/

#pragma once
#ifndef __midl
#ifndef _INCLUDED_IMM
#define _INCLUDED_IMM

#if defined (_M_CEE_PURE)
        #error ERROR: EMM intrinsics not supported in the pure mode!
#else  /* defined (_M_CEE_PURE) */

#include <wmmintrin.h>

#ifdef __cplusplus
extern "C" {
#endif  /* __cplusplus */

/*
 * Intel(R) AVX compiler intrinsics.
 */

typedef union __declspec(intrin_type) _CRT_ALIGN(32) __m256 {
    float m256_f32[8];
} __m256;

typedef struct __declspec(intrin_type) _CRT_ALIGN(32) {
    double m256d_f64[4];
} __m256d;

typedef union  __declspec(intrin_type) _CRT_ALIGN(32) __m256i {
    __int8              m256i_i8[32];
    __int16             m256i_i16[16];
    __int32             m256i_i32[8];
    __int64             m256i_i64[4];
    unsigned __int8     m256i_u8[32];
    unsigned __int16    m256i_u16[16];
    unsigned __int32    m256i_u32[8];
    unsigned __int64    m256i_u64[4];
} __m256i;

/*
 * Compare predicates for scalar and packed compare intrinsics
 */
#define _CMP_EQ_OQ     0x00  /* Equal (ordered, nonsignaling)                       */
#define _CMP_LT_OS     0x01  /* Less-than (ordered, signaling)                      */
#define _CMP_LE_OS     0x02  /* Less-than-or-equal (ordered, signaling)             */
#define _CMP_UNORD_Q   0x03  /* Unordered (nonsignaling)                            */
#define _CMP_NEQ_UQ    0x04  /* Not-equal (unordered, nonsignaling)                 */
#define _CMP_NLT_US    0x05  /* Not-less-than (unordered, signaling)                */
#define _CMP_NLE_US    0x06  /* Not-less-than-or-equal (unordered, signaling)       */
#define _CMP_ORD_Q     0x07  /* Ordered (nonsignaling)                              */
#define _CMP_EQ_UQ     0x08  /* Equal (unordered, non-signaling)                    */
#define _CMP_NGE_US    0x09  /* Not-greater-than-or-equal (unordered, signaling)    */
#define _CMP_NGT_US    0x0A  /* Not-greater-than (unordered, signaling)             */
#define _CMP_FALSE_OQ  0x0B  /* False (ordered, nonsignaling)                       */
#define _CMP_NEQ_OQ    0x0C  /* Not-equal (ordered, non-signaling)                  */
#define _CMP_GE_OS     0x0D  /* Greater-than-or-equal (ordered, signaling)          */
#define _CMP_GT_OS     0x0E  /* Greater-than (ordered, signaling)                   */
#define _CMP_TRUE_UQ   0x0F  /* True (unordered, non-signaling)                     */
#define _CMP_EQ_OS     0x10  /* Equal (ordered, signaling)                          */
#define _CMP_LT_OQ     0x11  /* Less-than (ordered, nonsignaling)                   */
#define _CMP_LE_OQ     0x12  /* Less-than-or-equal (ordered, nonsignaling)          */
#define _CMP_UNORD_S   0x13  /* Unordered (signaling)                               */
#define _CMP_NEQ_US    0x14  /* Not-equal (unordered, signaling)                    */
#define _CMP_NLT_UQ    0x15  /* Not-less-than (unordered, nonsignaling)             */
#define _CMP_NLE_UQ    0x16  /* Not-less-than-or-equal (unordered, nonsignaling)    */
#define _CMP_ORD_S     0x17  /* Ordered (signaling)                                 */
#define _CMP_EQ_US     0x18  /* Equal (unordered, signaling)                        */
#define _CMP_NGE_UQ    0x19  /* Not-greater-than-or-equal (unordered, nonsignaling) */
#define _CMP_NGT_UQ    0x1A  /* Not-greater-than (unordered, nonsignaling)          */
#define _CMP_FALSE_OS  0x1B  /* False (ordered, signaling)                          */
#define _CMP_NEQ_OS    0x1C  /* Not-equal (ordered, signaling)                      */
#define _CMP_GE_OQ     0x1D  /* Greater-than-or-equal (ordered, nonsignaling)       */
#define _CMP_GT_OQ     0x1E  /* Greater-than (ordered, nonsignaling)                */
#define _CMP_TRUE_US   0x1F  /* True (unordered, signaling)                         */

/*
 * Add Packed Double Precision Floating-Point Values
 * **** VADDPD ymm1, ymm2, ymm3/m256
 * Performs an SIMD add of the four packed double-precision floating-point
 * values from the first source operand to the second source operand, and
 * stores the packed double-precision floating-point results in the
 * destination
 */
extern __m256d __cdecl _mm256_add_pd(__m256d m1, __m256d m2);

/*
 * Add Packed Single Precision Floating-Point Values
 * **** VADDPS ymm1, ymm2, ymm3/m256
 * Performs an SIMD add of the eight packed single-precision floating-point
 * values from the first source operand to the second source operand, and
 * stores the packed single-precision floating-point results in the
 * destination
 */
extern __m256 __cdecl _mm256_add_ps(__m256 m1, __m256 m2);

/*
 * Add/Subtract Double Precision Floating-Point Values
 * **** VADDSUBPD ymm1, ymm2, ymm3/m256
 * Adds odd-numbered double-precision floating-point values of the first
 * source operand with the corresponding double-precision floating-point
 * values from the second source operand; stores the result in the odd-numbered
 * values of the destination. Subtracts the even-numbered double-precision
 * floating-point values from the second source operand from the corresponding
 * double-precision floating values in the first source operand; stores the
 * result into the even-numbered values of the destination
 */
extern __m256d __cdecl _mm256_addsub_pd(__m256d m1, __m256d m2);

/*
 * Add/Subtract Packed Single Precision Floating-Point Values
 * **** VADDSUBPS ymm1, ymm2, ymm3/m256
 * Adds odd-numbered single-precision floating-point values of the first source
 * operand with the corresponding single-precision floating-point values from
 * the second source operand; stores the result in the odd-numbered values of
 * the destination. Subtracts the even-numbered single-precision floating-point
 * values from the second source operand from the corresponding
 * single-precision floating values in the first source operand; stores the
 * result into the even-numbered values of the destination
 */
extern __m256 __cdecl _mm256_addsub_ps(__m256 m1, __m256 m2);

/*
 * Bitwise Logical AND of Packed Double Precision Floating-Point Values
 * **** VANDPD ymm1, ymm2, ymm3/m256
 * Performs a bitwise logical AND of the four packed double-precision
 * floating-point values from the first source operand and the second
 * source operand, and stores the result in the destination
 */
extern __m256d __cdecl _mm256_and_pd(__m256d m1, __m256d m2);

/*
 * Bitwise Logical AND of Packed Single Precision Floating-Point Values
 * **** VANDPS ymm1, ymm2, ymm3/m256
 * Performs a bitwise logical AND of the eight packed single-precision
 * floating-point values from the first source operand and the second
 * source operand, and stores the result in the destination
 */
extern __m256 __cdecl _mm256_and_ps(__m256 m1, __m256 m2);

/*
 * Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values
 * **** VANDNPD ymm1, ymm2, ymm3/m256
 * Performs a bitwise logical AND NOT of the four packed double-precision
 * floating-point values from the first source operand and the second source
 * operand, and stores the result in the destination
 */
extern __m256d __cdecl _mm256_andnot_pd(__m256d m1, __m256d m2);

/*
 * Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values
 * **** VANDNPS ymm1, ymm2, ymm3/m256
 * Performs a bitwise logical AND NOT of the eight packed single-precision
 * floating-point values from the first source operand and the second source
 * operand, and stores the result in the destination
 */
extern __m256 __cdecl _mm256_andnot_ps(__m256 m1, __m256 m2);

/*
 * Blend Packed Double Precision Floating-Point Values
 * **** VBLENDPD ymm1, ymm2, ymm3/m256, imm8
 * Double-Precision Floating-Point values from the second source operand are
 * conditionally merged with values from the first source operand and written
 * to the destination. The immediate bits [3:0] determine whether the
 * corresponding Double-Precision Floating Point value in the destination is
 * copied from the second source or first source. If a bit in the mask,
 * orresponding to a word, is "1", then the Double-Precision Floating-Point
 * value in the second source operand is copied, else the value in the first
 * source operand is copied
 */
extern __m256d __cdecl _mm256_blend_pd(__m256d m1, __m256d m2, const int mask);

/*
 * Blend Packed Single Precision Floating-Point Values
 * **** VBLENDPS ymm1, ymm2, ymm3/m256, imm8
 * Single precision floating point values from the second source operand are
 * conditionally merged with values from the first source operand and written
 * to the destination. The immediate bits [7:0] determine whether the
 * corresponding single precision floating-point value in the destination is
 * copied from the second source or first source. If a bit in the mask,
 * corresponding to a word, is "1", then the single-precision floating-point
 * value in the second source operand is copied, else the value in the first
 * source operand is copied
 */
extern __m256 __cdecl _mm256_blend_ps(__m256 m1, __m256 m2, const int mask);

/*
 * Blend Packed Double Precision Floating-Point Values
 * **** VBLENDVPD ymm1, ymm2, ymm3/m256, ymm4
 * Conditionally copy each quadword data element of double-precision
 * floating-point value from the second source operand (third operand) and the
 * first source operand (second operand) depending on mask bits defined in the
 * mask register operand (fourth operand).
 */
extern __m256d __cdecl _mm256_blendv_pd(__m256d m1, __m256d m2, __m256d m3);

/*
 * Blend Packed Single Precision Floating-Point Values
 * **** VBLENDVPS ymm1, ymm2, ymm3/m256, ymm4
 * Conditionally copy each dword data element of single-precision
 * floating-point value from the second source operand (third operand) and the
 * first source operand (second operand) depending on mask bits defined in the
 * mask register operand (fourth operand).
 */
extern __m256 __cdecl _mm256_blendv_ps(__m256 m1, __m256 m2, __m256 mask);

/*
 * Divide Packed Double-Precision Floating-Point Values
 * **** VDIVPD ymm1, ymm2, ymm3/m256
 * Performs an SIMD divide of the four packed double-precision floating-point
 * values in the first source operand by the four packed double-precision
 * floating-point values in the second source operand
 */
extern __m256d __cdecl _mm256_div_pd(__m256d m1, __m256d m2);

/*
 * Divide Packed Single-Precision Floating-Point Values
 * **** VDIVPS ymm1, ymm2, ymm3/m256
 * Performs an SIMD divide of the eight packed single-precision
 * floating-point values in the first source operand by the eight packed
 * single-precision floating-point values in the second source operand
 */
extern __m256 __cdecl _mm256_div_ps(__m256 m1, __m256 m2);

/*
 * Dot Product of Packed Single-Precision Floating-Point Values
 * **** VDPPS ymm1, ymm2, ymm3/m256, imm8
 * Multiplies the packed single precision floating point values in the
 * first source operand with the packed single-precision floats in the
 * second source. Each of the four resulting single-precision values is
 * conditionally summed depending on a mask extracted from the high 4 bits
 * of the immediate operand. This sum is broadcast to each of 4 positions
 * in the destination if the corresponding bit of the mask selected from
 * the low 4 bits of the immediate operand is "1". If the corresponding
 * low bit 0-3 of the mask is zero, the destination is set to zero.
 * The process is replicated for the high elements of the destination.
 */
extern __m256 __cdecl _mm256_dp_ps(__m256 m1, __m256 m2, const int mask);

/*
 * Add Horizontal Double Precision Floating-Point Values
 * **** VHADDPD ymm1, ymm2, ymm3/m256
 * Adds pairs of adjacent double-precision floating-point values in the
 * first source operand and second source operand and stores results in
 * the destination
 */
extern __m256d __cdecl _mm256_hadd_pd(__m256d m1, __m256d m2);

/*
 * Add Horizontal Single Precision Floating-Point Values
 * **** VHADDPS ymm1, ymm2, ymm3/m256
 * Adds pairs of adjacent single-precision floating-point values in the
 * first source operand and second source operand and stores results in
 * the destination
 */
extern __m256 __cdecl _mm256_hadd_ps(__m256 m1, __m256 m2);

/*
 * Subtract Horizontal Double Precision Floating-Point Values
 * **** VHSUBPD ymm1, ymm2, ymm3/m256
 * Subtract pairs of adjacent double-precision floating-point values in
 * the first source operand and second source operand and stores results
immintrin.h - 源码说明

本页面展示了「C语言库函数的原型,有用的拿去」中的 immintrin.h 源码文件，采用 C头文件编程语言编写，共 1,080 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与C语言相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?