📄 xmmintrin.h
字号:
/* Copyright (C) 2002 Free Software Foundation, Inc. This file is part of GNU CC. GNU CC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU CC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *//* As a special exception, if you include this header file into source files compiled by GCC, this header file does not by itself cause the resulting executable to be covered by the GNU General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU General Public License. *//* Implemented from the specification included in the Intel C++ Compiler User Guide and Reference, version 5.0. */#ifndef _XMMINTRIN_H_INCLUDED#define _XMMINTRIN_H_INCLUDED#ifndef __SSE__# error "SSE instruction set not enabled"#else/* We need type definitions from the MMX header file. */#include <mmintrin.h>/* The data type indended for user use. */typedef int __m128 __attribute__ ((__mode__(__V4SF__)));/* Internal data types for implementing the instrinsics. */typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));typedef int __v4si __attribute__ ((__mode__(__V4SI__)));/* Create a selector for use with the SHUFPS instruction. */#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))/* Constants for use with _mm_prefetch. */enum _mm_hint{ _MM_HINT_T0 = 3, _MM_HINT_T1 = 2, _MM_HINT_T2 = 1, _MM_HINT_NTA = 0};/* Bits in the MXCSR. */#define _MM_EXCEPT_MASK 0x003f#define _MM_EXCEPT_INVALID 0x0001#define _MM_EXCEPT_DENORM 0x0002#define _MM_EXCEPT_DIV_ZERO 0x0004#define _MM_EXCEPT_OVERFLOW 0x0008#define _MM_EXCEPT_UNDERFLOW 0x0010#define _MM_EXCEPT_INEXACT 0x0020#define _MM_MASK_MASK 0x1f80#define _MM_MASK_INVALID 0x0080#define _MM_MASK_DENORM 0x0100#define _MM_MASK_DIV_ZERO 0x0200#define _MM_MASK_OVERFLOW 0x0400#define _MM_MASK_UNDERFLOW 0x0800#define _MM_MASK_INEXACT 0x1000#define _MM_ROUND_MASK 0x6000#define _MM_ROUND_NEAREST 0x0000#define _MM_ROUND_DOWN 0x2000#define _MM_ROUND_UP 0x4000#define _MM_ROUND_TOWARD_ZERO 0x6000#define _MM_FLUSH_ZERO_MASK 0x8000#define _MM_FLUSH_ZERO_ON 0x8000#define _MM_FLUSH_ZERO_OFF 0x0000/* Perform the respective operation on the lower SPFP (single-precision floating-point) values of A and B; the upper three SPFP values are passed through from A. */static __inline __m128_mm_add_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_sub_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_mul_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_div_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_sqrt_ss (__m128 __A){ return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);}static __inline __m128_mm_rcp_ss (__m128 __A){ return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);}static __inline __m128_mm_rsqrt_ss (__m128 __A){ return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);}static __inline __m128_mm_min_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_max_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);}/* Perform the respective operation on the four SPFP values in A and B. */static __inline __m128_mm_add_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_sub_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_mul_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_div_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_sqrt_ps (__m128 __A){ return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);}static __inline __m128_mm_rcp_ps (__m128 __A){ return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);}static __inline __m128_mm_rsqrt_ps (__m128 __A){ return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);}static __inline __m128_mm_min_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_max_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);}/* Perform logical bit-wise operations on 128-bit values. */static __inline __m128_mm_and_ps (__m128 __A, __m128 __B){ return __builtin_ia32_andps (__A, __B);}static __inline __m128_mm_andnot_ps (__m128 __A, __m128 __B){ return __builtin_ia32_andnps (__A, __B);}static __inline __m128_mm_or_ps (__m128 __A, __m128 __B){ return __builtin_ia32_orps (__A, __B);}static __inline __m128_mm_xor_ps (__m128 __A, __m128 __B){ return __builtin_ia32_xorps (__A, __B);}/* Perform a comparison on the lower SPFP values of A and B. If the comparison is true, place a mask of all ones in the result, otherwise a mask of zeros. The upper three SPFP values are passed through from A. */static __inline __m128_mm_cmpeq_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmplt_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmple_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpgt_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_movss ((__v4sf) __A, (__v4sf) __builtin_ia32_cmpltss ((__v4sf) __B, (__v4sf) __A));}static __inline __m128_mm_cmpge_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_movss ((__v4sf) __A, (__v4sf) __builtin_ia32_cmpless ((__v4sf) __B, (__v4sf) __A));}static __inline __m128_mm_cmpneq_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpnlt_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpnle_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpngt_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_movss ((__v4sf) __A, (__v4sf) __builtin_ia32_cmpnltss ((__v4sf) __B, (__v4sf) __A));}static __inline __m128_mm_cmpnge_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_movss ((__v4sf) __A, (__v4sf) __builtin_ia32_cmpnless ((__v4sf) __B, (__v4sf) __A));}static __inline __m128_mm_cmpord_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpunord_ss (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);}/* Perform a comparison on the four SPFP values of A and B. For each element, if the comparison is true, place a mask of all ones in the result, otherwise a mask of zeros. */static __inline __m128_mm_cmpeq_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmplt_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmple_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpgt_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpge_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpneq_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpnlt_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpnle_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpngt_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpnge_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpord_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);}static __inline __m128_mm_cmpunord_ps (__m128 __A, __m128 __B){ return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);}/* Compare the lower SPFP values of A and B and return 1 if true and 0 if false. */static __inline int_mm_comieq_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_comilt_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_comile_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_comigt_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_comige_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_comineq_ss (__m128 __A, __m128 __B){ return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomieq_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomilt_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomile_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomigt_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomige_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);}static __inline int_mm_ucomineq_ss (__m128 __A, __m128 __B){ return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);}/* Convert the lower SPFP value to a 32-bit integer according to the current rounding mode. */static __inline int_mm_cvtss_si32 (__m128 __A){ return __builtin_ia32_cvtss2si ((__v4sf) __A);}#ifdef __x86_64__/* Convert the lower SPFP value to a 32-bit integer according to the current rounding mode. */static __inline long long_mm_cvtss_si64x (__m128 __A){ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);}#endif/* Convert the two lower SPFP values to 32-bit integers according to the current rounding mode. Return the integers in packed form. */static __inline __m64_mm_cvtps_pi32 (__m128 __A){ return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);}/* Truncate the lower SPFP value to a 32-bit integer. */static __inline int_mm_cvttss_si32 (__m128 __A){ return __builtin_ia32_cvttss2si ((__v4sf) __A);}#ifdef __x86_64__/* Truncate the lower SPFP value to a 32-bit integer. */static __inline long long_mm_cvttss_si64x (__m128 __A){ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);}#endif/* Truncate the two lower SPFP values to 32-bit integers. Return the integers in packed form. */static __inline __m64_mm_cvttps_pi32 (__m128 __A)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -