sse4_1-insertps-1.c
来自「用于进行gcc测试」· C语言 代码 · 共 72 行
C
72 行
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */

#include "sse4_1-check.h"

#include <smmintrin.h>
#include <stdlib.h>
#include <string.h>

/* Immediates passed to _mm_insert_ps.  As modelled by the reference
   computation in sse4_1_test: bits 7:6 pick the source element, bits 5:4
   pick the destination element, and each set bit in 3:0 zeroes the
   corresponding result element.  */
#define msk0 0x01
#define msk1 0x10
#define msk2 0x29
#define msk3 0x30

#define msk4 0xFC
#define msk5 0x05
#define msk6 0x0A
#define msk7 0x0F

/* Run _mm_insert_ps with each of the eight immediates above and compare
   every result, byte for byte, against a scalar model of the same
   operation.  Calls abort () on the first mismatch.  */
static void
sse4_1_test (void)
{
  union
    {
      __m128 x;
      float f[4];
    } res[8], val1, val2, tmp;
  /* masks[k] is the immediate used to produce res[k].  */
  static const int masks[8] =
    {
      msk0, msk1, msk2, msk3, msk4, msk5, msk6, msk7
    };
  int i, j;

  /* Destination operand is all 55.0; source operand is { 1, 2, 3, 4 } so
     that every source element is distinguishable in the result.  */
  for (i = 0; i < 4; i++)
    {
      val2.f[i] = 55.0f;
      val1.f[i] = (float) (i + 1);
    }

  /* The immediate must be a compile-time constant, so each call is
     spelled out rather than indexed through masks[].  */
  res[0].x = _mm_insert_ps (val2.x, val1.x, msk0);
  res[1].x = _mm_insert_ps (val2.x, val1.x, msk1);
  res[2].x = _mm_insert_ps (val2.x, val1.x, msk2);
  res[3].x = _mm_insert_ps (val2.x, val1.x, msk3);
  res[4].x = _mm_insert_ps (val2.x, val1.x, msk4);
  res[5].x = _mm_insert_ps (val2.x, val1.x, msk5);
  res[6].x = _mm_insert_ps (val2.x, val1.x, msk6);
  res[7].x = _mm_insert_ps (val2.x, val1.x, msk7);

  for (i = 0; i < 8; i++)
    {
      /* Scalar model: start from the destination operand, copy the
	 selected source element into the selected destination slot,
	 then apply the zero mask.  */
      tmp = val2;
      tmp.f[(masks[i] & 0x30) >> 4] = val1.f[(masks[i] & 0xC0) >> 6];

      for (j = 0; j < 4; j++)
	if (masks[i] & (0x1 << j))
	  tmp.f[j] = 0.f;

      if (memcmp (&res[i], &tmp, sizeof (tmp)))
	abort ();
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?