sse4_1-dpps-1.c
来自「用于进行gcc测试」· C语言 代码 · 共 107 行
C
107 行
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */

/* Run-time check of the SSE4.1 dot-product intrinsic _mm_dp_ps.  Each
   vector result is compared lane by lane against a scalar dot product
   computed in plain C.  */

/* NOTE(review): this header is presumably what supplies main () and
   calls sse4_1_test () -- confirm against the header itself.  */
#include "sse4_1-check.h"

#include <smmintrin.h>
#include <stdlib.h>		/* abort */

/* Low nibble of the _mm_dp_ps immediate: bit J set means result lane J
   is compared against the dot product by the check below.  */
#define lmskN  0x00
#define lmsk0  0x01
#define lmsk1  0x02
#define lmsk2  0x04
#define lmsk3  0x08
#define lmsk01 0x03
#define lmsk02 0x05
#define lmsk03 0x09
#define lmsk12 0x06
#define lmsk13 0x0A
#define lmsk23 0x0C
#define lmskA  0x0F

/* High nibble of the immediate: bit (4 + J) set means the product
   val1[J] * val2[J] takes part in the reference sum below.  */
#define hmskN  0x00
#define hmskA  0xF0
#define hmsk0  0x10
#define hmsk1  0x20
#define hmsk2  0x40
#define hmsk3  0x80
#define hmsk01 0x30
#define hmsk02 0x50
#define hmsk03 0x90
#define hmsk12 0x60
#define hmsk13 0xA0
#define hmsk23 0xC0

/* The high nibble under test may be overridden before this point;
   by default every product is summed.  */
#ifndef HIMASK
#define HIMASK hmskA
#endif

/* Run one _mm_dp_ps case and record the immediate that was used, so the
   intrinsic call and the mask checked later can never drift apart.  A
   macro (not a function) is required because the immediate must be a
   compile-time constant at the call site.  Relies on the locals res,
   masks, val1 and val2 of sse4_1_test.  */
#define DPPS_CASE(idx, lomask)						\
  do									\
    {									\
      masks[(idx)] = HIMASK | (lomask);					\
      res[(idx)].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (lomask));	\
    }									\
  while (0)

/* Exercise _mm_dp_ps with sixteen different low-nibble masks and abort
   if any result lane differs from the scalar reference: selected lanes
   must hold the dot product, unselected lanes must be zero.  */
static void
sse4_1_test (void)
{
  union
    {
      __m128 x;
      float f[4];
    } val1, val2, res[16];
  int masks[16];
  float expected = 0.0f;
  int i, j;

  /* Small integers: every product and partial sum is exactly
     representable as a float, so exact comparison is safe.  */
  val1.f[0] = 2.;
  val1.f[1] = 3.;
  val1.f[2] = 4.;
  val1.f[3] = 5.;

  val2.f[0] = 10.;
  val2.f[1] = 100.;
  val2.f[2] = 1000.;
  val2.f[3] = 10000.;

  /* Single destination lane.  */
  DPPS_CASE (0, lmsk0);
  DPPS_CASE (1, lmsk1);
  DPPS_CASE (2, lmsk2);
  DPPS_CASE (3, lmsk3);

  /* Every pair of destination lanes.  */
  DPPS_CASE (4, lmsk01);
  DPPS_CASE (5, lmsk02);
  DPPS_CASE (6, lmsk03);
  DPPS_CASE (7, lmsk12);
  DPPS_CASE (8, lmsk13);
  DPPS_CASE (9, lmsk23);

  /* All destination lanes but one.  */
  DPPS_CASE (10, lmskA & ~lmsk0);
  DPPS_CASE (11, lmskA & ~lmsk1);
  DPPS_CASE (12, lmskA & ~lmsk2);
  DPPS_CASE (13, lmskA & ~lmsk3);

  /* No destination lane, then all of them.  */
  DPPS_CASE (14, lmskN);
  DPPS_CASE (15, lmskA);

  /* Scalar reference: sum the products selected by the high nibble.
     It depends only on HIMASK, so compute it once for all cases.  */
  for (j = 0; j < 4; j++)
    if (HIMASK & (0x10 << j))
      expected += val1.f[j] * val2.f[j];

  for (i = 0; i < 16; i++)
    for (j = 0; j < 4; j++)
      {
	/* DPPS writes the sum to lanes selected by the low nibble and
	   zero to every other lane; check both.  */
	float want = (masks[i] & (1 << j)) ? expected : 0.0f;

	if (res[i].f[j] != want)
	  abort ();
      }
}

#undef DPPS_CASE
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?