sse4_1-mpsadbw.c

来自「用于进行gcc测试」· C语言 代码 · 共 123 行

C
123
字号
/* { dg-do run } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */

#include "sse4_1-check.h"

#include <smmintrin.h>
#include <stdlib.h>	/* abs, abort: included directly rather than relying
			   on whatever sse4_1-check.h happens to include.  */
#include <string.h>

/* Immediate operands handed to _mm_mpsadbw_epu8.  The reference model
   below only looks at the low three bits of each value, so the test
   expects the remaining bits to have no effect on the result.  */
#define msk0 0xC0
#define msk1 0x01
#define msk2 0xF2
#define msk3 0x03
#define msk4 0x84
#define msk5 0x05
#define msk6 0xE6
#define msk7 0x67

/* Scalar reference model used to validate _mm_mpsadbw_epu8.

   V1 and V2 point at the 16 bytes of the first and second vector
   operand; neither is written through.  MASK is the immediate:
     - bits 1:0 pick the 4-byte group of V2 (byte offset 0, 4, 8 or 12)
       used as the reference block;
     - bit 2 picks the starting byte offset (0 or 4) of the window that
       slides over V1.

   For each J in 0..7 the result's 16-bit element J is the sum of the
   absolute differences between V1[offs1 + J .. offs1 + J + 3] and the
   four reference bytes.  The eight sums are returned packed in an
   __m128i.  */
static __m128i
compute_mpsadbw (const unsigned char *v1, const unsigned char *v2, int mask)
{
  union
    {
      __m128i x;
      unsigned short s[8];
    } ret;
  unsigned char s[4];
  int i, j;
  int offs1, offs2;

  /* Copy out the selected 4-byte reference block of V2.  */
  offs2 = 4 * (mask & 3);
  for (i = 0; i < 4; i++)
    s[i] = v2[offs2 + i];

  /* Highest V1 index touched is 4 + 7 + 3 = 14, so all reads stay
     inside the 16-byte operand.  */
  offs1 = 4 * ((mask & 4) >> 2);
  for (j = 0; j < 8; j++)
    {
      ret.s[j] = 0;
      for (i = 0; i < 4; i++)
	ret.s[j] += abs (v1[offs1 + j + i] - s[i]);
    }

  return ret.x;
}

/* Test body: compares _mm_mpsadbw_epu8 against compute_mpsadbw and calls
   abort () on the first mismatch.  Nothing in this file calls it;
   presumably the driver in sse4_1-check.h does -- confirm against that
   header.  */
static void
sse4_1_test (void)
{
  union
    {
      __m128i x;
      unsigned int i[4];
      unsigned char c[16];
    } val1, val2, val3 [8];
  __m128i res[8], tmp;
  unsigned char masks[8];
  int i;

  val1.i[0] = 0x35251505;
  val1.i[1] = 0x75655545;
  val1.i[2] = 0xB5A59585;
  val1.i[3] = 0xF5E5D5C5;

  val2.i[0] = 0x31211101;
  val2.i[1] = 0x71615141;
  val2.i[2] = 0xB1A19181;
  val2.i[3] = 0xF1E1D1C1;

  /* Entries with i % 3 == 1 hold val2's four 32-bit words in reverse
     order; every other entry is a plain copy of val2.  */
  for (i=0; i < 8; i++)
    switch (i % 3)
      {
      case 1:
	val3[i].i[0] = 0xF1E1D1C1;
	val3[i].i[1] = 0xB1A19181;
	val3[i].i[2] = 0x71615141;
	val3[i].i[3] = 0x31211101;
	break;
      default:
	val3[i].x = val2.x;
	break;
      }

  /* Check mpsadbw imm8, xmm, xmm.  The immediate has to be spelled out
     as a constant at each call, hence no loop here.  */
  res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0);
  res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1);
  res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2);
  res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3);
  res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4);
  res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5);
  res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6);
  res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7);

  masks[0] = msk0;
  masks[1] = msk1;
  masks[2] = msk2;
  masks[3] = msk3;
  masks[4] = msk4;
  masks[5] = msk5;
  masks[6] = msk6;
  masks[7] = msk7;

  for (i=0; i < 8; i++)
    {
      tmp = compute_mpsadbw (val1.c, val2.c, masks[i]);
      if (memcmp (&tmp, &res[i], sizeof (tmp)))
	abort ();
    }

  /* Check mpsadbw imm8, m128, xmm.  The second operand now comes from
     the val3 array and the immediate is msk4 for every element.  */
  for (i=0; i < 8; i++)
    {
      res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4);
      masks[i] = msk4;
    }

  for (i=0; i < 8; i++)
    {
      tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]);
      if (memcmp (&tmp, &res[i], sizeof (tmp)))
	abort ();
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?