ssse3-palignr.c

来自「用于进行gcc测试」· C语言 代码 · 共 265 行

C
265
字号
/* { dg-do run } */
/* { dg-require-effective-target ssse3 } */
/* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */

#include "ssse3-check.h"
#include "ssse3-vals.h"
#include <tmmintrin.h>
#include <string.h>

/* Run-time test of the SSSE3 byte-align intrinsics _mm_alignr_pi8 (64-bit)
   and _mm_alignr_epi8 (128-bit).  Each intrinsic result is compared against
   a byte-wise reference computed in plain C.

   The shift count of both intrinsics has to be a compile-time constant, so
   the wrappers below map the run-time IMM onto one call per constant with a
   switch statement.  */

/* Test the 64-bit form.

   Loads one __m64 from I1 and one from I2, applies _mm_alignr_pi8 with the
   constant selected by IMM and stores the 8-byte result at R.  Every IMM
   above 15 is mapped to the constant 16.  _mm_empty is called before
   returning because the function used MMX values.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  switch (imm)
    {
    case 0:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
      break;
    case 1:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
      break;
    case 2:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
      break;
    case 3:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
      break;
    case 4:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
      break;
    case 5:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
      break;
    case 6:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
      break;
    case 7:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
      break;
    case 8:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
      break;
    case 9:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
      break;
    case 10:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
      break;
    case 11:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
      break;
    case 12:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
      break;
    case 13:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
      break;
    case 14:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
      break;
    case 15:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
      break;
    default:
      /* Any count of 16 or more: the reference result is all zero bytes.  */
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }

  _mm_empty ();
}

/* Test the 128-bit form.

   Loads one __m128i from I1 and one from I2, applies _mm_alignr_epi8 with
   the constant selected by IMM and stores the 16-byte result at R.  Every
   IMM above 31 is mapped to the constant 32.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  switch (imm)
    {
    case 0:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
      break;
    case 1:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
      break;
    case 2:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
      break;
    case 3:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
      break;
    case 4:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
      break;
    case 5:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
      break;
    case 6:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
      break;
    case 7:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
      break;
    case 8:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
      break;
    case 9:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
      break;
    case 10:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
      break;
    case 11:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
      break;
    case 12:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
      break;
    case 13:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
      break;
    case 14:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
      break;
    case 15:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
      break;
    case 16:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
      break;
    case 17:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
      break;
    case 18:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
      break;
    case 19:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
      break;
    case 20:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
      break;
    case 21:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
      break;
    case 22:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
      break;
    case 23:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
      break;
    case 24:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
      break;
    case 25:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
      break;
    case 26:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
      break;
    case 27:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
      break;
    case 28:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
      break;
    case 29:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
      break;
    case 30:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
      break;
    case 31:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
      break;
    default:
      /* Any count of 32 or more: the reference result is all zero bytes.  */
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }
}

/* Routine to manually compute the results.

   Builds a 32-byte buffer with the 16 bytes at I2 in the low half and the
   16 bytes at I1 in the high half, then writes to R the 16 bytes starting
   at byte offset IMM.  Positions that fall outside the buffer read as 0.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [32];
  char *bout = (char *) r;
  int i;

  memcpy (&buf[0], i2, 16);
  memcpy (&buf[16], i1, 16);

  for (i = 0; i < 16; i++)
    if (imm >= 32 || imm + i >= 32)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];
}

/* Reference for two back-to-back 64-bit operations.

   The first 8 output bytes come from the 8-byte values at I1 and I2, the
   second 8 output bytes from the 8-byte values at &I1[2] and &I2[2].  In
   each half the value from I2 forms the low 8 bytes of a 16-byte buffer
   and the value from I1 the high 8 bytes; the output is the 8 bytes
   starting at offset IMM, with positions outside the buffer read as 0.
   R therefore receives 16 bytes in total.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  char buf [16];
  char *bout = (char *)r;
  int i;

  /* Handle the first half */
  memcpy (&buf[0], i2, 8);
  memcpy (&buf[8], i1, 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i] = 0;
    else
      bout[i] = buf[imm + i];

  /* Handle the second half */
  memcpy (&buf[0], &i2[2], 8);
  memcpy (&buf[8], &i1[2], 8);

  for (i = 0; i < 8; i++)
    if (imm >= 16 || imm + i >= 16)
      bout[i + 8] = 0;
    else
      bout[i + 8] = buf[imm + i];
}

/* Test driver.

   Walks vals[] in groups of 8 ints (indices 0..255) and, for every shift
   count from 0 to 99, checks both the 64-bit and the 128-bit intrinsic
   against the reference routines.  Aborts if any comparison reported a
   failure.

   NOTE(review): vals, chk_128 and abort are not declared in this file;
   presumably they come from ssse3-vals.h / ssse3-check.h.  This code
   requires vals to hold at least 256 ints and, for the 128-bit loads,
   &vals[0] to be 16-byte aligned -- confirm against those headers.  */
static void
ssse3_test (void)
{
  int i;
  int r [4] __attribute__ ((aligned(16)));
  int ck [4];
  unsigned int imm;
  int fail = 0;

  for (i = 0; i < 256; i += 8)
    for (imm = 0; imm < 100; imm++)
      {
	/* Manually compute the result */
	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);

	/* Run the 64-bit tests */
	ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
	ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
	fail += chk_128 (ck, r);

	/* Recompute the results for 128-bits */
	compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);

	/* Run the 128-bit tests */
	ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
	fail += chk_128 (ck, r);
      }

  if (fail != 0)
    abort ();
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?