sse-6.c

来自「linux下编程用 编译软件」· C语言 代码 · 共 317 行

C
317
字号
/* { dg-do run { target i?86-*-* x86_64-*-* } } *//* { dg-options "-O2 -msse2" } */#include <xmmintrin.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include "../../gcc.dg/i386-cpuid.h"#ifndef NOINLINE#define NOINLINE __attribute__ ((noinline))#endif#define SHIFT (4)typedef union {  __m128i v;  unsigned int s[4];  unsigned short int t[8];  unsigned long long u[2];  unsigned char c[16];}vecInLong;void sse2_tests (void) NOINLINE;void dump128_16 (char *, char *, vecInLong);void dump128_32 (char *, char *, vecInLong);void dump128_64 (char *, char *, vecInLong);void dump128_128 (char *, char *, vecInLong);int check (const char *, const char *[]);char buf[8000];char comparison[8000];static int errors = 0;vecInLong a128, b128, c128, d128, e128, f128;__m128i m128_16, m128_32, s128, m128_64, m128_128;__m64 m64_16, s64, m64_32, m64_64;const char *reference_sse2[] = {  "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",  "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",  "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",  "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",  "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",  "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",  "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",  "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",  "_mm_srli_epi64 00123456789abcde 00123456789abcde \n",  "_mm_srl_epi64 00123456789abcde 00123456789abcde \n",  "_mm_srli_si128 (byte shift)  00000000ffeeddccbbaa998877665544\n",  "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",  "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",  "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",  "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",  "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",  "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",  "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",  "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",  "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",  "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",  ""};int main(){  unsigned long cpu_facilities;  cpu_facilities = i386_cpuid ();  if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))      != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))    /* If host has no vector support, pass.  */    exit (0);  a128.s[0] = 0x01234567;  a128.s[1] = 0x01234567;  a128.s[2] = 0x01234567;  a128.s[3] = 0x01234567;  m128_32 = a128.v;  d128.u[0] = 0x0123456789abcdefULL;  d128.u[1] = 0x0123456789abcdefULL;  m128_64 = d128.v;  /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,     expressed as two little-endian 64-bit words.  */  e128.u[0] = 0x7766554433221100ULL;  e128.u[1] = 0xffeeddccbbaa9988ULL;  f128.t[0] = 0x0123;  f128.t[1] = 0x0123;  f128.t[2] = 0x0123;  f128.t[3] = 0x0123;  f128.t[4] = 0x0123;  f128.t[5] = 0x0123;  f128.t[6] = 0x0123;  f128.t[7] = 0x0123;  m128_16 = f128.v;  m128_128 = e128.v;  b128.s[0] = SHIFT;  b128.s[1] = 0;  b128.s[2] = 0;  b128.s[3] = 0;  s128 = b128.v;  if (cpu_facilities & bit_SSE2)    {      sse2_tests();      check (buf, reference_sse2);#ifdef DEBUG      printf ("sse2 testing:\n");      printf (buf);      printf ("\ncomparison:\n");      printf (comparison);#endif      buf[0] = '\0';    }  if (errors != 0)    abort ();  exit (0);}void NOINLINEsse2_tests (void){  /* psraw */  c128.v = _mm_srai_epi16 (m128_16, SHIFT);  dump128_16 (buf, "_mm_srai_epi16", c128);  c128.v = _mm_sra_epi16 (m128_16, s128);  dump128_16 (buf, "_mm_sra_epi16", c128);  /* psrad */  c128.v = _mm_srai_epi32 (m128_32, SHIFT);  dump128_32 (buf, "_mm_srai_epi32", c128);  c128.v = _mm_sra_epi32 (m128_32, s128);  dump128_32 (buf, "_mm_sra_epi32", c128);  /* psrlw */  c128.v = _mm_srli_epi16 (m128_16, SHIFT);  dump128_16 (buf, "_mm_srli_epi16", c128);  c128.v = _mm_srl_epi16 (m128_16, s128);  dump128_16 (buf, "_mm_srl_epi16", c128);  /* psrld */  c128.v = _mm_srli_epi32 (m128_32, SHIFT);  dump128_32 (buf, "_mm_srli_epi32", c128);  c128.v = _mm_srl_epi32 (m128_32, s128);  dump128_32 (buf, "_mm_srl_epi32", c128);  /* psrlq */  c128.v = _mm_srli_epi64 (m128_64, SHIFT);  dump128_64 (buf, "_mm_srli_epi64", c128);  c128.v = _mm_srl_epi64 (m128_64, s128);  dump128_64 (buf, "_mm_srl_epi64", c128);  /* psrldq */  c128.v = _mm_srli_si128 (m128_128, SHIFT);  dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);  /* psllw */  c128.v = _mm_slli_epi16 (m128_16, SHIFT);  dump128_16 (buf, "_mm_slli_epi16", c128);  c128.v = _mm_sll_epi16 (m128_16, s128);  dump128_16 (buf, "_mm_sll_epi16", c128);  /* pslld */  c128.v = _mm_slli_epi32 (m128_32, SHIFT);  dump128_32 (buf, "_mm_slli_epi32", c128);  c128.v = _mm_sll_epi32 (m128_32, s128);  dump128_32 (buf, "_mm_sll_epi32", c128);  /* psllq */  c128.v = _mm_slli_epi64 (m128_64, SHIFT);  dump128_64 (buf, "_mm_slli_epi64", c128);  c128.v = _mm_sll_epi64 (m128_64, s128);  dump128_64 (buf, "_mm_sll_epi64", c128);  /* pslldq */  c128.v = _mm_slli_si128 (m128_128, SHIFT);  dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);  /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA.  */  /* pshufd */  c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);  dump128_32 (buf, "_mm_shuffle_epi32", c128);  /* pshuflw */  c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);  dump128_16 (buf, "_mm_shuffelo_epi16", c128);  /* pshufhw */  c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);  dump128_16 (buf, "_mm_shuffehi_epi16", c128);}voiddump128_16 (char *buf, char *name, vecInLong x){  int i;  char *p = buf + strlen (buf);  sprintf (p, "%s ", name);  p += strlen (p);  for (i=0; i<8; i++)    {      sprintf (p, "%4.4x ", x.t[i]);      p += strlen (p);    }  strcat (p, "\n");}voiddump128_32 (char *buf, char *name, vecInLong x){  int i;  char *p = buf + strlen (buf);  sprintf (p, "%s ", name);  p += strlen (p);  for (i=0; i<4; i++)    {      sprintf (p, "%8.8x ", x.s[i]);      p += strlen (p);    }  strcat (p, "\n");}voiddump128_64 (char *buf, char *name, vecInLong x){  int i;  char *p = buf + strlen (buf);  sprintf (p, "%s ", name);  p += strlen (p);  for (i=0; i<2; i++)    {      sprintf (p, "%16.16llx ", x.u[i]);      p += strlen (p);    }  strcat (p, "\n");}voiddump128_128 (char *buf, char *name, vecInLong x){  int i;  char *p = buf + strlen (buf);  sprintf (p, "%s ", name);  p += strlen (p);  for (i=15; i>=0; i--)    {      /* This is cheating; we don't have a 128-bit int format code.	 Running the loop backwards to compensate for the	 little-endian layout. */      sprintf (p, "%2.2x", x.c[i]);      p += strlen (p);    }  strcat (p, "\n");}intcheck (const char *input, const char *reference[]){  int broken, i, j, len;  const char *p_input;  char *p_comparison;  int new_errors = 0;  p_comparison = &comparison[0];  p_input = input;  for (i = 0; *reference[i] != '\0'; i++)    {      broken = 0;      len = strlen (reference[i]);      for (j = 0; j < len; j++)	{	  /* Ignore the terminating NUL characters at the end of every string in 'reference[]'.  */	  if (!broken && *p_input != reference[i][j])	    {	      *p_comparison = '\0';	      strcat (p_comparison, " >>> ");	      p_comparison += strlen (p_comparison);	      new_errors++;	      broken = 1;	    }	  *p_comparison = *p_input;	  p_comparison++;	  p_input++;	}      if (broken)	{	  *p_comparison = '\0';	  strcat (p_comparison, "expected:\n");	  strcat (p_comparison, reference[i]);	  p_comparison += strlen (p_comparison);	}    }  *p_comparison = '\0';  strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;  errors += new_errors;  return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?