⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sse_memory.c

📁 The Valgrind distribution has multiple tools. The most popular is the memory checking tool (called M
💻 C
📖 第 1 页 / 共 2 页
字号:
/* A program to test that SSE/SSE2 insns do not read memory they
   should not.  Covers insns of the form OP %xmm, %xmm and OP memory,
   %xmm only. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <malloc.h>
#include <string.h>

/* A 128-bit vector value, held as 16 raw bytes. */
typedef  unsigned char  V128[16];
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;

/* Operands and result for a register-register test.  The asm wrappers
   address the fields by byte offset (0, 16, 32), so the field order
   must not change. */
typedef
   struct {
      V128 arg1;
      V128 arg2;
      V128 res;
   }
   RRArgs;

/* Operand and result for a register-memory test; the memory operand
   is passed separately.  The asm wrappers address the fields by byte
   offset (0, 16), so the field order must not change. */
typedef
   struct {
      V128 arg1;
      V128 res;
   }
   RMArgs;

/* Return the next byte of a fixed-seed linear congruential sequence,
   so every run of the program sees the same "random" data. */
static UChar randUChar ( void )
{
   static UInt seed = 80021;
   seed = 1103515245 * seed + 12345;
   return (seed >> 17) & 0xFF;
}

/* Fill the n bytes starting at p with pseudo-random values. */
static void randomise ( UChar* p, Int n )
{
   Int i;
   for (i = 0; i < n; i++)
      p[i] = randUChar();
}

/* Fill all 16 bytes of *v with pseudo-random values. */
static void randV128 ( V128* v )
{
   Int i;
   for (i = 0; i < 16; i++)
      (*v)[i] = randUChar();
}

/* Randomise both operands and the result slot of *rra. */
static void randRRArgs ( RRArgs* rra )
{
   randV128(&rra->arg1);
   randV128(&rra->arg2);
   randV128(&rra->res);
}

/* Randomise the operand and the result slot of *rra. */
static void randRMArgs ( RMArgs* rra )
{
   randV128(&rra->arg1);
   randV128(&rra->res);
}

/* Print *v as 32 hex digits, byte 0 first. */
static void showV128 ( V128* v )
{
   Int i;
   for (i = 0; i < 16; i++)
      printf("%02x", (Int)(*v)[i]);
}

/* Print *v as 32 hex digits, byte 0 first, after ANDing each byte
   with the corresponding byte of *mask. */
static void showMaskedV128 ( V128* v, V128* mask )
{
   Int i;
   for (i = 0; i < 16; i++)
      printf("%02x", (Int)( ((*v)[i]) & ((*mask)[i]) ));
}

/* Print one register-register result line: the insn name, both
   operands, and the result masked by *rmask. */
static void showRR ( const char* op, RRArgs* rra, V128* rmask )
{
   printf("r %10s ", op);
   showV128(&rra->arg1);
   printf(" ");
   showV128(&rra->arg2);
   printf(" ");
   showMaskedV128(&rra->res, rmask);
   printf("\n");
}

/* Print one register-memory result line: the insn name, the nMem
   bytes of the memory operand, the register operand, and the result
   masked by *rmask.  nMem must be 0, 4, 8 or 16. */
static void showRM ( const char* op, RMArgs* rra, UChar* mem, Int nMem, V128* rmask )
{
   Int i;
   assert(nMem == 4 || nMem == 8 || nMem == 16 || nMem==0);
   printf("m %10s ", op);
   for (i = 0; i < nMem; i++)
      printf("%02x", (Int)mem[i]);
   printf(" ");
   showV128(&rra->arg1);
   printf(" ");
   showMaskedV128(&rra->res, rmask );
   printf("\n");
}

/* Define r_r_OP(p): load p->arg1 into %xmm6 and p->arg2 into %xmm7,
   execute "OP %xmm6, %xmm7", and store %xmm7 into p->res. */
#define Wrapper_RegReg(OP)                 \
   void r_r_##OP ( RRArgs* p )             \
   {                                       \
      __asm__ __volatile__("\n"            \
         "\tmovups 0(%0), %%xmm6\n"        \
         "\tmovups 16(%0), %%xmm7\n"       \
         "\t" #OP " %%xmm6, %%xmm7\n"      \
         "\tmovups %%xmm7, 32(%0)\n"       \
         :                                 \
         : "r" (p)                         \
         : "memory", "xmm6", "xmm7", "cc"  \
      );                                   \
   }

/* Define r_m_OP(p, mem): load p->arg1 into %xmm7, execute
   "OP 0(mem), %xmm7", and store %xmm7 into p->res. */
#define Wrapper_RegMem(OP)                 \
   void r_m_##OP ( RMArgs* p, void* mem )  \
   {                                       \
      __asm__ __volatile__("\n"            \
         "\tmovups 0(%0), %%xmm7\n"        \
         "\t" #OP " 0(%1), %%xmm7\n"       \
         "\tmovups %%xmm7, 16(%0)\n"       \
         :                                 \
         : "r" (p), "r" (mem)              \
         : "memory", "xmm7", "cc"          \
      );                                   \
   }

/* Define the two wrappers for insn plus do_insn(), which runs five
   register-register trials and five register-memory trials on
   pseudo-random data and prints each result masked by res_mask.
   The memory operand is a fresh 16-aligned heap block of exactly
   mem_size bytes, so any access beyond the operand falls outside
   the allocation. */
#define TEST_INSN(res_mask,mem_size,insn)  \
                                           \
Wrapper_RegReg(insn)                       \
Wrapper_RegMem(insn)                       \
                                           \
void do_##insn ( void )                    \
{                                          \
   Int    i;                               \
   UChar* buf;                             \
   RRArgs rargs __attribute__((aligned(16))); \
   RMArgs margs __attribute__((aligned(16))); \
   for (i = 0; i < 5; i++) {               \
      randRRArgs(&rargs);                  \
      r_r_##insn(&rargs);                  \
      showRR(#insn, &rargs, res_mask);     \
   }                                       \
   for (i = 0; i < 5; i++) {               \
      randRMArgs(&margs);                  \
      buf = memalign(16,mem_size);         \
      assert(buf != NULL);                 \
      randomise(buf,mem_size);             \
      r_m_##insn(&margs,buf);              \
      showRM(#insn, &margs, buf, mem_size, res_mask);\
      free(buf);                           \
   }                                       \
}

/* Note: these are little endian.  Hence first byte is the least
   significant byte of lane zero. */

/* Mask for insns where all result bits are non-approximated. */
static V128 AllMask  = { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };

/* Mask for insns which produce approximated vector short results.
   Only the top nine bits of each 32-bit lane are kept. */
static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
                         0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };

/* Mask for insns which produce approximated scalar short results.
   Only the top nine bits of lane zero are kept; the other lanes are
   kept in full. */
static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };

/* Memory operand sizes in bytes: packed/scalar double, packed/scalar
   single. */
#define PD 16
#define SD 8
#define PS 16
#define SS 4

/* ------------------------ SSE1 ------------------------ */
TEST_INSN( &AllMask, PS,addps)
TEST_INSN( &AllMask, SS,addss)
TEST_INSN( &AllMask, PS,andnps)
TEST_INSN( &AllMask, PS,andps)
TEST_INSN( &AllMask, PS,cmpeqps)
TEST_INSN( &AllMask, SS,cmpeqss)
TEST_INSN( &AllMask, PS,cmpleps)
TEST_INSN( &AllMask, SS,cmpless)
TEST_INSN( &AllMask, PS,cmpltps)
TEST_INSN( &AllMask, SS,cmpltss)
TEST_INSN( &AllMask, PS,cmpneqps)
TEST_INSN( &AllMask, SS,cmpneqss)
TEST_INSN( &AllMask, PS,cmpnleps)
TEST_INSN( &AllMask, SS,cmpnless)
TEST_INSN( &AllMask, PS,cmpnltps)
TEST_INSN( &AllMask, SS,cmpnltss)
TEST_INSN( &AllMask, PS,cmpordps)
TEST_INSN( &AllMask, SS,cmpordss)
TEST_INSN( &AllMask, PS,cmpunordps)
TEST_INSN( &AllMask, SS,cmpunordss)
TEST_INSN( &AllMask, SS,comiss)
//TEST_INSN( &AllMask, 0,cvtpi2ps)
//TEST_INSN( &AllMask, 0,cvtps2pi)
//TEST_INSN( &AllMask, 0,cvtsi2ss)
//TEST_INSN( &AllMask, 0,cvtss2si)
//TEST_INSN( &AllMask, 0,cvttps2pi)
//TEST_INSN( &AllMask, 0,cvttss2si)
TEST_INSN( &AllMask, PS,divps)
TEST_INSN( &AllMask, SS,divss)
TEST_INSN( &AllMask, PS,maxps)
TEST_INSN( &AllMask, SS,maxss)
TEST_INSN( &AllMask, PS,minps)
TEST_INSN( &AllMask, SS,minss)
TEST_INSN( &AllMask, 16,movaps)
//TEST_INSN( &AllMask, 0,movhlps)
//TEST_INSN( &AllMask, 0,movhps)
//TEST_INSN( &AllMask, 0,movlhps)
//TEST_INSN( &AllMask, 0,movlps)
//TEST_INSN( &AllMask, 0,movmskps)
//TEST_INSN( &AllMask, 0,movntps)
//TEST_INSN( &AllMask, 0,movntq)
TEST_INSN( &AllMask, 4,movss)
TEST_INSN( &AllMask, 16,movups)
TEST_INSN( &AllMask, PS,mulps)
TEST_INSN( &AllMask, SS,mulss)
TEST_INSN( &AllMask, PS,orps)
//TEST_INSN( &AllMask, 0,pavgb) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pavgw) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pextrw)
//TEST_INSN( &AllMask, 0,pinsrw)
//TEST_INSN( &AllMask, 0,pmaxsw) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pmaxub) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pminsw) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pminub) -- dup with sse2?
//TEST_INSN( &AllMask, 0,pmovmskb)
//TEST_INSN( &AllMask, 0,pmulhuw) -- dup with sse2?
TEST_INSN( &AllMask, 16,psadbw) // -- XXXXXXXXXXXXXXXX sse2 (xmm variant) not implemented!
//TEST_INSN( &AllMask, 0,pshufw)
TEST_INSN(&ApproxPS, PS,rcpps)
TEST_INSN(&ApproxSS, SS,rcpss)
TEST_INSN(&ApproxPS, PS,rsqrtps)
TEST_INSN(&ApproxSS, SS,rsqrtss)
//TEST_INSN( &AllMask, PS,shufps)
TEST_INSN( &AllMask, PS,sqrtps)
TEST_INSN( &AllMask, SS,sqrtss)
TEST_INSN( &AllMask, PS,subps)
TEST_INSN( &AllMask, SS,subss)
TEST_INSN( &AllMask, SS,ucomiss)
TEST_INSN( &AllMask, PS,unpckhps)
TEST_INSN( &AllMask, PS,unpcklps)
TEST_INSN( &AllMask, PS,xorps)

/* ------------------------ SSE2 ------------------------ */
TEST_INSN( &AllMask, PD,addpd)
TEST_INSN( &AllMask, SD,addsd)
TEST_INSN( &AllMask, PD,andnpd)
TEST_INSN( &AllMask, PD,andpd)
TEST_INSN( &AllMask, PD,cmpeqpd)
TEST_INSN( &AllMask, SD,cmpeqsd)
TEST_INSN( &AllMask, PD,cmplepd)
TEST_INSN( &AllMask, SD,cmplesd)
TEST_INSN( &AllMask, PD,cmpltpd)
TEST_INSN( &AllMask, SD,cmpltsd)
TEST_INSN( &AllMask, PD,cmpneqpd)
TEST_INSN( &AllMask, SD,cmpneqsd)
TEST_INSN( &AllMask, PD,cmpnlepd)
TEST_INSN( &AllMask, SD,cmpnlesd)
TEST_INSN( &AllMask, PD,cmpnltpd)
TEST_INSN( &AllMask, SD,cmpnltsd)
TEST_INSN( &AllMask, PD,cmpordpd)
TEST_INSN( &AllMask, SD,cmpordsd)
TEST_INSN( &AllMask, PD,cmpunordpd)
TEST_INSN( &AllMask, SD,cmpunordsd)
TEST_INSN( &AllMask, SD,comisd)
TEST_INSN( &AllMask, 8,cvtdq2pd)
TEST_INSN( &AllMask, 16,cvtdq2ps)
TEST_INSN( &AllMask, 16,cvtpd2dq)
//TEST_INSN( &AllMask, 0,cvtpd2pi)
TEST_INSN( &AllMask, 16,cvtpd2ps)   /* reads 16 */
//TEST_INSN( &AllMask, 0,cvtpi2pd)
TEST_INSN( &AllMask, 16,cvtps2dq)  /* reads 16 */
TEST_INSN( &AllMask, 8,cvtps2pd)   /* reads 8 */
//TEST_INSN( &AllMask, 0,cvtsd2si)
TEST_INSN( &AllMask, SD,cvtsd2ss)   /* reads SD */
//TEST_INSN( &AllMask, 0,cvtsi2sd)
TEST_INSN( &AllMask, SS,cvtss2sd)   /* reads SS */
TEST_INSN( &AllMask, 16,cvttpd2dq)
//TEST_INSN( &AllMask, 0,cvttpd2pi)
TEST_INSN( &AllMask, 16,cvttps2dq)
//TEST_INSN( &AllMask, 0,cvttsd2si)
TEST_INSN( &AllMask, PD,divpd)
TEST_INSN( &AllMask, SD,divsd)
TEST_INSN( &AllMask, PD,maxpd)
TEST_INSN( &AllMask, SD,maxsd)
TEST_INSN( &AllMask, PD,minpd)
TEST_INSN( &AllMask, SD,minsd)
TEST_INSN( &AllMask, PD,movapd)
//TEST_INSN( &AllMask, 8,movd)
//TEST_INSN( &AllMask, 0,movdq2q)
TEST_INSN( &AllMask, 16,movdqa)
TEST_INSN( &AllMask, 16,movdqu)
//TEST_INSN( &AllMask, 16,movhpd)
//TEST_INSN( &AllMask, 16,movlpd)
//TEST_INSN( &AllMask, 0,movmskpd)
//TEST_INSN( &AllMask, 0,movntdq)
//TEST_INSN( &AllMask, 0,movnti)
//TEST_INSN( &AllMask, 0,movntpd)
TEST_INSN( &AllMask, 8,movq)
//TEST_INSN( &AllMask, 0,movq2dq)
TEST_INSN( &AllMask, 8,movsd)
TEST_INSN( &AllMask, 16,movupd)
TEST_INSN( &AllMask, PD,mulpd)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -