sse4_2-pcmpstr.h

来自「用于进行gcc测试」· C头文件 代码 · 共 448 行

H
448
字号
/* Software reference model of the SSE4.2 packed string compare
   operations.  cmp_ei/cmp_ii compute the index result and cmp_em/cmp_im
   the mask result, for explicit-length ("e") and implicit-length,
   zero-terminated ("i") operands; each can also report the flag bits
   defined below.  Only plain C is used, so the results can be compared
   against the real intrinsics.  */
#ifndef SSE4_2_PCMPSTR_H
#define SSE4_2_PCMPSTR_H

#include <nmmintrin.h>
#include <string.h>

/* Bits of the FLAGS value produced by cmp_flags.  AFLAG and PFLAG are
   defined for completeness; cmp_flags never sets them.  */
#define CFLAG 0x00000001
#define ZFLAG 0x00000002
#define SFLAG 0x00000004
#define OFLAG 0x00000008
#define AFLAG 0x00000010
#define PFLAG 0x00000020

/* Fill RES[j][i] with (X[i] == Y[j]) for every pair of elements.
   X and Y are element arrays; the element count is 16 for one-byte
   elements and 8 for two-byte elements.  */
#define PCMPSTR_EQ(X, Y, RES) \
  do							\
    {							\
      int pcmp_n_ = (int) (16 / sizeof (*(X)));		\
      int pcmp_i_, pcmp_j_;				\
      for (pcmp_i_ = 0; pcmp_i_ < pcmp_n_; pcmp_i_++)	\
	for (pcmp_j_ = 0; pcmp_j_ < pcmp_n_; pcmp_j_++)	\
	  (RES)[pcmp_j_][pcmp_i_]			\
	    = ((X)[pcmp_i_] == (Y)[pcmp_j_]);		\
    }							\
  while (0)

/* Treat X as consecutive (low, high) pairs.  For every Y[j] store
   (Y[j] >= low) in the even column and (Y[j] <= high) in the odd
   column of row j of RES.  */
#define PCMPSTR_RNG(X, Y, RES) \
  do							\
    {							\
      int pcmp_n_ = (int) (16 / sizeof (*(X)));		\
      int pcmp_i_, pcmp_j_;				\
      for (pcmp_j_ = 0; pcmp_j_ < pcmp_n_; pcmp_j_++)	\
	for (pcmp_i_ = 0; pcmp_i_ < pcmp_n_ - 1;	\
	     pcmp_i_ += 2)				\
	  {						\
	    (RES)[pcmp_j_][pcmp_i_]			\
	      = ((Y)[pcmp_j_] >= (X)[pcmp_i_]);		\
	    (RES)[pcmp_j_][pcmp_i_ + 1]			\
	      = ((Y)[pcmp_j_] <= (X)[pcmp_i_ + 1]);	\
	  }						\
    }							\
  while (0)

/* Return the absolute value of LEN, saturated to DIM.  The range check
   comes first so that INT_MIN is never negated (that would be signed
   overflow); it saturates to DIM like every other out-of-range
   length.  */
static int
pcmpstr_saturate_len (int len, int dim)
{
  if (len > dim || len < -dim)
    return dim;
  return len < 0 ? -len : len;
}

/* Overwrite the entries of the DIM x DIM comparison matrix RES that
   involve elements past the end of either string.  Column I indexes the
   first operand (valid length LA), row J the second operand (valid
   length LB).  The forced value depends on the aggregation selected by
   bits 2-3 of MODE.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
		  const int mode, int dim)
{
  int i, j;

  for (j = 0; j < dim; j++)
    for (i = 0; i < dim; i++)
      {
	if (i < la)
	  {
	    /* Valid element of A against an invalid element of B.  */
	    if (j >= lb)
	      res[j][i] = 0;
	    continue;
	  }

	/* Invalid element of A.  */
	switch (mode & 0x0C)
	  {
	  case _SIDD_CMP_EQUAL_ANY:
	  case _SIDD_CMP_RANGES:
	    res[j][i] = 0;
	    break;
	  case _SIDD_CMP_EQUAL_EACH:
	    /* Two invalid elements compare equal.  */
	    res[j][i] = (j >= lb) ? 1 : 0;
	    break;
	  case _SIDD_CMP_EQUAL_ORDERED:
	    res[j][i] = 1;
	    break;
	  default:
	    break;
	  }
      }
}

/* Build the element-by-element comparison matrix RES for operands A
   (LA valid elements) and B (LB valid elements).  Bits 0-1 of MODE
   select the element type; range mode uses the range comparison and all
   other modes use equality.  Entries for invalid elements are then
   fixed up by override_invalid.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
	     unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } d, s;
  const int is_ranges = (mode & 0x0C) == _SIDD_CMP_RANGES;

  d.x = a;
  s.x = b;

  switch (mode & 3)
    {
    case _SIDD_UBYTE_OPS:
      if (is_ranges)
	PCMPSTR_RNG (d.uc, s.uc, res);
      else
	PCMPSTR_EQ (d.uc, s.uc, res);
      break;
    case _SIDD_UWORD_OPS:
      if (is_ranges)
	PCMPSTR_RNG (d.us, s.us, res);
      else
	PCMPSTR_EQ (d.us, s.us, res);
      break;
    case _SIDD_SBYTE_OPS:
      if (is_ranges)
	PCMPSTR_RNG (d.sc, s.sc, res);
      else
	PCMPSTR_EQ (d.sc, s.sc, res);
      break;
    case _SIDD_SWORD_OPS:
      if (is_ranges)
	PCMPSTR_RNG (d.ss, s.ss, res);
      else
	PCMPSTR_EQ (d.ss, s.ss, res);
      break;
    default:
      break;
    }

  override_invalid (res, la, lb, mode, (mode & 1) == 0 ? 16 : 8);
}

/* Compute the final per-element result bits for A (length LA) compared
   with B (length LB) under MODE: aggregate the comparison matrix, then
   apply the polarity selected by bits 4-5 of MODE.  LA and LB may be
   negative or larger than the element count; the absolute value
   saturated to the element count is used.  The value returned has one
   bit per element (16 bits for bytes, 8 for words).  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, k, res = 0;
  const int dim = (mode & 1) == 0 ? 16 : 8;

  memset (mtx, 0, sizeof (mtx));

  la = pcmpstr_saturate_len (la, dim);
  lb = pcmpstr_saturate_len (lb, dim);

  calc_matrix (a, la, b, lb, mode, mtx);

  switch (mode & 0x0C)
    {
    case _SIDD_CMP_EQUAL_ANY:
      /* Bit i: element i of B equals some element of A.  */
      for (i = 0; i < dim; i++)
	for (j = 0; j < dim; j++)
	  if (mtx[i][j])
	    res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      /* Bit j: element j of B lies inside some (low, high) pair of A.  */
      for (i = 0; i < dim; i += 2)
	for (j = 0; j < dim; j++)
	  if (mtx[j][i] && mtx[j][i + 1])
	    res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      /* Bit i: element i of A equals element i of B.  */
      for (i = 0; i < dim; i++)
	if (mtx[i][i])
	  res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      /* Bit i: A matches B starting at element i of B (the diagonal of
	 the matrix that starts in row i is all ones).  */
      for (i = 0; i < dim; i++)
	{
	  unsigned char val = 1;

	  for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
	    val &= mtx[k][j];

	  if (val)
	    res |= (1 << i);
	}
      break;

    default:
      break;
    }

  switch (mode & 0x30)
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      res = ~res;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Invert only the bits that belong to valid elements of B.  */
      for (i = 0; i < lb; i++)
	res ^= (1 << i);
      break;

    default:
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}

/* Compute the flag bits for a compare of A (length LA) with B
   (length LB) under MODE whose result bits are RES2.  IS_IMPLICIT is
   nonzero for the implicit-length forms, in which case LA and LB are
   ignored for ZF/SF and the operands are scanned for a null element
   instead.  Returns a combination of CFLAG, ZFLAG, SFLAG and OFLAG.  */
static int
cmp_flags (__m128i a, int la, __m128i b, int lb,
	   int mode, int res2, int is_implicit)
{
  int i;
  int flags = 0;
  const int is_bytes_mode = (mode & 1) == 0;
  union
    {
      __m128i x;
      unsigned char uc[16];
      unsigned short us[8];
    } d, s;

  d.x = a;
  s.x = b;

  /* CF: reset if (RES2 == 0), set otherwise.  */
  if (res2 != 0)
    flags |= CFLAG;

  if (is_implicit)
    {
      /* ZF: set if any byte/word of src xmm operand is null, reset
	 otherwise.
	 SF: set if any byte/word of dst xmm operand is null, reset
	 otherwise.  */
      if (is_bytes_mode)
	{
	  for (i = 0; i < 16; i++)
	    {
	      if (s.uc[i] == 0)
		flags |= ZFLAG;
	      if (d.uc[i] == 0)
		flags |= SFLAG;
	    }
	}
      else
	{
	  for (i = 0; i < 8; i++)
	    {
	      if (s.us[i] == 0)
		flags |= ZFLAG;
	      if (d.us[i] == 0)
		flags |= SFLAG;
	    }
	}
    }
  else
    {
      /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
	 SF: set if abs value of EAX/RAX < 16 (8), reset otherwise.
	 Saturating first keeps INT_MIN from being negated.  */
      const int max_ind = is_bytes_mode ? 16 : 8;

      if (pcmpstr_saturate_len (lb, max_ind) < max_ind)
	flags |= ZFLAG;
      if (pcmpstr_saturate_len (la, max_ind) < max_ind)
	flags |= SFLAG;
    }

  /* OF: equal to RES2[0].  */
  if (res2 & 0x1)
    flags |= OFLAG;

  /* AF: Reset.
     PF: Reset.  */
  return flags;
}

/* Index form of the compare.  Stores the result bits in *RES2 and
   returns the index of the most significant set bit when bit 6 of MODE
   is set, of the least significant set bit otherwise.  When no bit is
   set the element count (16 or 8) is returned.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
	     const int mode, int *res2)
{
  int i;
  const int dim = (mode & 1) == 0 ? 16 : 8;
  const int r2 = calc_res (a, la, b, lb, mode);
  int ndx = dim;

  if (mode & 0x40)
    {
      for (i = dim - 1; i >= 0; i--)
	if (r2 & (1 << i))
	  {
	    ndx = i;
	    break;
	  }
    }
  else
    {
      for (i = 0; i < dim; i++)
	if (r2 & (1 << i))
	  {
	    ndx = i;
	    break;
	  }
    }

  *res2 = r2;
  return ndx;
}

/* Mask form of the compare.  Stores the result bits in *RES2.  When
   bit 6 of MODE is set each result bit is expanded to a whole element
   of all ones or all zeros; otherwise the result bits are placed in the
   lowest element(s) of an otherwise zero vector.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
	    const int mode, int *res2)
{
  union
    {
      __m128i x;
      unsigned char c[16];
      unsigned short s[8];
    } ret;
  int i;
  const int dim = (mode & 1) == 0 ? 16 : 8;
  const int r2 = calc_res (a, la, b, lb, mode);

  memset (&ret, 0, sizeof (ret));

  if (mode & 0x40)
    {
      for (i = 0; i < dim; i++)
	{
	  const int bit = (r2 >> i) & 1;

	  if (dim == 8)
	    ret.s[i] = bit ? 0xFFFF : 0;
	  else
	    ret.c[i] = bit ? 0xFF : 0;
	}
    }
  else
    {
      /* 16 result bits for byte elements, 8 for word elements.  */
      if (dim == 16)
	ret.s[0] = (unsigned short) r2;
      else
	ret.c[0] = (unsigned char) r2;
    }

  *res2 = r2;
  return ret.x;
}

/* Return the number of elements of A that precede the first null
   element, or the element count (16 or 8) when there is none.  Bit 0 of
   MODE selects word elements.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } s;
  int i;
  const int dim = (mode & 1) == 0 ? 16 : 8;

  s.x = a;

  for (i = 0; i < dim; i++)
    if (((mode & 1) ? s.s[i] : s.c[i]) == 0)
      break;

  return i;
}

/* Explicit-length compare returning an index.  If FLAGS is not NULL the
   flag bits are stored through it.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
	const int mode, int *flags)
{
  int res2;
  int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);

  return index;
}

/* Implicit-length compare returning an index; the lengths come from
   calc_str_len.  If FLAGS is not NULL the flag bits are stored through
   it.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  int res2;
  int la = calc_str_len (*a, mode);
  int lb = calc_str_len (*b, mode);
  int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);

  return index;
}

/* Explicit-length compare returning a mask.  If FLAGS is not NULL the
   flag bits are stored through it.  */
static inline __m128i
cmp_em (__m128i *a, int la, __m128i *b, int lb,
	const int mode, int *flags)
{
  int res2;
  __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);

  return mask;
}

/* Implicit-length compare returning a mask; the lengths come from
   calc_str_len.  If FLAGS is not NULL the flag bits are stored through
   it.  */
static inline __m128i
cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
{
  int res2;
  int la = calc_str_len (*a, mode);
  int lb = calc_str_len (*b, mode);
  __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);

  if (flags != NULL)
    *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);

  return mask;
}

#endif /* SSE4_2_PCMPSTR_H */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?