asum_fabs4p120_x1.c

来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 42 行

C
42
字号
#include "atlas_misc.h"
#include "atlas_prefetch.h"
#include <math.h>

#define myabs fabs

/*
 * ATL_UASUM: returns the sum of myabs() (i.e. fabs()) over the N consecutive
 * elements X[0] .. X[N-1].
 *
 * N    - number of elements to accumulate.
 * X    - pointer to the first element; elements are read contiguously.
 * incX - not read by this implementation (the walk below is always unit
 *        stride).
 *
 * The work is done in three phases:
 *   1. a scalar peel over the leading element count reported by
 *      ATL_AlignOffset()
 *      (NOTE(review): presumably the elements that precede the first
 *      ATL_MulBySize(4)-aligned address -- confirm in atlas_misc.h);
 *   2. a 4-way unrolled loop keeping four independent partial sums, which
 *      invokes ATL_pfl1R() on the address 120 elements ahead every iteration
 *      (NOTE(review): presumably a read prefetch -- confirm in
 *      atlas_prefetch.h);
 *   3. a scalar tail over whatever is left before X + N.
 */
TYPE ATL_UASUM(const int N, const TYPE *X, const int incX)
{
   const TYPE *const vecEnd = X + N;   /* one past the last element */
   const TYPE *stop;                   /* end marker of the current phase */
   register TYPE acc0 = ATL_rzero;
   register TYPE acc1 = ATL_rzero;
   register TYPE acc2 = ATL_rzero;
   register TYPE acc3 = ATL_rzero;
   int count;

   /* Phase 1: leading elements, accumulated one at a time into acc0. */
   count = ATL_AlignOffset(N, X, ATL_sizeof, ATL_MulBySize(4));
   stop = X + count;
   for (; X != stop; ++X)
      acc0 += myabs(*X);

   /* Phase 2: largest multiple of four that fits in what remains. */
   count = N - count;
   stop = X + ((count >> 2) << 2);
   if (X != stop)
   {
      for (; X != stop; X += 4)
      {
         ATL_pfl1R(X+120);
         acc0 += myabs(X[0]);
         acc1 += myabs(X[1]);
         acc2 += myabs(X[2]);
         acc3 += myabs(X[3]);
      }
      /* Fold the partial sums pairwise; the order is kept fixed so the
       * floating-point result does not change. */
      acc0 += acc1;
      acc2 += acc3;
      acc0 += acc2;
   }

   /* Phase 3: up to three trailing elements. */
   for (; X != vecEnd; ++X)
      acc0 += myabs(*X);

   return(acc0);
}

⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?