asum_fabs4p120_x1.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 42 行
C
42 行
#include "atlas_misc.h"
#include "atlas_prefetch.h"
#include <math.h>

#define myabs fabs

/*
 * ATL_UASUM: return the sum of myabs(X[i]) for i = 0 .. N-1, reading the
 * N elements contiguously starting at X.
 *
 * N    : number of elements to sum.
 * X    : pointer to the first element.
 * incX : not read by this implementation; elements are always taken one
 *        after another (presumably callers only dispatch here for unit
 *        stride -- TODO confirm against the caller).
 *
 * The work is done in three phases:
 *   1. a scalar peel of the first ATL_AlignOffset(...) elements (taken when
 *      X is "not aligned"; the exact alignment rule lives in atlas_misc.h),
 *   2. a main loop that consumes 4 elements per iteration into four
 *      independent partial sums,
 *   3. a scalar tail for whatever is left before X+N.
 *
 * NOTE: this is floating-point accumulation, so the order in which the
 * partial sums are combined is part of the result; do not reorder it.
 */
TYPE ATL_UASUM(const int N, const TYPE *X, const int incX)
{
   register TYPE sum0 = ATL_rzero, sum1 = ATL_rzero;
   register TYPE sum2 = ATL_rzero, sum3 = ATL_rzero;
   const TYPE *const vecEnd = X + N;   /* one past the last element */
   const TYPE *stop;                   /* end of the phase in progress */
   int cnt;

   /*
    * Phase 1: leading elements, accumulated one at a time into sum0.
    * A zero offset makes stop == X, so the loop body never runs.
    */
   cnt = ATL_AlignOffset(N, X, ATL_sizeof, ATL_MulBySize(4));
   for (stop = X + cnt; X != stop; X++)
      sum0 += myabs(X[0]);

   /*
    * Phase 2: largest multiple of 4 elements that still fits in the
    * remaining N - cnt.
    */
   cnt = N - cnt;
   stop = X + ((cnt >> 2) << 2);
   if (X != stop)
   {
      while (X != stop)
      {
         /* Prefetch hint (see atlas_prefetch.h) for 120 elements ahead. */
         ATL_pfl1R(X+120);
         sum0 += myabs(X[0]);
         sum1 += myabs(X[1]);
         sum2 += myabs(X[2]);
         sum3 += myabs(X[3]);
         X += 4;
      }
      /* Pairwise reduction of the four partial sums into sum0. */
      sum0 += sum1;
      sum2 += sum3;
      sum0 += sum2;
   }

   /* Phase 3: up to 3 trailing elements (or all of them if phase 2 was empty). */
   for (; X != vecEnd; X++)
      sum0 += myabs(X[0]);

   return(sum0);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?