axpy4p40_x1y1.c
来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 58 行
C
58 行
#include "atlas_misc.h"
#include "atlas_prefetch.h"

/*
 * ATL_UAXPY: Y[i] += alpha0 * X[i] for i = 0 .. N-1.
 *
 * X and Y are walked as contiguous arrays; incX and incY are part of the
 * signature but are never referenced by this routine.
 *
 * The update runs in three phases:
 *   1. A scalar loop over the leading element count returned by
 *      ATL_AlignOffset (presumably the number of elements needed to bring Y
 *      to a 4-element boundary -- confirm against atlas_misc.h).
 *   2. If at least 20 elements remain, a 4-way unrolled loop in which the X
 *      loads run ahead of the multiply-adds that consume them.  With `left`
 *      elements remaining, this phase finishes (left rounded down to a
 *      multiple of 4) - 8 of them.
 *   3. A scalar loop over everything that is still outstanding.
 */
void ATL_UAXPY(const int N, const SCALAR alpha0, const TYPE *X, const int incX,
               TYPE *Y, const int incY)
{
   const TYPE *const xEnd = X + N;      /* one past the last X element */
   const TYPE *xStop;                   /* end marker for the current phase */
   register TYPE alpha = alpha0;
   register TYPE xa, xb;                /* X values loaded ahead of use */
   register TYPE ya, yb, yc, yd;        /* the four Y values in flight */
   int left;

   /* Phase 1: scalar peel over the count reported by ATL_AlignOffset. */
   left = ATL_AlignOffset(N, Y, ATL_sizeof, ATL_MulBySize(4));
   xStop = X + left;
   while (X != xStop)
   {
      *Y += alpha * *X;
      ++X;
      ++Y;
   }
   left = N - left;

   /* Phase 2: unrolled, pipelined main loop. */
   if (left >= 20)
   {
      /*
       * X runs 6 elements ahead of Y inside the loop and the drain step
       * below completes one more group of 4, hence the -10 on the stop
       * marker.
       */
      xStop = X + (left / 4) * 4 - 10;

      /* Prologue: finish Y[0..3] in registers and preload X[4], X[5]. */
      ya = Y[0];
      yb = Y[1];
      yc = Y[2];
      yd = Y[3];
      xa = X[0];
      xb = X[1];
      ya += alpha * xa;
      yb += alpha * xb;
      xa = X[2];
      xb = X[3];
      yc += alpha * xa;
      yd += alpha * xb;
      xa = X[4];
      xb = X[5];
      X += 6;

      do
      {
         /* NOTE(review): presumably read/write prefetch hints -- confirm
            against atlas_prefetch.h. */
         ATL_pfl1R(X+40);
         ATL_pfl1W(Y+40);

         /* Write back the finished group, then pull in the next one. */
         Y[0] = ya;
         Y[1] = yb;
         Y[2] = yc;
         Y[3] = yd;
         ya = Y[4];
         yb = Y[5];
         yc = Y[6];
         yd = Y[7];

         /* Each multiply-add consumes a preloaded X value, which is then
            replaced by the next load. */
         ya += alpha * xa;
         xa = X[0];
         yb += alpha * xb;
         xb = X[1];
         yc += alpha * xa;
         xa = X[2];
         yd += alpha * xb;
         xb = X[3];

         X += 4;
         Y += 4;
      }
      while (X != xStop);

      /* Drain: store the pending group and complete one final group of 4. */
      Y[0] = ya;
      Y[1] = yb;
      Y[2] = yc;
      Y[3] = yd;
      ya = Y[4];
      yb = Y[5];
      yc = Y[6];
      yd = Y[7];
      ya += alpha * xa;
      xa = X[0];
      yb += alpha * xb;
      xb = X[1];
      X += 2;
      yc += alpha * xa;
      yd += alpha * xb;
      Y += 4;
      Y[0] = ya;
      Y[1] = yb;
      Y[2] = yc;
      Y[3] = yd;
      Y += 4;
   }

   /* Phase 3: scalar cleanup of whatever the earlier phases left behind. */
   while (X != xEnd)
   {
      *Y += alpha * *X;
      ++X;
      ++Y;
   }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?