ger.c
来自「Optimized GotoBLAS libraries」· C语言 代码 · 共 126 行
C
126 行
/*********************************************************************//* *//* Optimized BLAS libraries *//* By Kazushige Goto <kgoto@tacc.utexas.edu> *//* *//* Copyright (c) The University of Texas, 2005. All rights reserved. *//* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING *//* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF *//* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE, *//* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY *//* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF *//* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO *//* THE USE OF THE SOFTWARE OR DOCUMENTATION. *//* Under no circumstances shall University be liable for incidental, *//* special, indirect, direct or consequential damages or loss of *//* profits, interruption of business, or related expenses which may *//* arise from use of Software or Documentation, including but not *//* limited to those resulting from defects in Software and/or *//* Documentation, or loss or inaccuracy of data of any kind. *//*********************************************************************/#include <stdio.h>#include "common.h"#define P 4000#define Q 8int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *a, BLASLONG lda, FLOAT *buffer){ BLASLONG is, js, i, j; BLASLONG min_m, min_n; FLOAT *bufX, *bufY, *xorig; xorig = x; for (js = 0; js < n; js += Q) { min_n = n - js; if (min_n > Q) min_n = Q; bufY = y + js; if (incy != 1) { bufY = buffer + P; j = (min_n >> 2); if (j > 0) { do { *(bufY + 0) = *y; y += incy; *(bufY + 1) = *y; y += incy; *(bufY + 2) = *y; y += incy; *(bufY + 3) = *y; y += incy; bufY += 4; j --; } while (j > 0); } j = (min_n & 3); if (j > 0) { do { *bufY = *y; y += incy; bufY ++; j --; } while (j > 0); } bufY = buffer + P; } x = xorig; for (is = 0; is < m; is += P) { min_m = m - is; if (min_m > P) min_m = P; bufX = x + is; if (incx != 1){ bufX = buffer; i = (min_m >> 2); if (i > 0) { do { *(bufX + 0) = *x; x += incx; *(bufX + 1) = *x; x += incx; *(bufX + 2) = *x; x += incx; *(bufX + 3) = *x; x += incx; bufX += 4; i --; } while (i > 0); } i = (min_m & 3); if (i > 0) { do { *bufX = *x; bufX ++; x += incx; i --; } while (i > 0); } bufX = buffer; } for (j = 0; j < min_n; j++) { AXPY_K(min_m, 0, 0, alpha * bufY[j], bufX, 1, a + is + (j + js) * lda, 1, NULL, 0); } } } return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?