⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trmm_kernel_rt.c

📁 Optimized GotoBLAS libraries
💻 C
字号:
/*********************************************************************//*                                                                   *//*             Optimized BLAS libraries                              *//*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     *//*                                                                   *//* Copyright (c) The University of Texas, 2005. All rights reserved. *//* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  *//* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      *//* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              *//* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  *//* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     *//* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   *//* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         *//* Under no circumstances shall University be liable for incidental, *//* special, indirect, direct or consequential damages or loss of     *//* profits, interruption of business, or related expenses which may  *//* arise from use of Software or Documentation, including but not    *//* limited to those resulting from defects in Software and/or        *//* Documentation, or loss or inaccuracy of data of any kind.         *//*********************************************************************/#include <stdio.h>#include "common.h"static FLOAT dp1 = ONE;#ifdef CONJ#define GEMM_KERNEL   GEMM_KERNEL_R#else#define GEMM_KERNEL   GEMM_KERNEL_N#endifvoid CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT dummy1,#ifdef COMPLEX	   FLOAT dummy2,#endif			FLOAT *a, FLOAT *b, FLOAT *c, BLASLONG ldc, BLASLONG offset){  FLOAT *aa, *cc;  BLASLONG  newoffset;  BLASLONG i, j;  GEMM_BETA(m, n, 0, ZERO, #ifdef COMPLEX	    ZERO,#endif	    NULL, 0, NULL, 0, c, ldc);    j = (n >> GEMM_UNROLL_N_SHIFT);  newoffset = -offset;  while (j > 0) {        aa = a;    cc = c;        i = (m >> GEMM_UNROLL_M_SHIFT);        while (i > 0) {	GEMM_KERNEL(GEMM_UNROLL_M, GEMM_UNROLL_N, k - newoffset, dp1, #ifdef COMPLEX		    ZERO,#endif		    aa + GEMM_UNROLL_M * newoffset * COMPSIZE,		    b +  GEMM_UNROLL_N * newoffset * COMPSIZE, 		    cc,		    ldc); 		aa += GEMM_UNROLL_M * k * COMPSIZE;	cc += GEMM_UNROLL_M * COMPSIZE;	i --;    }        if (m & (GEMM_UNROLL_M - 1)) {      i = (GEMM_UNROLL_M >> 1);      while (i > 0) {	if (m & i) {	  GEMM_KERNEL(i, GEMM_UNROLL_N, k - newoffset, dp1, #ifdef COMPLEX		      ZERO,#endif		      aa + i * newoffset * COMPSIZE,		      b  +  GEMM_UNROLL_N * newoffset * COMPSIZE, 		      cc,		      ldc); 	  aa += i * k * COMPSIZE;	  cc += i * COMPSIZE;	}	i >>= 1;      }    }        newoffset += GEMM_UNROLL_N;    b += GEMM_UNROLL_N * k * COMPSIZE;    c += GEMM_UNROLL_N * ldc * COMPSIZE;    j --;  }    if (n & (GEMM_UNROLL_N - 1)) {    j = (GEMM_UNROLL_N >> 1);    while (j > 0) {      if (n & j) {		aa = a;	cc = c;		i = (m >> GEMM_UNROLL_M_SHIFT);		while (i > 0) {	  GEMM_KERNEL(GEMM_UNROLL_M, j, k - newoffset, dp1, #ifdef COMPLEX		      ZERO,#endif		      aa + GEMM_UNROLL_M * newoffset * COMPSIZE,		      b +  j * newoffset * COMPSIZE, 		      cc,		      ldc); 	  	  aa += GEMM_UNROLL_M * k * COMPSIZE;	  cc += GEMM_UNROLL_M * COMPSIZE;	  i --;	}		if (m & (GEMM_UNROLL_M - 1)) {	    i = (GEMM_UNROLL_M >> 1);	    while (i > 0) {	      if (m & i) {		GEMM_KERNEL(i, j, k - newoffset, dp1, #ifdef COMPLEX			    ZERO,#endif			    aa + i * newoffset * COMPSIZE,			    b +  j * newoffset * COMPSIZE, 			    cc,			    ldc); 				aa += i * k * COMPSIZE;		cc += i * COMPSIZE;	      }	      i >>= 1;	    }	}		newoffset += j;	b += j * k * COMPSIZE;	c += j * ldc * COMPSIZE;      }      j >>= 1;    }  }  return;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -