⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemm_beta.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************//*                                                                   *//*             Optimized BLAS libraries                              *//*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     *//*                                                                   *//* Copyright (c) The University of Texas, 2005. All rights reserved. *//* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  *//* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      *//* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              *//* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  *//* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     *//* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   *//* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         *//* Under no circumstances shall University be liable for incidental, *//* special, indirect, direct or consequential damages or loss of     *//* profits, interruption of business, or related expenses which may  *//* arise from use of Software or Documentation, including but not    *//* limited to those resulting from defects in Software and/or        *//* Documentation, or loss or inaccuracy of data of any kind.         
*//*********************************************************************/

#define ASSEMBLER
#include "common.h"
#include "version.h"

/*
 * CNAME -- scale a strided 2-D block of C in place by beta.
 *
 * Registers / stack slots read on entry (as used by the code below):
 *   $16       m, number of contiguous elements handled per pass
 *   $17       j, number of passes (presumably n -- confirm against caller)
 *   $f19      beta
 *   16($sp)   c, base address of the block (loaded into $18)
 *   24($sp)   ldc, distance between consecutive passes (loaded into $19)
 *
 * For each of the j passes the routine walks m elements starting at c,
 * eight at a time and then one at a time for the remainder (m & 7), and
 * afterwards advances c with SXADDQ.  When beta compares equal to zero
 * the elements are overwritten with zero instead of being multiplied, so
 * the old contents of C are never loaded on that path.
 *
 * Returns 0 in $0 on every path, including the early exit taken when
 * m <= 0 or j <= 0.
 *
 * Scratch registers written: $1, $2, $18, $19, $f11, $f12, $f14-$f18,
 * $f21-$f30.
 *
 * CNAME, LD, ST, MUL, SIZE and SXADDQ are not defined here; presumably
 * they come from common.h and select the precision-specific load, store,
 * multiply, element size and scaled add -- TODO confirm.
 *
 * NOTE(review): ldc is fetched with ldl, i.e. only the low 32 bits of the
 * stack slot, sign-extended.  Confirm that ldc always fits in 32 bits.
 */

	.set noat
	.set noreorder

.text
	.align 5
	.globl	CNAME
	.ent	CNAME
CNAME:
	.frame	$sp, 0, $26, 0

#ifdef PROFILE
	ldgp	$gp, 0($27)
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
#endif

	ldq	$18,   16($sp)		# c
	ble	$16, $End		# nothing to do if m <= 0
	ldl	$19,   24($sp)		# ldc
	ble	$17, $End		# nothing to do if j <= 0
#ifndef PROFILE
	.prologue 0
#else
	.prologue 1
#endif

	fbeq	$f19, $BETA_EQ_ZERO		# if (beta == ZERO)
	.align 4

/* ---- beta != 0 : C := beta * C ------------------------------------ */

$BETA_NE_ZERO:
	sra	$16, 3,  $2		# i = (m >> 3)
	mov	$18, $1			# c_offset = c
	lda	$17,  -1($17)		# j --
	ble	$2,$L52
	.align 4

$L51:					# eight elements per iteration
	lds	$f31,  64($1)		# load into the zero register: prefetch hint only
	lda	$2,    -1($2)		# i --

	LD	$f14, 0*SIZE($1)
	LD	$f15, 1*SIZE($1)
	LD	$f16, 2*SIZE($1)
	LD	$f17, 3*SIZE($1)
	LD	$f18, 4*SIZE($1)
	LD	$f11, 5*SIZE($1)
	LD	$f21, 6*SIZE($1)
	LD	$f22, 7*SIZE($1)

	MUL	$f19, $f14, $f23	# beta * c[0..7]
	MUL	$f19, $f15, $f24
	MUL	$f19, $f16, $f25
	MUL	$f19, $f17, $f26
	MUL	$f19, $f18, $f27
	MUL	$f19, $f11, $f28
	MUL	$f19, $f21, $f29
	MUL	$f19, $f22, $f30

	ST	$f23, 0*SIZE($1)
	ST	$f24, 1*SIZE($1)
	ST	$f25, 2*SIZE($1)
	ST	$f26, 3*SIZE($1)
	ST	$f27, 4*SIZE($1)
	ST	$f28, 5*SIZE($1)
	ST	$f29, 6*SIZE($1)
	ST	$f30, 7*SIZE($1)

	lda	$1,8*SIZE($1)		# c_offset += 8
	bgt	$2,$L51
	.align 4

$L52:
	and	$16, 7,  $2		# i = (m & 7), leftover elements
	ble	$2,$L54
	.align	4

$L53:					# one element per iteration
	LD	$f12,   0($1)
	lda	$2,    -1($2)		# i --
	MUL	$f19, $f12, $f23
	ST	$f23,   0($1)
	lda	$1,  SIZE($1)		# c_offset ++
	bgt	$2,$L53
	.align 4

$L54:
	SXADDQ	$19, $18, $18		# c += ldc
	bgt	$17,$BETA_NE_ZERO	# next pass while j > 0
	clr	$0			# return 0
	ret
	.align 4

/* ---- beta == 0 : C := 0 ------------------------------------------- */

$BETA_EQ_ZERO:
	sra	$16, 3,  $2		# i = (m >> 3)
	mov	$18, $1			# c_offset = c
	lda	$17,  -1($17)		# j --
	ble	$2,$L42
	.align 4

$L41:					# eight elements per iteration
	ST	$f31, 0*SIZE($1)	# $f31 always reads as zero
	ST	$f31, 1*SIZE($1)
	ST	$f31, 2*SIZE($1)
	ST	$f31, 3*SIZE($1)
	ST	$f31, 4*SIZE($1)
	ST	$f31, 5*SIZE($1)
	ST	$f31, 6*SIZE($1)
	ST	$f31, 7*SIZE($1)

	lda	$2,    -1($2)		# i --
	lda	$1,  8*SIZE($1)		# c_offset += 8
	bgt	$2,$L41
	.align 4

$L42:
	and	$16, 7,  $2		# i = (m & 7), leftover elements
	ble	$2,$L44
	.align	4

$L43:					# one element per iteration
	lda	$2,    -1($2)		# i --
	ST	$f31,   0($1)
	lda	$1,  SIZE($1)		# c_offset ++
	bgt	$2, $L43
	.align 4

$L44:
	SXADDQ	$19, $18, $18		# c += ldc
	bgt	$17,$BETA_EQ_ZERO	# next pass while j > 0
	.align 4

/* Shared exit: also reached directly when m <= 0 or j <= 0, so the
   return register is cleared here rather than before the label. */
$End:
	clr	$0			# return 0
	ret

	.ident	VERSION
	.end CNAME

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -