⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nrm2.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * nrm2 kernel: returns scale * sqrt(ssq), where scale is the largest
 * |x[i]| seen so far and ssq is the running sum of (|x[i]| / scale)^2.
 * Rescaling by the current maximum keeps the squares from overflowing
 * or underflowing.
 *
 * Registers on entry, as used by the code below:
 *   $16  address of n     (read as a 32-bit integer)
 *   $17  address of x     (advanced by the loop)
 *   $18  address of incx  (read as a 32-bit integer)
 * Result is left in $f0.
 *
 * Special cases handled before the loop:
 *   n <= 0 or incx <= 0      -> returns 0
 *   n == 1 (non-COMPLEX)     -> returns fabs(x[0]) without calling sqrt
 *
 * Working registers inside the loop:
 *   $f0   absxi      $f2   scale      $f16  ssq
 *   $f10  temporary  $f17  xi         $f30  constant 1.0
 *
 * NAME, LD, SXADDQ, DIV, MUL, ADD and SIZE are not defined in this file;
 * presumably they are precision-dependent macros from common.h (verify).
 * With COMPLEX defined, each element is processed as two scalars: the
 * part at offset 0 in $MainLoop and the part at offset SIZE in
 * $MainLoopNext.
 *
 * Standalone comments in this file must use C comment syntax: the file
 * is run through the C preprocessor, so a line starting with a hash
 * would be parsed as a directive.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

	.set noat
	.set noreorder
.text
	.align 5
	.globl NAME
	.ent NAME
NAME:
	.frame $sp, 32, $26, 0
	ldgp	$gp,   0($27)
	lda	$sp, -32($sp)		# 32-byte frame: ra, saved $f2, constant 1.0

	ldl	$16,   0($16)		# n    = *n
	ldl	$18,   0($18)		# incx = *incx
	ldah	$4,  0x3f80		# $4 = 0x3f800000, bit pattern of 1.0f
	fclr	$f0			# norm = 0.

	stq	$26,   0($sp)		# save return address
	stt	$f2,   8($sp)		# save $f2, used below for scale
	stl	$4,   16($sp)		# spill 1.0f so it can be loaded into an FP register
	.prologue 1

#ifdef PROFILE
	/* Profiling hook: call _mcount. */
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif

	ble	$16, $End		# n <= 0    -> return 0
	ble	$18, $End		# incx <= 0 -> return 0

#ifndef COMPLEX
	/* Single real element: the norm is just fabs(x[0]). */
	LD	$f0,   0($17)
	cmpeq	$16, 1, $0
	fabs	$f0, $f0
	bne	$0,  $End
#else
	addl	$18, $18, $18		# incx *= 2: two scalars per complex element
#endif

	fclr	$f2			# scale = 0.
	lda	$27,  sqrt		# address of sqrt, called at $Finish
	lds	$f30, 16($sp)		# one load
	fmov	$f30, $f16		# ssq = 1.
	.align 4

$MainLoop:
	LD	$f17,  0($17)		# xi = *x
#ifndef COMPLEX
	SXADDQ	$18, $17, $17		# x += incx
	nop
	lda	$16,  -1($16)		# n --
#endif
	fabs	$f17,  $f0		# absxi = fabs(xi)
#ifndef COMPLEX
	fbeq	$f17, $MainLoopEnd	# xi == 0 contributes nothing
#else
	fbeq	$f17, $MainLoopNext	# xi == 0: go on to the second part
#endif
	cmptle	$f0,  $f2, $f10		# $f10 = (absxi <= scale)
	fbeq	$f10, $ABSXI_LT_SCALE	# taken when absxi > scale
	.align 4

	/*
	 * Note on label names: $ABSXI_GE_SCALE is the fall-through path
	 * taken when absxi <= scale, and $ABSXI_LT_SCALE is the path taken
	 * when absxi > scale (a new maximum). The names read the opposite
	 * way round from the conditions; the code itself is consistent.
	 */
$ABSXI_GE_SCALE:
	DIV	$f0,  $f2,  $f10	# divxi = (absxi / scale)
	MUL	$f10, $f10, $f10	# divxi = divxi * divxi
	ADD	$f16, $f10, $f16	# ssq += divxi
#ifndef COMPLEX
	bgt	$16,  $MainLoop
	br	$31,  $Finish
#else
	br	$31, $MainLoopNext
#endif
	.align 4

$ABSXI_LT_SCALE:
	DIV	$f2,  $f0,  $f10	# divxi = (scale / absxi)
	fmov	$f0,  $f2		# scale = absxi
	MUL	$f10, $f10, $f10	# divxi = divxi * divxi
	MUL	$f16, $f10, $f10	# ssq = ssq * divxi
	ADD	$f30, $f10, $f16	# ssq = 1. + ssq

#ifdef COMPLEX
	.align 4

	/* Second scalar of the complex element, at offset SIZE. */
$MainLoopNext:
	LD	$f17,  SIZE($17)	# xi = *x
	SXADDQ	$18, $17, $17		# x += incx
	nop
	lda	$16,  -1($16)		# n --
	fabs	$f17,  $f0		# absxi = fabs(xi)
	fbeq	$f17, $MainLoopEnd	# xi == 0 contributes nothing
	cmptle	$f0,  $f2, $f10		# $f10 = (absxi <= scale)
	fbeq	$f10, $ABSXI_LT_SCALE2	# taken when absxi > scale
	.align 4

$ABSXI_GE_SCALE2:
	DIV	$f0,  $f2,  $f10	# divxi = (absxi / scale)
	MUL	$f10, $f10, $f10	# divxi = divxi * divxi
	ADD	$f16, $f10, $f16	# ssq += divxi
	bgt	$16,  $MainLoop
	br	$31,  $Finish
	.align 4

$ABSXI_LT_SCALE2:
	DIV	$f2,  $f0,  $f10	# divxi = (scale / absxi)
	fmov	$f0,  $f2		# scale = absxi
	MUL	$f10, $f10, $f10	# divxi = divxi * divxi
	MUL	$f16, $f10, $f10	# ssq = ssq * divxi
	ADD	$f30, $f10, $f16	# ssq = 1. + ssq
	.align 4
#endif

$MainLoopEnd:
	bgt	$16,$MainLoop		# more elements left
	.align 4

	/*
	 * ssq is in $f16 when sqrt is called and the product below reads
	 * the result from $f0; this matches the Alpha calling convention
	 * for the first FP argument and the FP return value. $f2 (scale)
	 * is expected to survive the call as a callee-saved register.
	 */
$Finish:
	jsr	$26, ($27), sqrt
	ldgp	$gp,  0($26)		# re-establish $gp after the call

	MUL	$f2,$f0,$f0		# norm = scale * sqrt(ssq)
	.align 4

$End:
	ldq	$26,  0($sp)		# restore return address
	ldt	$f2,  8($sp)		# restore $f2 saved on entry
	lda	$sp, 32($sp)		# pop frame
	ret
	.end NAME

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -