⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 amax.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * amax kernel for SPARC (GNU as syntax, run through the C preprocessor).
 *
 * Walks N elements of X with stride INCX and leaves the selected extreme
 * of the absolute values in c1 (%f0).  The selection is done by FCMOV:
 * FMOVG by default, FMOVL when USE_MIN is defined (presumably
 * "move if greater" / "move if less" as provided by common.h, i.e.
 * maximum vs. minimum -- confirm against common.h).
 *
 * Register roles (names below are the #define aliases):
 *   N    (%i0)  element count; loaded through the pointer when
 *               F_INTERFACE is defined
 *   X    (%i1)  address of the current element
 *   INCX (%i2)  stride; loaded through the pointer when F_INTERFACE is
 *               defined, then shifted left by BASE_SHIFT
 *   I    (%i3)  loop counter
 *   c1..c4      four independent running extremes, merged at .LL19/.LL59
 *   t1..t4      absolute values of the elements being compared
 *   a1..a8      eight elements loaded ahead of the compare stage
 *
 * Result: c1.  A zero is returned when N <= 0 or INCX <= 0.
 *
 * PROLOGUE, SAVESP, EPILOGUE, LDF, FABS, FCMP, FMOVG, FMOVL, SIZE,
 * BASE_SHIFT and STACK_START are not defined in this file; they are
 * expected to come from common.h.
 *
 * NOTE: the instruction following every branch or return is its delay
 * slot.  The instruction order in this file is significant; do not
 * reorder lines when editing.
 */

#define ASSEMBLER
#include "common.h"

/* Integer registers. */
#define N	%i0
#define X	%i1
#define INCX	%i2
#define I	%i3

/* Floating point registers: even-numbered registers for DOUBLE,
   consecutive registers otherwise. */
#ifdef DOUBLE
#define c1	%f0
#define c2	%f2
#define c3	%f4
#define c4	%f6

#define t1	%f8
#define t2	%f10
#define t3	%f12
#define t4	%f14

#define a1	%f16
#define a2	%f18
#define a3	%f20
#define a4	%f22
#define a5	%f24
#define a6	%f26
#define a7	%f28
#define a8	%f30
#else
#define c1	%f0
#define c2	%f1
#define c3	%f2
#define c4	%f3

#define t1	%f4
#define t2	%f5
#define t3	%f6
#define t4	%f7

#define a1	%f8
#define a2	%f9
#define a3	%f10
#define a4	%f11
#define a5	%f12
#define a6	%f13
#define a7	%f14
#define a8	%f15
#endif

/* Conditional move used to update the running extreme. */
#ifndef USE_MIN
#define FCMOV	FMOVG
#else
#define FCMOV	FMOVL
#endif

	PROLOGUE
	SAVESP

#ifdef F_INTERFACE
	/* Arguments arrive by reference: replace pointers with values. */
	ld	[N], N
	ld	[INCX], INCX
#endif

	/* Build a zero in the stack scratch slot so it can be loaded into
	   c1 as the result for the degenerate cases below. */
	st	%g0, [%fp + STACK_START + 0]
#ifdef DOUBLE
	st	%g0, [%fp + STACK_START + 4]
#endif

	cmp	N, 0
	ble	.LL20
	LDF	[%fp + STACK_START], c1		/* delay slot: c1 = 0 */

	cmp	INCX, 0
	ble	.LL20
	sll	INCX, BASE_SHIFT, INCX		/* delay slot: scale stride */

	/* Consume the first element and seed all four accumulators with
	   its absolute value. */
	add	N, -1, N
	LDF	[X], c4
	add	X, INCX, X
	cmp	N, 0
	ble	.LL20				/* only one element */
	FABS	c4, c1				/* delay slot: c1 = |x[0]| */

	FABS	c4, c2
	FABS	c4, c3
	FABS	c4, c4

	/* Strided input is handled by the separate path at .LL50. */
	cmp	INCX, SIZE
	bne	.LL50
	nop

	/* ---- Contiguous path: groups of eight elements ---- */
	sra	N, 3, I				/* I = remaining / 8 */
	cmp	I, 0
	ble,pn	%icc, .LL15
	nop

	/* Preload the first group; the loop below compares one group while
	   loading the next. */
	LDF	[X +  0 * SIZE], a1
	LDF	[X +  1 * SIZE], a2
	LDF	[X +  2 * SIZE], a3
	LDF	[X +  3 * SIZE], a4

	LDF	[X +  4 * SIZE], a5
	add	I, -1, I
	LDF	[X +  5 * SIZE], a6
	cmp	I, 0
	LDF	[X +  6 * SIZE], a7
	LDF	[X +  7 * SIZE], a8

	ble,pt	%icc, .LL12			/* single group: drain only */
	add	X, 8 * SIZE, X			/* delay slot */

#define PREFETCHSIZE 40

.LL11:
	prefetch [X + PREFETCHSIZE * SIZE], 0

	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1
	FABS	a2, t2
	LDF	[X +  1 * SIZE], a2
	FABS	a3, t3
	LDF	[X +  2 * SIZE], a3
	FABS	a4, t4
	LDF	[X +  3 * SIZE], a4

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	FABS	a5, t1
	LDF	[X +  4 * SIZE], a5
	FABS	a6, t2
	LDF	[X +  5 * SIZE], a6
	FABS	a7, t3
	LDF	[X +  6 * SIZE], a7
	FABS	a8, t4
	LDF	[X +  7 * SIZE], a8

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	FCMOV	%fcc1, t2, c2
	cmp	I, 0
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	bg,pt	%icc, .LL11
	add	X, 8 * SIZE, X			/* delay slot */

.LL12:
	/* Drain: compare the last preloaded group without loading more. */
	FABS	a1, t1
	FABS	a2, t2
	FABS	a3, t3
	FABS	a4, t4

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	FABS	a5, t1
	FABS	a6, t2
	FABS	a7, t3
	FABS	a8, t4

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

.LL15:
	/* Tail: remaining (count mod 8) elements, one at a time into c1. */
	and	N, 7, I
	cmp	I,  0
	ble,a,pn %icc, .LL19
	nop

.LL16:
	LDF	[X +  0 * SIZE], a1
	FABS	a1, t1
	FCMP	%fcc0, t1, c1
	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	cmp	I, 0
	bg,pt	%icc, .LL16
	add	X, 1 * SIZE, X			/* delay slot */

.LL19:
	/* Merge the four accumulators: (c2 -> c1), (c4 -> c3), (c3 -> c1). */
	FCMP	%fcc0, c2, c1
	FCMP	%fcc1, c4, c3
	FCMOV	%fcc0, c2, c1
	FCMOV	%fcc1, c4, c3
	FCMP	%fcc0, c3, c1
	FCMOV	%fcc0, c3, c1

.LL20:
	/* Common exit for the contiguous path and the early-out cases. */
#if !defined(DOUBLE) && defined(NEED_F2CCONV) && defined(F_INTERFACE_F2C)
	fstod	c1, c1				/* widen single result to double */
#endif
	return	%i7 + 8
	clr	%g0				/* delay slot: %g0 is hardwired to
						   zero, so this has no effect */

.LL50:
	/* ---- Strided path: same structure, X advanced by INCX ---- */
	sra	N, 3, I				/* I = remaining / 8 */
	cmp	I, 0
	ble,pn	%icc, .LL55
	nop

	/* Preload the first group of eight elements. */
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	add	I, -1, I
	LDF	[X +  0 * SIZE], a7
	cmp	I, 0
	add	X, INCX, X
	LDF	[X +  0 * SIZE], a8

	ble,pt	%icc, .LL52			/* single group: drain only */
	add	X, INCX, X			/* delay slot */

.LL51:
	FABS	a1, t1
	LDF	[X +  0 * SIZE], a1
	add	X, INCX, X
	FABS	a2, t2
	LDF	[X +  0 * SIZE], a2
	add	X, INCX, X
	FABS	a3, t3
	LDF	[X +  0 * SIZE], a3
	add	X, INCX, X
	FABS	a4, t4
	LDF	[X +  0 * SIZE], a4
	add	X, INCX, X

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	FABS	a5, t1
	LDF	[X +  0 * SIZE], a5
	add	X, INCX, X
	FABS	a6, t2
	LDF	[X +  0 * SIZE], a6
	add	X, INCX, X
	FABS	a7, t3
	LDF	[X +  0 * SIZE], a7
	add	X, INCX, X
	FABS	a8, t4
	LDF	[X +  0 * SIZE], a8

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	FCMOV	%fcc1, t2, c2
	cmp	I, 0
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	bg,pt	%icc, .LL51
	add	X, INCX, X			/* delay slot */

.LL52:
	/* Drain: compare the last preloaded group without loading more. */
	FABS	a1, t1
	FABS	a2, t2
	FABS	a3, t3
	FABS	a4, t4

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

	FABS	a5, t1
	FABS	a6, t2
	FABS	a7, t3
	FABS	a8, t4

	FCMP	%fcc0, t1, c1
	FCMP	%fcc1, t2, c2
	FCMP	%fcc2, t3, c3
	FCMP	%fcc3, t4, c4

	FCMOV	%fcc0, t1, c1
	FCMOV	%fcc1, t2, c2
	FCMOV	%fcc2, t3, c3
	FCMOV	%fcc3, t4, c4

.LL55:
	/* Tail: remaining (count mod 8) elements, one at a time into c1. */
	and	N, 7, I
	cmp	I,  0
	ble,a,pn %icc, .LL59
	nop

.LL56:
	LDF	[X +  0 * SIZE], a1
	FABS	a1, t1
	FCMP	%fcc0, t1, c1
	FCMOV	%fcc0, t1, c1
	add	I, -1, I
	cmp	I, 0
	bg,pt	%icc, .LL56
	add	X, INCX, X			/* delay slot */

.LL59:
	/* Merge the four accumulators: (c2 -> c1), (c4 -> c3), (c3 -> c1). */
	FCMP	%fcc0, c2, c1
	FCMP	%fcc1, c4, c3
	FCMOV	%fcc0, c2, c1
	FCMOV	%fcc1, c4, c3
	FCMP	%fcc0, c3, c1
	FCMOV	%fcc0, c3, c1

#if !defined(DOUBLE) && defined(NEED_F2CCONV) && defined(F_INTERFACE_F2C)
	fstod	c1, c1				/* widen single result to double */
#endif
	return	%i7 + 8
	clr	%o0				/* delay slot */

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -