⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zamax.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * Extremum of |a| + |b| over a strided vector of two-value elements
 * (Alpha assembly, run through the C preprocessor).
 *
 * Each element is read as two consecutive SIZE-sized values at
 * 0 * SIZE(X) and 1 * SIZE(X) (presumably the real and imaginary parts
 * of a complex number -- confirm against the C prototype).  For every
 * element the routine forms fabs(a) + fabs(b) and keeps
 *   - the largest  such sum when USEMAX is defined,
 *   - the smallest such sum otherwise,
 * leaving the result in $f0.
 *
 * Inputs (register names come from the #defines below):
 *   N    ($16)  element count
 *   X    ($17)  address of the first element
 *   INCX ($18)  stride between elements, in elements
 *   When F_INTERFACE is defined, N and INCX hold addresses and the
 *   actual values are loaded through them.
 *
 * Result:
 *   $f0 = extremum as described above; 0 when n <= 0 or incx <= 0
 *   ($f0 is cleared before the early-exit branch).
 *
 * Register roles inside the routine:
 *   $1         loop counter (groups of four, then the N & 3 tail)
 *   $0,$2,$3   argument-validity flags
 *   $f0-$f3    four independent running extrema, merged after the loop
 *   $f4-$f7    compare results feeding the conditional moves
 *   $f8-$f15   absolute values of the group being reduced
 *   $f16-$f19  per-element sums |a| + |b|
 *   $f20-$f27  raw values loaded for the following group
 *   $f29,$f30  scratch in the tail loop
 *   $f2-$f9 are spilled to the stack on entry and reloaded at $End
 *   (they are the callee-saved FP registers of the Alpha calling
 *   standard).
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and SXADDQ are not defined in
 * this file; they presumably come from common.h.
 *
 * NOTE(review): the unop/nop padding, the .align 4 directives and the
 * interleaving of loads with arithmetic look like deliberate issue-slot
 * scheduling.  Do not reorder instructions without measuring.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

/* Argument registers. */
#define N	$16
#define X	$17
#define INCX	$18

/*
 * CMPLT(cur, cand), dst: dst becomes non-zero when cand should replace
 * cur.  cmptlt a, b, c sets c non-zero when a < b, so:
 *   USEMAX defined : replace when cur  < cand  (keep the maximum)
 *   otherwise      : replace when cand < cur   (keep the minimum)
 */
#ifdef USEMAX
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif

/* Eight 8-byte slots, one for each of $f2-$f9. */
#define STACKSIZE 8 * 8

	PROLOGUE
	PROFCODE
	.frame	$sp, STACKSIZE, $26, 0

#ifdef F_INTERFACE
	/* Arguments arrive by reference: fetch the actual values. */
	ldl	N,     0(N)		# n
	ldl	INCX,  0(INCX)		# incx
#endif
	/* Allocate the save area. */
	lda	$sp, -STACKSIZE($sp)
	nop
	.align 4

	/*
	 * Save $f2-$f9, interleaved with argument checks:
	 *   $2 = (0 < N), $3 = (0 < INCX), $0 = $2 & $3.
	 * SXADDQ is defined elsewhere; presumably it scales INCX by the
	 * size of one value -- confirm in common.h.
	 */
	stt	$f2,   0($sp)
	fclr	$f16
	cmplt	$31, N,    $2
	unop

	stt	$f3,   8($sp)
	fclr	$f17
	cmplt	$31, INCX, $3
	unop

	stt	$f4,  16($sp)
	fclr	$f18
	SXADDQ	INCX, $31, INCX
	unop

	stt	$f5,  24($sp)
	fclr	$f19
	and	$2,  $3,  $0
	unop

	stt	$f6,  32($sp)
	unop

	stt	$f7,  40($sp)
	stt	$f8,  48($sp)
	stt	$f9,  56($sp)
	/* Default result for the early exit is 0. */
	fclr	$f0
	beq	$0,  $End		# if (n <= 0) or (incx <= 0) return
	.align 4

	/*
	 * Seed the result with element 0: $f0 = |a0| + |b0|.
	 * $1 = N >> 2 is the number of full groups of four elements.
	 * INCX is doubled because every element holds two values.
	 */
	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	sra	N, 2, $1
	addq	INCX, INCX, INCX

	fabs	$f20, $f20
	fabs	$f21, $f21
	addt	$f20, $f21, $f0
	/* Fewer than four elements: X still points at element 0, so the
	   tail loop at $L16 handles everything (element 0 is simply
	   examined a second time). */
	ble	$1,  $L15
	.align 4

	/*
	 * Prime the pipeline with group 0.  Element 0 is already in
	 * $f20/$f21; load elements 1-3 and copy the seed into the other
	 * three running extrema $f1-$f3.
	 */
	lda	$1,  -1($1)
	unop
	addq	X, INCX, X
	unop

	LD	$f22,  0 * SIZE(X)
	fmov	$f0,  $f1
	LD	$f23,  1 * SIZE(X)
	addq	X, INCX, X

	LD	$f24,  0 * SIZE(X)
	fmov	$f0,  $f2
	LD	$f25,  1 * SIZE(X)
	addq	X, INCX, X

	LD	$f26,  0 * SIZE(X)
	fmov	$f0,  $f3
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X

	/* Absolute values of group 0 go to $f8-$f15. */
	fabs	$f20, $f8
	fabs	$f21, $f9
	fabs	$f22, $f10
	fabs	$f23, $f11

	fabs	$f24, $f12
	fabs	$f25, $f13
	fabs	$f26, $f14
	fabs	$f27, $f15

	/* Only one full group: reduce it at $L14. */
	ble	$1, $L14
	.align 4

	/* Load the raw values of the next group into $f20-$f27. */
	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	lda	$1,  -1($1)
	addq	X, INCX, X

	LD	$f22,  0 * SIZE(X)
	LD	$f23,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	LD	$f24,  0 * SIZE(X)
	LD	$f25,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	LD	$f26,  0 * SIZE(X)
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X
	/* Exactly two full groups: skip the steady-state loop. */
	ble	$1, $L13
	.align 4

	/*
	 * Steady-state loop, three groups in flight per iteration:
	 *   - sum and compare the group whose absolute values are in
	 *     $f8-$f15,
	 *   - take absolute values of the group held raw in $f20-$f27,
	 *   - load the raw values of the group after that.
	 * Each $f2x register is reloaded only after its fabs has issued,
	 * and each $f8-$f15 register is overwritten only after its addt.
	 */
$L12:
	addt	$f8,  $f9,  $f16
	unop
	fabs	$f20, $f8
	/* Destination $31 discards the data; on Alpha this form is used as
	   a prefetch hint (here 64 * SIZE ahead of X). */
	ldl	$31, 64 * SIZE(X)

	addt	$f10, $f11, $f17
	unop
	fabs	$f21, $f9
	LD	$f20,  0 * SIZE(X)

	addt	$f12, $f13, $f18
	LD	$f21,  1 * SIZE(X)
	fabs	$f22, $f10
	addq	X, INCX, X

	addt	$f14, $f15, $f19
	LD	$f22,  0 * SIZE(X)
	fabs	$f23, $f11
	unop

	CMPLT($f0,  $f16), $f4
	LD	$f23,  1 * SIZE(X)
	fabs	$f24, $f12
	addq	X, INCX, X

	CMPLT($f1,  $f17), $f5
	LD	$f24,  0 * SIZE(X)
	fabs	$f25, $f13
	unop

	CMPLT($f2,  $f18), $f6
	LD	$f25,  1 * SIZE(X)
	fabs	$f26, $f14
	addq	X, INCX, X

	CMPLT($f3,  $f19), $f7
	LD	$f26,  0 * SIZE(X)
	fabs	$f27, $f15
	unop

	/* Replace a running extremum when its compare flag is non-zero. */
	fcmovne	$f4, $f16, $f0
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X
	lda	$1,   -1($1)		# i --

	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3
	bgt	$1,$L12
	.align 4

	/*
	 * Drain, step 1: reduce the group in $f8-$f15 and take absolute
	 * values of the last loaded group.  No further loads are issued.
	 */
$L13:
	addt	$f8,  $f9,  $f16
	fabs	$f20, $f8

	addt	$f10, $f11, $f17
	fabs	$f21, $f9

	addt	$f12, $f13, $f18
	fabs	$f22, $f10

	addt	$f14, $f15, $f19
	fabs	$f23, $f11

	CMPLT($f0,  $f16), $f4
	fabs	$f24, $f12

	CMPLT($f1,  $f17), $f5
	fabs	$f25, $f13

	CMPLT($f2,  $f18), $f6
	fabs	$f26, $f14

	CMPLT($f3,  $f19), $f7
	fabs	$f27, $f15

	fcmovne	$f4, $f16, $f0
	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3
	.align 4

	/*
	 * Drain, step 2: reduce the final group held in $f8-$f15, then
	 * merge the four running extrema $f0-$f3 into $f0.
	 */
$L14:
	addt	$f8,  $f9,  $f16
	addt	$f10, $f11, $f17
	addt	$f12, $f13, $f18
	addt	$f14, $f15, $f19

	CMPLT($f0,  $f16), $f4
	CMPLT($f1,  $f17), $f5
	CMPLT($f2,  $f18), $f6
	CMPLT($f3,  $f19), $f7

	fcmovne	$f4, $f16, $f0
	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3

	/* Pairwise merge: ($f0,$f1) -> $f0, ($f2,$f3) -> $f2, then
	   ($f0,$f2) -> $f0. */
	CMPLT($f0,  $f1), $f16
	CMPLT($f2,  $f3), $f17

	fcmovne	$f16, $f1, $f0
	fcmovne	$f17, $f3, $f2

	CMPLT($f0,  $f2), $f16
	fcmovne	$f16, $f2, $f0
	.align 4

	/* Tail: the remaining N & 3 elements, one per iteration. */
$L15:
	and	N, 3, $1
	unop
	unop
	ble	$1,  $End
	.align 4

$L16:
	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	/* $f29 = |a| + |b| for this element; fold it into $f0. */
	fabs	$f20, $f29
	fabs	$f21, $f30
	addt	$f29, $f30, $f29

	CMPLT($f0,  $f29), $f16
	fcmovne	$f16, $f29, $f0

	lda	$1,   -1($1)		# i --
	bgt	$1, $L16
	.align 4

	/* Common exit: reload $f2-$f9, release the frame and return with
	   the result in $f0. */
$End:
	ldt	$f2,   0($sp)
	ldt	$f3,   8($sp)
	ldt	$f4,  16($sp)
	ldt	$f5,  24($sp)

	ldt	$f6,  32($sp)
	ldt	$f7,  40($sp)
	ldt	$f8,  48($sp)
	ldt	$f9,  56($sp)
	lda	$sp,  STACKSIZE($sp)
	ret

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -