⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 izamax.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * Two-pass search over a strided vector whose elements are pairs of
 * values (offsets 0 * SIZE and 1 * SIZE from the element pointer).
 *
 * Pass 1 ($L12 .. $L16) reduces fabs(first) + fabs(second) of every
 *        element into $f0.  With USEMAX defined CMPLT keeps the larger
 *        sum, otherwise the smaller one.
 * Pass 2 ($L20 .. $L41) rescans from the saved start pointer XX and
 *        counts elements in $0 until a sum compares equal to $f0.
 *
 * Register roles (see the #defines below):
 *   N    ($16)  element count; read through a pointer if F_INTERFACE
 *   X    ($17)  element pointer advanced by pass 1
 *   INCX ($18)  stride; read through a pointer if F_INTERFACE, then
 *               scaled by SXADDQ and doubled before use
 *   XX   ($19)  copy of the entry value of X, advanced by pass 2
 *   $0          result: 1-based position of the match, or 0 when
 *               n <= 0 or incx <= 0
 *   $1          loop counter
 *   $f2-$f9     used as scratch; saved on entry, restored at $End
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, SIZE and SXADDQ are not defined in
 * this file.  NOTE(review): presumably they come from common.h, with
 * LD/SIZE being the load opcode and byte size of one value - confirm.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

#define N	$16
#define X	$17
#define INCX	$18
#define XX	$19

/* CMPLT(best, cand), dst: dst != 0 when cand should replace best. */
#ifdef USEMAX
#define CMPLT(a, b) cmptlt a, b
#else
#define CMPLT(a, b) cmptlt b, a
#endif

/* Eight 8-byte slots, one for each of $f2 .. $f9. */
#define STACKSIZE 8 * 8

	PROLOGUE
	PROFCODE
	.frame	$sp, STACKSIZE, $26, 0

#ifdef F_INTERFACE
	ldl	N,     0(N)		# n
	ldl	INCX,  0(INCX)		# incx
#endif
	lda	$sp, -STACKSIZE($sp)
	nop
	.align 4

/* Save $f2-$f9, interleaved with argument checks and register setup. */
	stt	$f2,   0($sp)
	fclr	$f16
	cmplt	$31, N,    $2
	unop

	stt	$f3,   8($sp)
	fclr	$f17
	cmplt	$31, INCX, $3
	unop

	stt	$f4,  16($sp)
	fclr	$f18
	SXADDQ	INCX, $31, INCX
	unop

	stt	$f5,  24($sp)
	fclr	$f19
	and	$2,  $3,  $2
	clr	$0

	stt	$f6,  32($sp)
	mov	X, XX

	stt	$f7,  40($sp)
	stt	$f8,  48($sp)
	stt	$f9,  56($sp)

	fclr	$f0
	beq	$2,  $End		# if (n <= 0) or (incx <= 0) return
	.align 4

/* Seed $f0 with the sum for element 0; X is not advanced here. */
	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	sra	N, 2, $1
	addq	INCX, INCX, INCX

	fabs	$f20, $f20
	fabs	$f21, $f21
	addt	$f20, $f21, $f0
	ble	$1,  $L15
	.align 4

/*
 * Pass 1, unrolled by four elements.  $f0-$f3 are four independent
 * running results, all seeded from element 0.  $f20-$f27 hold raw
 * values of the newest group, $f8-$f15 the absolute values of the
 * group before it.
 */
	lda	$1,  -1($1)
	unop
	addq	X, INCX, X
	unop

	LD	$f22,  0 * SIZE(X)
	fmov	$f0,  $f1
	LD	$f23,  1 * SIZE(X)
	addq	X, INCX, X

	LD	$f24,  0 * SIZE(X)
	fmov	$f0,  $f2
	LD	$f25,  1 * SIZE(X)
	addq	X, INCX, X

	LD	$f26,  0 * SIZE(X)
	fmov	$f0,  $f3
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X

	fabs	$f20, $f8
	fabs	$f21, $f9
	fabs	$f22, $f10
	fabs	$f23, $f11

	fabs	$f24, $f12
	fabs	$f25, $f13
	fabs	$f26, $f14
	fabs	$f27, $f15

	ble	$1, $L14
	.align 4

	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	lda	$1,  -1($1)
	addq	X, INCX, X

	LD	$f22,  0 * SIZE(X)
	LD	$f23,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	LD	$f24,  0 * SIZE(X)
	LD	$f25,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	LD	$f26,  0 * SIZE(X)
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X
	ble	$1, $L13
	.align 4

/*
 * Steady state: sum the oldest group, take absolute values of the
 * middle group and load the next group, all in one iteration.
 * The ldl into $31 discards its result (read-ahead of 64 * SIZE).
 */
$L12:
	addt	$f8,  $f9,  $f16
	unop
	fabs	$f20, $f8
	ldl	$31, 64 * SIZE(X)

	addt	$f10, $f11, $f17
	unop
	fabs	$f21, $f9
	LD	$f20,  0 * SIZE(X)

	addt	$f12, $f13, $f18
	LD	$f21,  1 * SIZE(X)
	fabs	$f22, $f10
	addq	X, INCX, X

	addt	$f14, $f15, $f19
	LD	$f22,  0 * SIZE(X)
	fabs	$f23, $f11
	unop

	CMPLT($f0,  $f16), $f4
	LD	$f23,  1 * SIZE(X)
	fabs	$f24, $f12
	addq	X, INCX, X

	CMPLT($f1,  $f17), $f5
	LD	$f24,  0 * SIZE(X)
	fabs	$f25, $f13
	unop

	CMPLT($f2,  $f18), $f6
	LD	$f25,  1 * SIZE(X)
	fabs	$f26, $f14
	addq	X, INCX, X

	CMPLT($f3,  $f19), $f7
	LD	$f26,  0 * SIZE(X)
	fabs	$f27, $f15
	unop

	fcmovne	$f4, $f16, $f0
	LD	$f27,  1 * SIZE(X)
	addq	X, INCX, X
	lda	$1,   -1($1)		# i --

	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3
	bgt	$1,$L12
	.align 4

/* Drain, step 1: no more loads; finish the second-to-last group. */
$L13:
	addt	$f8,  $f9,  $f16
	fabs	$f20, $f8

	addt	$f10, $f11, $f17
	fabs	$f21, $f9

	addt	$f12, $f13, $f18
	fabs	$f22, $f10

	addt	$f14, $f15, $f19
	fabs	$f23, $f11

	CMPLT($f0,  $f16), $f4
	fabs	$f24, $f12

	CMPLT($f1,  $f17), $f5
	fabs	$f25, $f13

	CMPLT($f2,  $f18), $f6
	fabs	$f26, $f14
	CMPLT($f3,  $f19), $f7
	fabs	$f27, $f15

	fcmovne	$f4, $f16, $f0
	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3
	.align 4

/* Drain, step 2: last group, then fold $f0-$f3 down into $f0. */
$L14:
	addt	$f8,  $f9,  $f16
	addt	$f10, $f11, $f17
	addt	$f12, $f13, $f18
	addt	$f14, $f15, $f19

	CMPLT($f0,  $f16), $f4
	CMPLT($f1,  $f17), $f5
	CMPLT($f2,  $f18), $f6
	CMPLT($f3,  $f19), $f7

	fcmovne	$f4, $f16, $f0
	fcmovne	$f5, $f17, $f1
	fcmovne	$f6, $f18, $f2
	fcmovne	$f7, $f19, $f3

	CMPLT($f0,  $f1), $f16
	CMPLT($f2,  $f3), $f17

	fcmovne	$f16, $f1, $f0
	fcmovne	$f17, $f3, $f2

	CMPLT($f0,  $f2), $f16
	fcmovne	$f16, $f2, $f0
	.align 4

/* Pass 1 tail: the N & 3 elements not covered by the unrolled code. */
$L15:
	and	N, 3, $1
	unop
	unop
	ble	$1,  $L20
	.align 4

$L16:
	LD	$f20,  0 * SIZE(X)
	LD	$f21,  1 * SIZE(X)
	unop
	addq	X, INCX, X

	fabs	$f20, $f29
	fabs	$f21, $f30
	addt	$f29, $f30, $f29

	CMPLT($f0,  $f29), $f16
	fcmovne	$f16, $f29, $f0

	lda	$1,   -1($1)		# i --
	bgt	$1, $L16
	.align 4

/*
 * Pass 2: rescan from XX and stop at the first element whose sum
 * equals $f0.  $0 is bumped before every test, so it is 1-based.
 */
$L20:
	sra	N, 2, $1
	ble	$1,  $L40
	.align 4

	LD	$f10,  0 * SIZE(XX)
	LD	$f11,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f12,  0 * SIZE(XX)
	LD	$f13,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f14,  0 * SIZE(XX)
	LD	$f15,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f16,  0 * SIZE(XX)
	LD	$f17,  1 * SIZE(XX)
	addq	XX, INCX, XX

	fabs	$f10, $f18
	fabs	$f11, $f19
	fabs	$f12, $f20
	fabs	$f13, $f21

	lda	$1,  -1($1)
	ble	$1, $L23
	.align 4

/*
 * Each iteration tests the group already held in registers and loads
 * the following one.  Every fabs of an old value is issued before the
 * LD that overwrites its source register.
 */
$L22:
	LD	$f10,  0 * SIZE(XX)
	fabs	$f14, $f22
	LD	$f11,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f12,  0 * SIZE(XX)
	fabs	$f15, $f23
	LD	$f13,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f14,  0 * SIZE(XX)
	fabs	$f16, $f24
	LD	$f15,  1 * SIZE(XX)
	addq	XX, INCX, XX

	LD	$f16,  0 * SIZE(XX)
	fabs	$f17, $f25
	LD	$f17,  1 * SIZE(XX)
	addq	XX, INCX, XX

	addt	$f18, $f19, $f4
	addt	$f20, $f21, $f5
	addt	$f22, $f23, $f6
	addt	$f24, $f25, $f7

	cmpteq	$f0, $f4, $f26
	cmpteq	$f0, $f5, $f27
	cmpteq	$f0, $f6, $f28
	cmpteq	$f0, $f7, $f29

	fabs	$f10, $f18
	lda	$0,    1($0)
	lda	$1,   -1($1)		# i --
	fbne	$f26, $End

	fabs	$f11, $f19
	lda	$0,    1($0)
	unop
	fbne	$f27, $End

	fabs	$f12, $f20
	lda	$0,    1($0)
	unop
	fbne	$f28, $End

	fabs	$f13, $f21
	lda	$0,    1($0)
	fbne	$f29, $End
	bgt	$1,  $L22
	.align 4

/* Test the final unrolled group; nothing further is loaded. */
$L23:
	fabs	$f14, $f22
	fabs	$f15, $f23
	fabs	$f16, $f24
	fabs	$f17, $f25

	addt	$f18, $f19, $f4
	addt	$f20, $f21, $f5
	addt	$f22, $f23, $f6
	addt	$f24, $f25, $f7

	cmpteq	$f0, $f4, $f26
	cmpteq	$f0, $f5, $f27
	cmpteq	$f0, $f6, $f28
	cmpteq	$f0, $f7, $f29

	lda	$0,    1($0)
	fbne	$f26, $End
	lda	$0,    1($0)
	fbne	$f27, $End
	lda	$0,    1($0)
	fbne	$f28, $End
	lda	$0,    1($0)
	fbne	$f29, $End
	.align 4

/*
 * Pass 2 tail.  The scan is limited to the N & 3 elements that remain
 * after the unrolled groups, the same remainder pass 1 uses at $L15.
 * Without the limit the loop could only leave through a successful
 * cmpteq, so a vector in which no sum compares equal to $f0 (cmpteq
 * is never true for a NaN) would be read past its end indefinitely.
 * When nothing matches, control falls into $End with $0 equal to N.
 */
$L40:
	and	N, 3, $1
	ble	$1,  $End
	.align 4

$L41:
	LD	$f10,  0 * SIZE(XX)
	LD	$f11,  1 * SIZE(XX)

	addq	XX, INCX, XX

	fabs	$f10, $f18
	fabs	$f11, $f19

	addt	$f18, $f19, $f18
	cmpteq	$f0, $f18, $f2

	lda	$0,    1($0)
	lda	$1,   -1($1)
	fbne	$f2,  $End
	bgt	$1,  $L41
	.align 4

/* Restore $f2-$f9, release the frame and return with the result in $0. */
$End:
	ldt	$f2,   0($sp)
	ldt	$f3,   8($sp)
	ldt	$f4,  16($sp)
	ldt	$f5,  24($sp)

	ldt	$f6,  32($sp)
	ldt	$f7,  40($sp)
	ldt	$f8,  48($sp)
	ldt	$f9,  56($sp)
	lda	$sp,  STACKSIZE($sp)
	ret

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -