⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 copy.s

📁 Optimized GotoBLAS libraries
💻 S
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * Vector copy kernel: copies N elements from X (stride INCX) to Y
 * (stride INCY).  The file is run through the C preprocessor; building
 * with COMPLEX defined selects the variant in which every element is
 * two consecutive SIZE-sized values.
 *
 * Inputs (integer registers, as named by the #defines below):
 *   $16 N      element count; nothing is copied when N <= 0
 *   $17 X      source pointer (advanced by the routine)
 *   $18 INCX   source stride in elements
 *   $19 Y      destination pointer (advanced by the routine)
 *   $20 INCY   destination stride in elements
 *
 * Scratch registers written here:
 *   $0         1 when INCX == 1 and INCY == 1, else 0
 *   $1         INCY == 1
 *   $4         number of full unrolled blocks still to copy
 *   $5         number of leftover elements after the full blocks
 *   $f10-$f25  data in flight
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ST, SIZE and SXADDQ come from
 * common.h and are not visible in this file.
 * NOTE(review): LD/ST are presumably the precision-specific FP
 * load/store, SIZE the byte size of one value, and SXADDQ a
 * "scale by SIZE and add" instruction - confirm against common.h.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

#define N	$16
#define X	$17
#define INCX	$18
#define Y	$19
#define INCY	$20

	PROLOGUE
	PROFCODE
	.frame	$sp, 0, $26, 0

#ifndef PROFILE
	.prologue 0
#else
	.prologue 1
#endif

	cmpeq	INCX,  1, $0		/* $0 = (INCX == 1) */
	ble	N, $End			/* N <= 0: nothing to copy */
#ifndef COMPLEX
	sra	N,  4, $4		/* $4 = N / 16 full blocks */
#else
	sra	N,  3, $4		/* $4 = N / 8 full blocks (16 values each) */
#endif
	cmpeq	INCY,  1, $1		/* $1 = (INCY == 1) */

	and	$0,  $1, $0		/* $0 = both strides are 1 */
	beq	$0, $Sub		/* otherwise use the strided path */
#ifndef COMPLEX
	and	N, 15, $5		/* $5 = N mod 16 leftover elements */
#else
	and	N,  7, $5		/* $5 = N mod 8 leftover elements */
#endif
	ble	$4,  $Remain		/* no full block: leftovers only */

/* Unit-stride path.  Preload the first block of 16 values; from here on
   the loads stay one block ahead of the stores. */
	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)
	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)
	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	subq	$4, 1, $4		/* one block is now held in registers */
	lda	X, 16*SIZE(X)
	ble	$4, $MainLoopEnd	/* it was the only block: just store it */
	.align 4

/* Each pass stores the block loaded previously and loads the next one,
   interleaved in groups of four values. */
$MainLoop:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)

	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)

	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)

	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	subq	$4, 1, $4
	lda	Y, 16*SIZE(Y)
	lda	X, 16*SIZE(X)
	bgt	$4, $MainLoop
	.align 4

/* Store the last block that is still held in $f10-$f25. */
$MainLoopEnd:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)
	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)
	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	lda	Y, 16*SIZE(Y)
	.align 4

/* Unit-stride leftovers: one element per pass, $5 passes. */
$Remain:
	ble	$5, $End
	.align 4

$RemainLoop:
#ifndef COMPLEX
	LD	$f10,  0*SIZE(X)
	lda	X,   1*SIZE(X)
	ST	$f10,  0*SIZE(Y)
	lda	Y,   1*SIZE(Y)
#else
	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	lda	X,   2*SIZE(X)
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	lda	Y,   2*SIZE(Y)
#endif
	subq	$5, 1, $5
	bgt	$5, $RemainLoop
	.align 4

$End:
	ret
	.align 4

/* Strided path: taken when INCX or INCY is not 1.  $4 already holds the
   full-block count computed before the branch. */
$Sub:
#ifdef COMPLEX
	addq	INCX, INCX, INCX	/* an element is two values: double the strides */
	addq	INCY, INCY, INCY
	and	N,  7, $5		/* $5 = N mod 8 leftover elements */
#else
	and	N, 15, $5		/* $5 = N mod 16 leftover elements */
#endif
	ble	$4, $SubRemain		/* no full block: leftovers only */
	.align 4

/* One pass gathers 16 values into $f10-$f25, stepping X by INCX after
   each element, then scatters them to Y, stepping by INCY. */
$SubMainLoop:
#ifndef COMPLEX
	LD	$f10,  0(X)
	SXADDQ	INCX, X, X
	LD	$f11,  0(X)
	SXADDQ	INCX, X, X

	LD	$f12,  0(X)
	SXADDQ	INCX, X, X
	LD	$f13,  0(X)
	SXADDQ	INCX, X, X

	LD	$f14,  0(X)
	SXADDQ	INCX, X, X
	LD	$f15,  0(X)
	SXADDQ	INCX, X, X

	LD	$f16,  0(X)
	SXADDQ	INCX, X, X
	LD	$f17,  0(X)
	SXADDQ	INCX, X, X

	LD	$f18,  0(X)
	SXADDQ	INCX, X, X
	LD	$f19,  0(X)
	SXADDQ	INCX, X, X

	LD	$f20,  0(X)
	SXADDQ	INCX, X, X
	LD	$f21,  0(X)
	SXADDQ	INCX, X, X

	LD	$f22,  0(X)
	SXADDQ	INCX, X, X
	LD	$f23,  0(X)
	SXADDQ	INCX, X, X

	LD	$f24,  0(X)
	SXADDQ	INCX, X, X
	LD	$f25,  0(X)
	SXADDQ	INCX, X, X

	ST	$f10,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f11,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f13,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f15,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f17,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f19,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f21,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f23,  0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24,  0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f25,  0(Y)
	SXADDQ	INCY, Y, Y
#else
	LD	$f10,    0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f12,    0(X)
	LD	$f13, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f14,    0(X)
	LD	$f15, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f16,    0(X)
	LD	$f17, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f18,    0(X)
	LD	$f19, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f20,    0(X)
	LD	$f21, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f22,    0(X)
	LD	$f23, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f24,    0(X)
	LD	$f25, SIZE(X)
	SXADDQ	INCX, X, X

	ST	$f10,    0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12,    0(Y)
	ST	$f13, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14,    0(Y)
	ST	$f15, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16,    0(Y)
	ST	$f17, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18,    0(Y)
	ST	$f19, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20,    0(Y)
	ST	$f21, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22,    0(Y)
	ST	$f23, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24,    0(Y)
	ST	$f25, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	subq	$4, 1, $4
	bgt	$4, $SubMainLoop
	.align 4

/* Strided leftovers: one element per pass, $5 passes. */
$SubRemain:
	ble	$5, $SubEnd
	.align 4

$SubRemainLoop:
#ifndef COMPLEX
	LD	$f10,  0(X)
	SXADDQ	INCX, X, X
	ST	$f10,  0(Y)
	SXADDQ	INCY, Y, Y
#else
	LD	$f10,    0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X
	ST	$f10,    0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	subq	$5, 1, $5
	bgt	$5, $SubRemainLoop
	.align 4

$SubEnd:
	ret
	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -