📄 copy.s
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

/*
 * Vector copy kernel: copies N elements from X to Y.
 *
 * Inputs (register aliases are defined just below):
 *   N    ($16)  element count; the routine returns at once when N <= 0
 *   X    ($17)  source pointer
 *   INCX ($18)  source stride; a value of 1 means contiguous elements
 *   Y    ($19)  destination pointer
 *   INCY ($20)  destination stride; a value of 1 means contiguous elements
 *
 * When COMPLEX is defined every element is two consecutive values
 * (offsets 0 and SIZE), so the unrolled loops handle 8 elements per pass
 * instead of 16; either way 16 values move per pass through $f10-$f25.
 *
 * Two code paths:
 *   - INCX == 1 and INCY == 1: contiguous path ($MainLoop / $RemainLoop),
 *     where stores of one 16-value block are interleaved with the loads
 *     of the next block.
 *   - otherwise: strided path ($Sub...), where the pointers are stepped
 *     with SXADDQ after every element.
 *
 * Registers written: $0, $1, $4, $5, $f10-$f25, X and Y; on the strided
 * COMPLEX path INCX and INCY are doubled in place as well.
 *
 * LD, ST, SIZE, SXADDQ, PROLOGUE, PROFCODE and EPILOGUE are not defined
 * in this file; presumably they come from common.h (element-typed
 * load/store, element size in bytes, scaled add, and function
 * entry/exit boilerplate) - TODO confirm against that header.
 *
 * NOTE(review): after PROFCODE, $0 is written only by the stride test
 * below, so at `ret` it holds that test's result (1 on the contiguous
 * path, 0 on the strided path) rather than an explicit status.
 * Confirm that callers ignore the return value.
 */

#define ASSEMBLER
#include "common.h"
#include "version.h"

#define N	$16
#define X	$17
#define INCX	$18
#define Y	$19
#define INCY	$20

	PROLOGUE
	PROFCODE
	.frame	$sp, 0, $26, 0

#ifndef PROFILE
	.prologue	0
#else
	.prologue	1
#endif

	cmpeq	INCX, 1, $0		/* $0 = (INCX == 1) */
	ble	N, $End			/* nothing to copy when N <= 0 */
#ifndef COMPLEX
	sra	N, 4, $4		/* $4 = N / 16: number of unrolled passes */
#else
	sra	N, 3, $4		/* $4 = N / 8: 8 complex elements = 16 values */
#endif
	cmpeq	INCY, 1, $1		/* $1 = (INCY == 1) */

	and	$0, $1, $0		/* $0 = both strides are 1 */
	beq	$0, $Sub		/* any non-unit stride: take the strided path */
#ifndef COMPLEX
	and	N, 15, $5		/* $5 = elements left after the unrolled passes */
#else
	and	N, 7, $5		/* $5 = elements left after the unrolled passes */
#endif
	ble	$4, $Remain		/* fewer than one full block: remainder only */

	/* Preload the first block of 16 values into $f10-$f25. */
	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)

	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)

	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	subq	$4, 1, $4		/* one block is now in flight */
	lda	X, 16*SIZE(X)		/* X runs one block ahead of Y from here on */
	ble	$4, $MainLoopEnd	/* it was the only block: just drain it */
	.align 4

/*
 * Steady state: each group of four stores writes values loaded on the
 * previous pass, then the same registers are refilled from the next
 * block (X already points one block ahead of Y).
 */
$MainLoop:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)

	LD	$f10,  0*SIZE(X)
	LD	$f11,  1*SIZE(X)
	LD	$f12,  2*SIZE(X)
	LD	$f13,  3*SIZE(X)

	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	LD	$f14,  4*SIZE(X)
	LD	$f15,  5*SIZE(X)
	LD	$f16,  6*SIZE(X)
	LD	$f17,  7*SIZE(X)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)

	LD	$f18,  8*SIZE(X)
	LD	$f19,  9*SIZE(X)
	LD	$f20, 10*SIZE(X)
	LD	$f21, 11*SIZE(X)

	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	LD	$f22, 12*SIZE(X)
	LD	$f23, 13*SIZE(X)
	LD	$f24, 14*SIZE(X)
	LD	$f25, 15*SIZE(X)

	subq	$4, 1, $4
	lda	Y, 16*SIZE(Y)
	lda	X, 16*SIZE(X)
	bgt	$4, $MainLoop
	.align 4

/* Drain: store the last block that was loaded but not yet written. */
$MainLoopEnd:
	ST	$f10,  0*SIZE(Y)
	ST	$f11,  1*SIZE(Y)
	ST	$f12,  2*SIZE(Y)
	ST	$f13,  3*SIZE(Y)
	ST	$f14,  4*SIZE(Y)
	ST	$f15,  5*SIZE(Y)
	ST	$f16,  6*SIZE(Y)
	ST	$f17,  7*SIZE(Y)

	ST	$f18,  8*SIZE(Y)
	ST	$f19,  9*SIZE(Y)
	ST	$f20, 10*SIZE(Y)
	ST	$f21, 11*SIZE(Y)
	ST	$f22, 12*SIZE(Y)
	ST	$f23, 13*SIZE(Y)
	ST	$f24, 14*SIZE(Y)
	ST	$f25, 15*SIZE(Y)

	lda	Y, 16*SIZE(Y)
	.align 4

/* Contiguous remainder: $5 elements, one per iteration. */
$Remain:
	ble	$5, $End
	.align 4

$RemainLoop:
#ifndef COMPLEX
	LD	$f10, 0*SIZE(X)
	lda	X, 1*SIZE(X)
	ST	$f10, 0*SIZE(Y)
	lda	Y, 1*SIZE(Y)
#else
	LD	$f10, 0*SIZE(X)
	LD	$f11, 1*SIZE(X)
	lda	X, 2*SIZE(X)
	ST	$f10, 0*SIZE(Y)
	ST	$f11, 1*SIZE(Y)
	lda	Y, 2*SIZE(Y)
#endif
	subq	$5, 1, $5
	bgt	$5, $RemainLoop
	.align 4

$End:
	ret
	.align 4

/*
 * Strided path. $4 (pass count) was computed before the branch here;
 * $5 (remainder) was not, so it is computed now.
 */
$Sub:
#ifdef COMPLEX
	addq	INCX, INCX, INCX	/* a complex element spans two values, */
	addq	INCY, INCY, INCY	/* so double both strides */
	and	N, 7, $5
#else
	and	N, 15, $5
#endif
	ble	$4, $SubRemain
	.align 4

/*
 * One pass gathers 16 values from X and then scatters them to Y.
 * SXADDQ presumably performs X += INCX * SIZE (resp. Y += INCY * SIZE);
 * TODO confirm against its definition outside this file.
 */
$SubMainLoop:
#ifndef COMPLEX
	LD	$f10, 0(X)
	SXADDQ	INCX, X, X
	LD	$f11, 0(X)
	SXADDQ	INCX, X, X

	LD	$f12, 0(X)
	SXADDQ	INCX, X, X
	LD	$f13, 0(X)
	SXADDQ	INCX, X, X

	LD	$f14, 0(X)
	SXADDQ	INCX, X, X
	LD	$f15, 0(X)
	SXADDQ	INCX, X, X

	LD	$f16, 0(X)
	SXADDQ	INCX, X, X
	LD	$f17, 0(X)
	SXADDQ	INCX, X, X

	LD	$f18, 0(X)
	SXADDQ	INCX, X, X
	LD	$f19, 0(X)
	SXADDQ	INCX, X, X

	LD	$f20, 0(X)
	SXADDQ	INCX, X, X
	LD	$f21, 0(X)
	SXADDQ	INCX, X, X

	LD	$f22, 0(X)
	SXADDQ	INCX, X, X
	LD	$f23, 0(X)
	SXADDQ	INCX, X, X

	LD	$f24, 0(X)
	SXADDQ	INCX, X, X
	LD	$f25, 0(X)
	SXADDQ	INCX, X, X

	ST	$f10, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f11, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f13, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f15, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f17, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f19, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f21, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f23, 0(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24, 0(Y)
	SXADDQ	INCY, Y, Y
	ST	$f25, 0(Y)
	SXADDQ	INCY, Y, Y
#else
	/* Complex: load both values of an element, then step the pointer. */
	LD	$f10, 0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f12, 0(X)
	LD	$f13, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f14, 0(X)
	LD	$f15, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f16, 0(X)
	LD	$f17, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f18, 0(X)
	LD	$f19, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f20, 0(X)
	LD	$f21, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f22, 0(X)
	LD	$f23, SIZE(X)
	SXADDQ	INCX, X, X

	LD	$f24, 0(X)
	LD	$f25, SIZE(X)
	SXADDQ	INCX, X, X

	ST	$f10, 0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f12, 0(Y)
	ST	$f13, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f14, 0(Y)
	ST	$f15, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f16, 0(Y)
	ST	$f17, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f18, 0(Y)
	ST	$f19, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f20, 0(Y)
	ST	$f21, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f22, 0(Y)
	ST	$f23, SIZE(Y)
	SXADDQ	INCY, Y, Y

	ST	$f24, 0(Y)
	ST	$f25, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	subq	$4, 1, $4
	bgt	$4, $SubMainLoop
	.align 4

/* Strided remainder: $5 elements, one per iteration. */
$SubRemain:
	ble	$5, $SubEnd
	.align 4

$SubRemainLoop:
#ifndef COMPLEX
	LD	$f10, 0(X)
	SXADDQ	INCX, X, X
	ST	$f10, 0(Y)
	SXADDQ	INCY, Y, Y
#else
	LD	$f10, 0(X)
	LD	$f11, SIZE(X)
	SXADDQ	INCX, X, X
	ST	$f10, 0(Y)
	ST	$f11, SIZE(Y)
	SXADDQ	INCY, Y, Y
#endif
	subq	$5, 1, $5
	bgt	$5, $SubRemainLoop
	.align 4

$SubEnd:
	ret
	EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -