📄 zswap.s
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         */
/*********************************************************************/

#define ASSEMBLER
#include "common.h"
#include "version.h"

/*
 * zswap kernel (Alpha assembly): exchanges the contents of two vectors.
 * Every element is a pair of SIZE-wide values (offsets 0*SIZE and 1*SIZE).
 *
 * Register and stack use, as established by the code below:
 *   $16        element count n; n <= 0 returns immediately
 *   $21        on entry: base address of the first vector (copied to $17);
 *              afterwards reused as the number of 4-element groups (n >> 2)
 *   0($sp)     32-bit stride of the first vector   -> $18
 *   8($sp)     base address of the second vector   -> $19
 *   16($sp)    32-bit stride of the second vector  -> $20
 *   $22        leftover element count (n & 3)
 *   $1, $2     flags "stride == 1" for the first / second vector
 *   $23, $24   store cursors used by the strided path
 *   $f10-$f17  values read from the second vector
 *   $f20-$f27  values read from the first vector
 *   $0         return value, always 0
 *
 * Two paths:
 *   $Main...   both strides equal 1; pointers advance by fixed offsets.
 *   $Sub...    any other stride; pointers advance through SXADDQ.
 *
 * PROLOGUE, PROFCODE, EPILOGUE, LD, ST, SIZE and SXADDQ are not defined in
 * this file.  NOTE(review): presumably they come from common.h, and SXADDQ
 * presumably adds stride * SIZE to a pointer - confirm against that header.
 */

        PROLOGUE
        PROFCODE
        .frame  $sp, 0, $26, 0

        mov     $21, $17                /* $17 = first vector base */
        ldl     $18,   0($sp)           /* $18 = first vector stride */
        ldq     $19,   8($sp)           /* $19 = second vector base */
        ldl     $20,  16($sp)           /* $20 = second vector stride */
#ifndef PROFILE
        .prologue       0
#else
        .prologue       1
#endif

        ble     $16, $SubEnd            /* if n <= 0 goto $SubEnd */

        cmpeq   $18, 1, $1              /* $1 = (first stride == 1) */
        addq    $18, $18, $18           /* double the stride: 2 values per element */
        cmpeq   $20, 1, $2              /* $2 = (second stride == 1) */
        addq    $20, $20, $20           /* double the stride: 2 values per element */

        sra     $16, 2, $21             /* $21 = n / 4 unrolled iterations */
        and     $1, $2, $1              /* $1 = both strides are 1 */
        and     $16, 3, $22             /* $22 = n % 4 leftover elements */
        beq     $1, $Sub                /* any stride != 1: strided path */

        ble     $21, $MainRemain        /* fewer than 4 elements: skip unrolled loop */
        .align 4

/* Contiguous path, 4 elements (8 values) from each vector per iteration. */
$MainLoop:
        LD      $f10,  0*SIZE($19)
        LD      $f11,  1*SIZE($19)
        LD      $f12,  2*SIZE($19)
        LD      $f13,  3*SIZE($19)
        LD      $f14,  4*SIZE($19)
        LD      $f15,  5*SIZE($19)
        LD      $f16,  6*SIZE($19)
        LD      $f17,  7*SIZE($19)

        LD      $f20,  0*SIZE($17)
        LD      $f21,  1*SIZE($17)
        LD      $f22,  2*SIZE($17)
        LD      $f23,  3*SIZE($17)
        LD      $f24,  4*SIZE($17)
        LD      $f25,  5*SIZE($17)
        LD      $f26,  6*SIZE($17)
        LD      $f27,  7*SIZE($17)

        /*
         * The two lds instructions target $f31, so the loaded data is not
         * kept.  NOTE(review): looks like a prefetch of the data 16 values
         * ahead in each vector - confirm against the Alpha handbook.
         */
        lds     $f31, 16*SIZE($17)
        unop
        lds     $f31, 16*SIZE($19)
        subl    $21, 1, $21             /* one unrolled iteration done */

        /* Values of the second vector go into the first ... */
        ST      $f10,  0*SIZE($17)
        ST      $f11,  1*SIZE($17)
        ST      $f12,  2*SIZE($17)
        ST      $f13,  3*SIZE($17)
        ST      $f14,  4*SIZE($17)
        ST      $f15,  5*SIZE($17)
        ST      $f16,  6*SIZE($17)
        ST      $f17,  7*SIZE($17)

        /* ... and values of the first vector go into the second. */
        ST      $f20,  0*SIZE($19)
        ST      $f21,  1*SIZE($19)
        ST      $f22,  2*SIZE($19)
        ST      $f23,  3*SIZE($19)
        ST      $f24,  4*SIZE($19)
        ST      $f25,  5*SIZE($19)
        ST      $f26,  6*SIZE($19)
        ST      $f27,  7*SIZE($19)

        lda     $17, 8*SIZE($17)        /* advance both cursors by 4 elements */
        lda     $19, 8*SIZE($19)
        bgt     $21, $MainLoop
        .align 4

$MainRemain:
        ble     $22, $MainEnd           /* no leftover elements */
        .align 4

/* Contiguous path, one element per iteration for the n % 4 leftovers. */
$MainRemainLoop:
        LD      $f10,  0*SIZE($19)
        LD      $f11,  1*SIZE($19)
        LD      $f20,  0*SIZE($17)
        LD      $f21,  1*SIZE($17)

        lda     $17, 2*SIZE($17)        /* cursors move before the stores, */
        lda     $19, 2*SIZE($19)        /* hence the negative store offsets */
        subl    $22, 1, $22
        ST      $f10, -2*SIZE($17)
        ST      $f11, -1*SIZE($17)
        ST      $f20, -2*SIZE($19)
        ST      $f21, -1*SIZE($19)
        bgt     $22, $MainRemainLoop
        .align 4

$MainEnd:
        clr     $0                      /* return 0 */
        ret
        .align 4

/*
 * Strided path.  $17 / $19 are the load cursors; $23 / $24 start at the same
 * addresses and serve as store cursors, because the load cursors have already
 * moved 4 elements ahead by the time the stores are issued.
 */
$Sub:
        mov     $17, $23                /* $23 = store cursor, first vector */
        mov     $19, $24                /* $24 = store cursor, second vector */

        ble     $21, $SubRemain         /* fewer than 4 elements: skip unrolled loop */
        .align 4

$SubLoop:
        /* Load 4 elements of the second vector. */
        LD      $f10,  0*SIZE($19)
        LD      $f11,  1*SIZE($19)
        SXADDQ  $20, $19, $19

        LD      $f12,  0*SIZE($19)
        LD      $f13,  1*SIZE($19)
        SXADDQ  $20, $19, $19

        LD      $f14,  0*SIZE($19)
        LD      $f15,  1*SIZE($19)
        SXADDQ  $20, $19, $19

        LD      $f16,  0*SIZE($19)
        LD      $f17,  1*SIZE($19)
        SXADDQ  $20, $19, $19

        /* Load 4 elements of the first vector. */
        LD      $f20,  0*SIZE($17)
        LD      $f21,  1*SIZE($17)
        SXADDQ  $18, $17, $17

        LD      $f22,  0*SIZE($17)
        LD      $f23,  1*SIZE($17)
        SXADDQ  $18, $17, $17

        LD      $f24,  0*SIZE($17)
        LD      $f25,  1*SIZE($17)
        SXADDQ  $18, $17, $17

        LD      $f26,  0*SIZE($17)
        LD      $f27,  1*SIZE($17)
        SXADDQ  $18, $17, $17

        /* Store the second vector values into the first vector. */
        ST      $f10,  0*SIZE($23)
        ST      $f11,  1*SIZE($23)
        SXADDQ  $18, $23, $23

        ST      $f12,  0*SIZE($23)
        ST      $f13,  1*SIZE($23)
        SXADDQ  $18, $23, $23

        ST      $f14,  0*SIZE($23)
        ST      $f15,  1*SIZE($23)
        SXADDQ  $18, $23, $23

        ST      $f16,  0*SIZE($23)
        ST      $f17,  1*SIZE($23)
        SXADDQ  $18, $23, $23

        /* Store the first vector values into the second vector. */
        ST      $f20,  0*SIZE($24)
        ST      $f21,  1*SIZE($24)
        SXADDQ  $20, $24, $24

        ST      $f22,  0*SIZE($24)
        ST      $f23,  1*SIZE($24)
        SXADDQ  $20, $24, $24

        ST      $f24,  0*SIZE($24)
        ST      $f25,  1*SIZE($24)
        SXADDQ  $20, $24, $24

        ST      $f26,  0*SIZE($24)
        ST      $f27,  1*SIZE($24)
        SXADDQ  $20, $24, $24

        subl    $21, 1, $21             /* one unrolled iteration done */
        bgt     $21, $SubLoop
        .align 4

$SubRemain:
        ble     $22, $SubEnd            /* no leftover elements */
        .align 4

/*
 * Strided path, one element per iteration for the n % 4 leftovers.  Loads and
 * stores both go through $17 / $19, which already point past the elements
 * handled by $SubLoop; $23 / $24 are not used here.
 */
$SubRemainLoop:
        LD      $f10,  0*SIZE($19)
        LD      $f11,  1*SIZE($19)
        LD      $f20,  0*SIZE($17)
        LD      $f21,  1*SIZE($17)

        subl    $22, 1, $22
        ST      $f10,  0*SIZE($17)
        ST      $f11,  1*SIZE($17)
        ST      $f20,  0*SIZE($19)
        ST      $f21,  1*SIZE($19)

        SXADDQ  $18, $17, $17
        SXADDQ  $20, $19, $19
        bgt     $22, $SubRemainLoop
        .align 4

/* Common exit of the strided path; also the target of the n <= 0 check. */
$SubEnd:
        clr     $0                      /* return 0 */
        ret
        EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -