📄 slice_asm.s
字号:
/* <LIC_AMD_STD>
 * Copyright (c) 2005 Advanced Micro Devices, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * The full GNU General Public License is included in this distribution in the
 * file called COPYING
 * </LIC_AMD_STD> */
/* <CTL_AMD_STD>
 * </CTL_AMD_STD> */
/* <DOC_AMD_STD>
 * </DOC_AMD_STD> */
#include <asm/regdef.h>
#include <asm/fpregdef.h>

        .text
        .align  2
        .set    noat                    // assembler may not use $at behind our back
        .set    noreorder               // no assembler reordering: the instruction
                                        // order and the delay slots below are
                                        // exactly as written

// ---------------------------------------------------------------------------
// copy_32_aligned_words
//
// Copies 32 consecutive 32-bit words (offsets 0x00..0x7c, 128 bytes) from
// the buffer at a0 to the buffer at a1 with straight-line lw/sw pairs.
//
// In:    a0 == $4 is src
//        a1 == $5 is dst
// Uses:  t0-t8 as scratch; no register is saved or restored and no return
//        value register is written.
//
// Scheduling (taken from the original author notes): the words are handled
// as four groups of 0x20 bytes. The first word of the NEXT group is loaded
// before the rest of the current group is loaded and stored, so that the
// expected cache miss on the next group overlaps with useful work. t0 and
// t8 alternate as the holder of that early-loaded first word.
//
// NOTE(review): the 0x20 grouping presumably matches the cache line size of
// the target CPU and relies on loads that do not block - confirm.
// NOTE(review): lw/sw need word-aligned addresses; the routine name suggests
// the caller guarantees alignment of src and dst - confirm.
// ---------------------------------------------------------------------------
        .global copy_32_aligned_words
copy_32_aligned_words:
        lw      t0, 0x00(a0)            // first miss, no work to do under
        lw      t8, 0x20(a0)            // second miss, can do work under

        // t0 holds the word from offset 0x00
        lw      t1, 0x04(a0)            // load_rest first miss
        lw      t2, 0x08(a0)            // load_rest first miss
        lw      t3, 0x0c(a0)            // load_rest first miss
        lw      t4, 0x10(a0)            // load_rest first miss
        lw      t5, 0x14(a0)            // load_rest first miss
        lw      t6, 0x18(a0)            // load_rest first miss
        lw      t7, 0x1c(a0)            // load_rest first miss

        // t0 holds the word from offset 0x00
        sw      t0, 0x00(a1)            // store first under second miss
        sw      t1, 0x04(a1)            // store first under second miss
        sw      t2, 0x08(a1)            // store first under second miss
        sw      t3, 0x0c(a1)            // store first under second miss
        sw      t4, 0x10(a1)            // store first under second miss
        sw      t5, 0x14(a1)            // store first under second miss
        sw      t6, 0x18(a1)            // store first under second miss
        sw      t7, 0x1c(a1)            // store first under second miss

        lw      t0, 0x40(a0)            // third miss, can do work under

        // t8 holds the word from offset 0x20
        lw      t1, 0x24(a0)            // load_rest second miss under third miss
        lw      t2, 0x28(a0)            // load_rest second miss under third miss
        lw      t3, 0x2c(a0)            // load_rest second miss under third miss
        lw      t4, 0x30(a0)            // load_rest second miss under third miss
        lw      t5, 0x34(a0)            // load_rest second miss under third miss
        lw      t6, 0x38(a0)            // load_rest second miss under third miss
        lw      t7, 0x3c(a0)            // load_rest second miss under third miss

        // t8 holds the word from offset 0x20
        sw      t8, 0x20(a1)            // store second under third miss
        sw      t1, 0x24(a1)            // store second under third miss
        sw      t2, 0x28(a1)            // store second under third miss
        sw      t3, 0x2c(a1)            // store second under third miss
        sw      t4, 0x30(a1)            // store second under third miss
        sw      t5, 0x34(a1)            // store second under third miss
        sw      t6, 0x38(a1)            // store second under third miss
        sw      t7, 0x3c(a1)            // store second under third miss

        lw      t8, 0x60(a0)            // fourth miss, can do work under

        // t0 holds the word from offset 0x40
        lw      t1, 0x44(a0)            // load_rest third miss under fourth miss
        lw      t2, 0x48(a0)            // load_rest third miss under fourth miss
        lw      t3, 0x4c(a0)            // load_rest third miss under fourth miss
        lw      t4, 0x50(a0)            // load_rest third miss under fourth miss
        lw      t5, 0x54(a0)            // load_rest third miss under fourth miss
        lw      t6, 0x58(a0)            // load_rest third miss under fourth miss
        lw      t7, 0x5c(a0)            // load_rest third miss under fourth miss

        // t0 holds the word from offset 0x40
        sw      t0, 0x40(a1)            // store third under fourth miss
        sw      t1, 0x44(a1)            // store third under fourth miss
        sw      t2, 0x48(a1)            // store third under fourth miss
        sw      t3, 0x4c(a1)            // store third under fourth miss
        sw      t4, 0x50(a1)            // store third under fourth miss
        sw      t5, 0x54(a1)            // store third under fourth miss
        sw      t6, 0x58(a1)            // store third under fourth miss
        sw      t7, 0x5c(a1)            // store third under fourth miss

        // t8 holds the word from offset 0x60; this is the last group, so no
        // further early load is issued
        lw      t1, 0x64(a0)            // load_rest fourth miss
        lw      t2, 0x68(a0)            // load_rest fourth miss
        lw      t3, 0x6c(a0)            // load_rest fourth miss
        lw      t4, 0x70(a0)            // load_rest fourth miss
        lw      t5, 0x74(a0)            // load_rest fourth miss
        lw      t6, 0x78(a0)            // load_rest fourth miss
        lw      t7, 0x7c(a0)            // load_rest fourth miss

        // t8 holds the word from offset 0x60
        sw      t8, 0x60(a1)            // store fourth
        sw      t1, 0x64(a1)            // store fourth
        sw      t2, 0x68(a1)            // store fourth
        sw      t3, 0x6c(a1)            // store fourth
        sw      t4, 0x70(a1)            // store fourth
        sw      t5, 0x74(a1)            // store fourth
        sw      t6, 0x78(a1)            // store fourth
        jr      ra
        sw      t7, 0x7c(a1)            // store fourth (branch delay slot:
                                        // executes before the return completes)

// ---------------------------------------------------------------------------
// zero_128b_32a
//
// Writes zero to 32 consecutive 32-bit words (offsets 0x00..0x7c, 128 bytes)
// starting at a0.
//
// In:    a0 == $4 is dst (the buffer that gets cleared; it is only written,
//        never read)
// Uses:  no scratch registers; every store uses $0 as its source.
//
// NOTE(review): sw needs a word-aligned address; the "32a" suffix in the
// name presumably states the alignment the caller guarantees - confirm.
// ---------------------------------------------------------------------------
        .global zero_128b_32a
zero_128b_32a:
        sw      $0, 0x00(a0)
        sw      $0, 0x04(a0)
        sw      $0, 0x08(a0)
        sw      $0, 0x0c(a0)
        sw      $0, 0x10(a0)
        sw      $0, 0x14(a0)
        sw      $0, 0x18(a0)
        sw      $0, 0x1c(a0)
        sw      $0, 0x20(a0)
        sw      $0, 0x24(a0)
        sw      $0, 0x28(a0)
        sw      $0, 0x2c(a0)
        sw      $0, 0x30(a0)
        sw      $0, 0x34(a0)
        sw      $0, 0x38(a0)
        sw      $0, 0x3c(a0)
        sw      $0, 0x40(a0)
        sw      $0, 0x44(a0)
        sw      $0, 0x48(a0)
        sw      $0, 0x4c(a0)
        sw      $0, 0x50(a0)
        sw      $0, 0x54(a0)
        sw      $0, 0x58(a0)
        sw      $0, 0x5c(a0)
        sw      $0, 0x60(a0)
        sw      $0, 0x64(a0)
        sw      $0, 0x68(a0)
        sw      $0, 0x6c(a0)
        sw      $0, 0x70(a0)
        sw      $0, 0x74(a0)
        sw      $0, 0x78(a0)
        jr      ra
        sw      $0, 0x7c(a0)            // last store sits in the branch delay slot

// ---------------------------------------------------------------------------
// copy_16b_32a
//
// Copies four consecutive 32-bit words (offsets 0x00..0x0c, 16 bytes) from
// the buffer at a0 to the buffer at a1.
//
// In:    a0 == $4 is src
//        a1 == $5 is dst
// Uses:  t0-t3 as scratch; no register is saved or restored.
//
// The first and last words are loaded first; per the original author notes
// these are the two accesses expected to miss, and the remaining loads and
// stores are scheduled underneath the second one.
// NOTE(review): this presumably covers a 16-byte source that straddles two
// cache lines - confirm against the caller and target CPU.
// ---------------------------------------------------------------------------
        .global copy_16b_32a
copy_16b_32a:
        lw      t0, 0x00(a0)            // first miss, no work to do under
        lw      t3, 0x0c(a0)            // second miss, can do work under
        sw      t0, 0x00(a1)
        lw      t1, 0x04(a0)
        lw      t2, 0x08(a0)
        sw      t1, 0x04(a1)
        sw      t2, 0x08(a1)
        jr      ra
        sw      t3, 0x0c(a1)            // last store sits in the branch delay slot
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -