📄 clear_copy.s
字号:
/* <LIC_AMD_STD>
 * Copyright (C) 2003-2005 Advanced Micro Devices, Inc. All Rights Reserved.
 *
 * Unless otherwise designated in writing, this software and any related
 * documentation are the confidential proprietary information of AMD.
 * THESE MATERIALS ARE PROVIDED "AS IS" WITHOUT ANY
 * UNLESS OTHERWISE NOTED IN WRITING, EXPRESS OR IMPLIED WARRANTY OF ANY
 * KIND, INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, TITLE, FITNESS FOR ANY PARTICULAR PURPOSE AND IN NO
 * EVENT SHALL AMD OR ITS LICENSORS BE LIABLE FOR ANY DAMAGES WHATSOEVER.
 *
 * AMD does not assume any responsibility for any errors which may appear
 * in the Materials nor any responsibility to support or update the
 * Materials. AMD retains the right to modify the Materials at any time,
 * without notice, and is not obligated to provide such modified
 * Materials to you. AMD is not obligated to furnish, support, or make
 * any further information available to you.
 * </LIC_AMD_STD> */
/* <CTL_AMD_STD>
 * </CTL_AMD_STD> */
/* <DOC_AMD_STD>
 * </DOC_AMD_STD> */

#include <asm/regdef.h>
#include <asm/fpregdef.h>

	.text
	.align	2
	.set	noat
	.set	noreorder	// instructions are emitted exactly as written;
				// every "jr ra" is followed by a hand-filled delay slot

// ---------------------------------------------------------------------------
// copy_32_aligned_words
//
// Copies 32 words (128 bytes, offsets 0x00..0x7c) from src to dst.
//   a0 == $4 is src
//   a1 == $5 is dst
// Clobbers t0-t8.  Returns nothing.
//
// The copy is done as four 32-byte groups.  The first word of the NEXT group
// is loaded before the current group is stored, so that (per the original
// author's notes) the expected cache miss on the next group overlaps with
// useful work.  NOTE(review): this presumably assumes 32-byte cache lines and
// word-aligned (likely line-aligned) pointers -- confirm against callers.
// t0 and t8 alternate as the holder of each group's first word.
// ---------------------------------------------------------------------------
	.global	copy_32_aligned_words
copy_32_aligned_words:
	lw	t0, 0x00(a0)	// first miss, no work to do under
	lw	t8, 0x20(a0)	// second miss, can do work under

	// t0 holds the word from offset 0x00
	lw	t1, 0x04(a0)	// load rest of first group
	lw	t2, 0x08(a0)	// load rest of first group
	lw	t3, 0x0c(a0)	// load rest of first group
	lw	t4, 0x10(a0)	// load rest of first group
	lw	t5, 0x14(a0)	// load rest of first group
	lw	t6, 0x18(a0)	// load rest of first group
	lw	t7, 0x1c(a0)	// load rest of first group

	// t0 holds the word from offset 0x00
	sw	t0, 0x00(a1)	// store first group under second miss
	sw	t1, 0x04(a1)	// store first group under second miss
	sw	t2, 0x08(a1)	// store first group under second miss
	sw	t3, 0x0c(a1)	// store first group under second miss
	sw	t4, 0x10(a1)	// store first group under second miss
	sw	t5, 0x14(a1)	// store first group under second miss
	sw	t6, 0x18(a1)	// store first group under second miss
	sw	t7, 0x1c(a1)	// store first group under second miss

	lw	t0, 0x40(a0)	// third miss, can do work under

	// t8 holds the word from offset 0x20
	lw	t1, 0x24(a0)	// load rest of second group under third miss
	lw	t2, 0x28(a0)	// load rest of second group under third miss
	lw	t3, 0x2c(a0)	// load rest of second group under third miss
	lw	t4, 0x30(a0)	// load rest of second group under third miss
	lw	t5, 0x34(a0)	// load rest of second group under third miss
	lw	t6, 0x38(a0)	// load rest of second group under third miss
	lw	t7, 0x3c(a0)	// load rest of second group under third miss

	// t8 holds the word from offset 0x20
	sw	t8, 0x20(a1)	// store second group under third miss
	sw	t1, 0x24(a1)	// store second group under third miss
	sw	t2, 0x28(a1)	// store second group under third miss
	sw	t3, 0x2c(a1)	// store second group under third miss
	sw	t4, 0x30(a1)	// store second group under third miss
	sw	t5, 0x34(a1)	// store second group under third miss
	sw	t6, 0x38(a1)	// store second group under third miss
	sw	t7, 0x3c(a1)	// store second group under third miss

	lw	t8, 0x60(a0)	// fourth miss, can do work under

	// t0 holds the word from offset 0x40
	lw	t1, 0x44(a0)	// load rest of third group under fourth miss
	lw	t2, 0x48(a0)	// load rest of third group under fourth miss
	lw	t3, 0x4c(a0)	// load rest of third group under fourth miss
	lw	t4, 0x50(a0)	// load rest of third group under fourth miss
	lw	t5, 0x54(a0)	// load rest of third group under fourth miss
	lw	t6, 0x58(a0)	// load rest of third group under fourth miss
	lw	t7, 0x5c(a0)	// load rest of third group under fourth miss

	// t0 holds the word from offset 0x40
	sw	t0, 0x40(a1)	// store third group under fourth miss
	sw	t1, 0x44(a1)	// store third group under fourth miss
	sw	t2, 0x48(a1)	// store third group under fourth miss
	sw	t3, 0x4c(a1)	// store third group under fourth miss
	sw	t4, 0x50(a1)	// store third group under fourth miss
	sw	t5, 0x54(a1)	// store third group under fourth miss
	sw	t6, 0x58(a1)	// store third group under fourth miss
	sw	t7, 0x5c(a1)	// store third group under fourth miss

	// t8 holds the word from offset 0x60
	lw	t1, 0x64(a0)	// load rest of fourth group
	lw	t2, 0x68(a0)	// load rest of fourth group
	lw	t3, 0x6c(a0)	// load rest of fourth group
	lw	t4, 0x70(a0)	// load rest of fourth group
	lw	t5, 0x74(a0)	// load rest of fourth group
	lw	t6, 0x78(a0)	// load rest of fourth group
	lw	t7, 0x7c(a0)	// load rest of fourth group

	// t8 holds the word from offset 0x60
	sw	t8, 0x60(a1)	// store fourth group
	sw	t1, 0x64(a1)	// store fourth group
	sw	t2, 0x68(a1)	// store fourth group
	sw	t3, 0x6c(a1)	// store fourth group
	sw	t4, 0x70(a1)	// store fourth group
	sw	t5, 0x74(a1)	// store fourth group
	sw	t6, 0x78(a1)	// store fourth group
	jr	ra
	sw	t7, 0x7c(a1)	// store fourth group (branch delay slot)

// ---------------------------------------------------------------------------
// zero_128b_32a
//
// Writes zero to the 32 words (128 bytes, offsets 0x00..0x7c) starting at a0.
//   a0 == $4 is dst (the buffer being cleared)
// Clobbers no registers.  Returns nothing.
// NOTE(review): the "_32a" suffix presumably denotes a 32-bit (word) aligned
// buffer; sw requires word alignment in any case -- confirm with callers.
// ---------------------------------------------------------------------------
	.global	zero_128b_32a
zero_128b_32a:
	sw	$0, 0x00(a0)
	sw	$0, 0x04(a0)
	sw	$0, 0x08(a0)
	sw	$0, 0x0c(a0)
	sw	$0, 0x10(a0)
	sw	$0, 0x14(a0)
	sw	$0, 0x18(a0)
	sw	$0, 0x1c(a0)
	sw	$0, 0x20(a0)
	sw	$0, 0x24(a0)
	sw	$0, 0x28(a0)
	sw	$0, 0x2c(a0)
	sw	$0, 0x30(a0)
	sw	$0, 0x34(a0)
	sw	$0, 0x38(a0)
	sw	$0, 0x3c(a0)
	sw	$0, 0x40(a0)
	sw	$0, 0x44(a0)
	sw	$0, 0x48(a0)
	sw	$0, 0x4c(a0)
	sw	$0, 0x50(a0)
	sw	$0, 0x54(a0)
	sw	$0, 0x58(a0)
	sw	$0, 0x5c(a0)
	sw	$0, 0x60(a0)
	sw	$0, 0x64(a0)
	sw	$0, 0x68(a0)
	sw	$0, 0x6c(a0)
	sw	$0, 0x70(a0)
	sw	$0, 0x74(a0)
	sw	$0, 0x78(a0)
	jr	ra
	sw	$0, 0x7c(a0)	// last word, stored in the branch delay slot

// ---------------------------------------------------------------------------
// zero_256b_32a
//
// Writes zero to the 64 words (256 bytes, offsets 0x00..0xfc) starting at a0.
//   a0 == $4 is dst (the buffer being cleared)
// Clobbers no registers.  Returns nothing.
// NOTE(review): the "_32a" suffix presumably denotes a 32-bit (word) aligned
// buffer; sw requires word alignment in any case -- confirm with callers.
// ---------------------------------------------------------------------------
	.global	zero_256b_32a
zero_256b_32a:
	sw	$0, 0x00(a0)
	sw	$0, 0x04(a0)
	sw	$0, 0x08(a0)
	sw	$0, 0x0c(a0)
	sw	$0, 0x10(a0)
	sw	$0, 0x14(a0)
	sw	$0, 0x18(a0)
	sw	$0, 0x1c(a0)
	sw	$0, 0x20(a0)
	sw	$0, 0x24(a0)
	sw	$0, 0x28(a0)
	sw	$0, 0x2c(a0)
	sw	$0, 0x30(a0)
	sw	$0, 0x34(a0)
	sw	$0, 0x38(a0)
	sw	$0, 0x3c(a0)
	sw	$0, 0x40(a0)
	sw	$0, 0x44(a0)
	sw	$0, 0x48(a0)
	sw	$0, 0x4c(a0)
	sw	$0, 0x50(a0)
	sw	$0, 0x54(a0)
	sw	$0, 0x58(a0)
	sw	$0, 0x5c(a0)
	sw	$0, 0x60(a0)
	sw	$0, 0x64(a0)
	sw	$0, 0x68(a0)
	sw	$0, 0x6c(a0)
	sw	$0, 0x70(a0)
	sw	$0, 0x74(a0)
	sw	$0, 0x78(a0)
	sw	$0, 0x7c(a0)
	sw	$0, 0x80(a0)
	sw	$0, 0x84(a0)
	sw	$0, 0x88(a0)
	sw	$0, 0x8c(a0)
	sw	$0, 0x90(a0)
	sw	$0, 0x94(a0)
	sw	$0, 0x98(a0)
	sw	$0, 0x9c(a0)
	sw	$0, 0xa0(a0)
	sw	$0, 0xa4(a0)
	sw	$0, 0xa8(a0)
	sw	$0, 0xac(a0)
	sw	$0, 0xb0(a0)
	sw	$0, 0xb4(a0)
	sw	$0, 0xb8(a0)
	sw	$0, 0xbc(a0)
	sw	$0, 0xc0(a0)
	sw	$0, 0xc4(a0)
	sw	$0, 0xc8(a0)
	sw	$0, 0xcc(a0)
	sw	$0, 0xd0(a0)
	sw	$0, 0xd4(a0)
	sw	$0, 0xd8(a0)
	sw	$0, 0xdc(a0)
	sw	$0, 0xe0(a0)
	sw	$0, 0xe4(a0)
	sw	$0, 0xe8(a0)
	sw	$0, 0xec(a0)
	sw	$0, 0xf0(a0)
	sw	$0, 0xf4(a0)
	sw	$0, 0xf8(a0)
	jr	ra
	sw	$0, 0xfc(a0)	// last word, stored in the branch delay slot

// ---------------------------------------------------------------------------
// copy_16b_32a
//
// Copies 4 words (16 bytes, offsets 0x00..0x0c) from src to dst.
//   a0 == $4 is src
//   a1 == $5 is dst
// Clobbers t0-t3.  Returns nothing.
//
// The first and last words are loaded up front; the original author's notes
// treat both as potential cache misses (presumably because a merely
// word-aligned 16-byte span can straddle two lines -- TODO confirm).  The
// first store and the middle loads are issued while the second load is
// outstanding.
// ---------------------------------------------------------------------------
	.global	copy_16b_32a
copy_16b_32a:
	lw	t0, 0x00(a0)	// first miss, no work to do under
	lw	t3, 0x0c(a0)	// second miss, can do work under
	sw	t0, 0x00(a1)
	lw	t1, 0x04(a0)
	lw	t2, 0x08(a0)
	sw	t1, 0x04(a1)
	sw	t2, 0x08(a1)
	jr	ra
	sw	t3, 0x0c(a1)	// last word, stored in the branch delay slot
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -