📄 clear_copy.s
字号:
/* <LIC_AMD_STD>
* Copyright (C) 2003-2005 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Unless otherwise designated in writing, this software and any related
* documentation are the confidential proprietary information of AMD.
* THESE MATERIALS ARE PROVIDED "AS IS" WITHOUT ANY
* UNLESS OTHERWISE NOTED IN WRITING, EXPRESS OR IMPLIED WARRANTY OF ANY
* KIND, INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, TITLE, FITNESS FOR ANY PARTICULAR PURPOSE AND IN NO
* EVENT SHALL AMD OR ITS LICENSORS BE LIABLE FOR ANY DAMAGES WHATSOEVER.
*
* AMD does not assume any responsibility for any errors which may appear
* in the Materials nor any responsibility to support or update the
* Materials. AMD retains the right to modify the Materials at any time,
* without notice, and is not obligated to provide such modified
* Materials to you. AMD is not obligated to furnish, support, or make
* any further information available to you.
* </LIC_AMD_STD> */
/* <CTL_AMD_STD>
* </CTL_AMD_STD> */
/* <DOC_AMD_STD>
* </DOC_AMD_STD> */
#include <regdef.h>
// Everything below is assembled into the code section.
.text
// Alignment request for the first routine (2 is a power-of-two exponent,
// i.e. 4 bytes, under the usual MIPS assembler convention).
.align 2
// The assembler must not use the $at temporary behind our back.
.set noat
// The assembler must not reorder instructions or fill delay slots itself:
// the instruction written after each "jr ra" below is the hand-placed
// branch delay slot and is executed before control returns to the caller.
.set noreorder
.ent copy_32_aligned_words
.globl copy_32_aligned_words
/*
 * copy_32_aligned_words -- copy 32 words (128 bytes) from src to dst.
 *
 * In:       a0 ($4) = src
 *           a1 ($5) = dst
 * Out:      nothing (no result register is written)
 * Clobbers: t0-t8
 *
 * The data is moved as four groups of 0x20 bytes. Before the bulk of a
 * group is loaded and stored, the first word of the NEXT group is loaded
 * (alternating between t0 and t8). According to the original comments this
 * is done so that the cache miss for the next group is outstanding while
 * the current group is being copied. The instruction order is therefore
 * deliberate and must not be rearranged.
 *
 * Because the leading word of the next group is read before the current
 * group is written, overlapping src/dst ranges are not handled the way
 * memmove would handle them.
 *
 * lw/sw require word-aligned addresses. Presumably both pointers are
 * expected to be 32-byte aligned to match the 0x20-byte miss accounting
 * in the comments -- TODO confirm against callers.
 */
copy_32_aligned_words:
        lw      t0, 0x00(a0)            // first miss, no work to do under it
        lw      t8, 0x20(a0)            // second miss, can do work under it

        // Group 0 (0x00-0x1c): t0 already holds the word at 0x00.
        lw      t1, 0x04(a0)
        lw      t2, 0x08(a0)
        lw      t3, 0x0c(a0)
        lw      t4, 0x10(a0)
        lw      t5, 0x14(a0)
        lw      t6, 0x18(a0)
        lw      t7, 0x1c(a0)
        sw      t0, 0x00(a1)            // store group 0 under the second miss
        sw      t1, 0x04(a1)
        sw      t2, 0x08(a1)
        sw      t3, 0x0c(a1)
        sw      t4, 0x10(a1)
        sw      t5, 0x14(a1)
        sw      t6, 0x18(a1)
        sw      t7, 0x1c(a1)

        lw      t0, 0x40(a0)            // third miss, can do work under it

        // Group 1 (0x20-0x3c): t8 already holds the word at 0x20.
        lw      t1, 0x24(a0)
        lw      t2, 0x28(a0)
        lw      t3, 0x2c(a0)
        lw      t4, 0x30(a0)
        lw      t5, 0x34(a0)
        lw      t6, 0x38(a0)
        lw      t7, 0x3c(a0)
        sw      t8, 0x20(a1)            // store group 1 under the third miss
        sw      t1, 0x24(a1)
        sw      t2, 0x28(a1)
        sw      t3, 0x2c(a1)
        sw      t4, 0x30(a1)
        sw      t5, 0x34(a1)
        sw      t6, 0x38(a1)
        sw      t7, 0x3c(a1)

        lw      t8, 0x60(a0)            // fourth miss, can do work under it

        // Group 2 (0x40-0x5c): t0 already holds the word at 0x40.
        lw      t1, 0x44(a0)
        lw      t2, 0x48(a0)
        lw      t3, 0x4c(a0)
        lw      t4, 0x50(a0)
        lw      t5, 0x54(a0)
        lw      t6, 0x58(a0)
        lw      t7, 0x5c(a0)
        sw      t0, 0x40(a1)            // store group 2 under the fourth miss
        sw      t1, 0x44(a1)
        sw      t2, 0x48(a1)
        sw      t3, 0x4c(a1)
        sw      t4, 0x50(a1)
        sw      t5, 0x54(a1)
        sw      t6, 0x58(a1)
        sw      t7, 0x5c(a1)

        // Group 3 (0x60-0x7c): t8 already holds the word at 0x60.
        // This is the last group, so there is no further word to fetch ahead.
        lw      t1, 0x64(a0)
        lw      t2, 0x68(a0)
        lw      t3, 0x6c(a0)
        lw      t4, 0x70(a0)
        lw      t5, 0x74(a0)
        lw      t6, 0x78(a0)
        lw      t7, 0x7c(a0)
        sw      t8, 0x60(a1)            // store group 3
        sw      t1, 0x64(a1)
        sw      t2, 0x68(a1)
        sw      t3, 0x6c(a1)
        sw      t4, 0x70(a1)
        sw      t5, 0x74(a1)
        sw      t6, 0x78(a1)
        jr      ra
        sw      t7, 0x7c(a1)            // branch delay slot: final word of group 3
// Name the procedure so that .end is paired with the .ent above.
.end copy_32_aligned_words
/*
 * zero_128b_32a -- clear 128 bytes (32 words) starting at a0.
 *
 * In:       a0 ($4) = base address of the region to clear. The routine
 *           only stores through a0, so it is the destination (the
 *           original comment called it "src").
 * Out:      nothing (no result register is written)
 * Clobbers: none; only sw and jr are used.
 *
 * sw requires a word-aligned address. The "_32a" suffix presumably means
 * the caller passes a 32-byte aligned pointer -- TODO confirm.
 */
.ent zero_128b_32a
.globl zero_128b_32a
zero_128b_32a:
        sw      $0, 0x00(a0)
        sw      $0, 0x04(a0)
        sw      $0, 0x08(a0)
        sw      $0, 0x0c(a0)
        sw      $0, 0x10(a0)
        sw      $0, 0x14(a0)
        sw      $0, 0x18(a0)
        sw      $0, 0x1c(a0)

        sw      $0, 0x20(a0)
        sw      $0, 0x24(a0)
        sw      $0, 0x28(a0)
        sw      $0, 0x2c(a0)
        sw      $0, 0x30(a0)
        sw      $0, 0x34(a0)
        sw      $0, 0x38(a0)
        sw      $0, 0x3c(a0)

        sw      $0, 0x40(a0)
        sw      $0, 0x44(a0)
        sw      $0, 0x48(a0)
        sw      $0, 0x4c(a0)
        sw      $0, 0x50(a0)
        sw      $0, 0x54(a0)
        sw      $0, 0x58(a0)
        sw      $0, 0x5c(a0)

        sw      $0, 0x60(a0)
        sw      $0, 0x64(a0)
        sw      $0, 0x68(a0)
        sw      $0, 0x6c(a0)
        sw      $0, 0x70(a0)
        sw      $0, 0x74(a0)
        sw      $0, 0x78(a0)
        jr      ra
        sw      $0, 0x7c(a0)            // branch delay slot: last word
// Name the procedure so that .end is paired with the .ent above.
.end zero_128b_32a
/*
 * zero_256b_32a -- clear 256 bytes (64 words) starting at a0.
 *
 * In:       a0 ($4) = base address of the region to clear. The routine
 *           only stores through a0, so it is the destination (the
 *           original comment called it "src").
 * Out:      nothing (no result register is written)
 * Clobbers: none; only sw and jr are used.
 *
 * sw requires a word-aligned address. The "_32a" suffix presumably means
 * the caller passes a 32-byte aligned pointer -- TODO confirm.
 */
.ent zero_256b_32a
.globl zero_256b_32a
zero_256b_32a:
        sw      $0, 0x00(a0)
        sw      $0, 0x04(a0)
        sw      $0, 0x08(a0)
        sw      $0, 0x0c(a0)
        sw      $0, 0x10(a0)
        sw      $0, 0x14(a0)
        sw      $0, 0x18(a0)
        sw      $0, 0x1c(a0)

        sw      $0, 0x20(a0)
        sw      $0, 0x24(a0)
        sw      $0, 0x28(a0)
        sw      $0, 0x2c(a0)
        sw      $0, 0x30(a0)
        sw      $0, 0x34(a0)
        sw      $0, 0x38(a0)
        sw      $0, 0x3c(a0)

        sw      $0, 0x40(a0)
        sw      $0, 0x44(a0)
        sw      $0, 0x48(a0)
        sw      $0, 0x4c(a0)
        sw      $0, 0x50(a0)
        sw      $0, 0x54(a0)
        sw      $0, 0x58(a0)
        sw      $0, 0x5c(a0)

        sw      $0, 0x60(a0)
        sw      $0, 0x64(a0)
        sw      $0, 0x68(a0)
        sw      $0, 0x6c(a0)
        sw      $0, 0x70(a0)
        sw      $0, 0x74(a0)
        sw      $0, 0x78(a0)
        sw      $0, 0x7c(a0)

        sw      $0, 0x80(a0)
        sw      $0, 0x84(a0)
        sw      $0, 0x88(a0)
        sw      $0, 0x8c(a0)
        sw      $0, 0x90(a0)
        sw      $0, 0x94(a0)
        sw      $0, 0x98(a0)
        sw      $0, 0x9c(a0)

        sw      $0, 0xa0(a0)
        sw      $0, 0xa4(a0)
        sw      $0, 0xa8(a0)
        sw      $0, 0xac(a0)
        sw      $0, 0xb0(a0)
        sw      $0, 0xb4(a0)
        sw      $0, 0xb8(a0)
        sw      $0, 0xbc(a0)

        sw      $0, 0xc0(a0)
        sw      $0, 0xc4(a0)
        sw      $0, 0xc8(a0)
        sw      $0, 0xcc(a0)
        sw      $0, 0xd0(a0)
        sw      $0, 0xd4(a0)
        sw      $0, 0xd8(a0)
        sw      $0, 0xdc(a0)

        sw      $0, 0xe0(a0)
        sw      $0, 0xe4(a0)
        sw      $0, 0xe8(a0)
        sw      $0, 0xec(a0)
        sw      $0, 0xf0(a0)
        sw      $0, 0xf4(a0)
        sw      $0, 0xf8(a0)
        jr      ra
        sw      $0, 0xfc(a0)            // branch delay slot: last word
// Name the procedure so that .end is paired with the .ent above.
.end zero_256b_32a
/*
 * copy_16b_32a -- copy 16 bytes (4 words) from src to dst.
 *
 * In:       a0 ($4) = src
 *           a1 ($5) = dst
 * Out:      nothing (no result register is written)
 * Clobbers: t0-t3
 *
 * The first store is issued before the words at 0x04 and 0x08 are loaded,
 * so overlapping src/dst ranges are not handled the way memmove would
 * handle them. The instruction order is deliberate; keep it as is.
 *
 * lw/sw require word-aligned addresses. The "_32a" suffix presumably means
 * the caller passes 32-byte aligned pointers -- TODO confirm.
 */
.ent copy_16b_32a
.globl copy_16b_32a
copy_16b_32a:
        lw      t0, 0x00(a0)            // first miss, no work to do under it
        // The original author labels the next load "second miss".
        // NOTE(review): with a 32-byte aligned src, 0x0c lies in the same
        // 0x20-byte span as 0x00 -- confirm what was intended here.
        lw      t3, 0x0c(a0)
        sw      t0, 0x00(a1)
        lw      t1, 0x04(a0)
        lw      t2, 0x08(a0)
        sw      t1, 0x04(a1)
        sw      t2, 0x08(a1)
        jr      ra
        sw      t3, 0x0c(a1)            // branch delay slot: last word
// Name the procedure so that .end is paired with the .ent above.
.end copy_16b_32a
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -