📄 memcopy_macro.inc
字号:
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;
; Use of this source code is subject to the terms of the Microsoft end-user
; license agreement (EULA) under which you licensed this SOFTWARE PRODUCT.
; If you did not accept the terms of the EULA, you are not authorized to use
; this source code. For a copy of the EULA, please see the LICENSE.RTF on your
; install media.
;
;/*
;** INTEL CONFIDENTIAL
;** Copyright 2000-2003 Intel Corporation All Rights Reserved.
;**
;** The source code contained or described herein and all documents
;** related to the source code (Material) are owned by Intel Corporation
;** or its suppliers or licensors. Title to the Material remains with
;** Intel Corporation or its suppliers and licensors. The Material contains
;** trade secrets and proprietary and confidential information of Intel
;** or its suppliers and licensors. The Material is protected by worldwide
;** copyright and trade secret laws and treaty provisions. No part of the
;** Material may be used, copied, reproduced, modified, published, uploaded,
;** posted, transmitted, distributed, or disclosed in any way without Intel's
;** prior express written permission.
;
;** No license under any patent, copyright, trade secret or other intellectual
;** property right is granted to or conferred upon you by disclosure or
;** delivery of the Materials, either expressly, by implication, inducement,
;** estoppel or otherwise. Any license under such intellectual property rights
;** must be express and approved by Intel in writing.
;*/
; Include guard: the macro definitions below are assembled only when
; __MEMCOPY_MACRO_INC_ has not been declared yet; the GBLL declares it.
IF :LNOT: :DEF: __MEMCOPY_MACRO_INC_
GBLL __MEMCOPY_MACRO_INC_
; MEMCOPYMBTS_LG16
; Copy loop unrolled by 8, intended for len >= 16; this form is used to
; avoid pipeline stalls. A one-transfer-per-iteration loop handles the
; remaining ($len AND 7) transfers.
; input:
; $mmbtl ->label prefix, prepended to every local label emitted by this macro
; $pSrc ->reg pointer to the source buffer, written back by every load ([$pSrc], $inc0)
; $pDst ->reg pointer to the destination buffer, written back by every store ([$pDst], $inc1)
; $len ->reg number of load/store transfers to perform; only read by this macro
; $inc0 ->offset operand applied to $pSrc after each load
; $inc1 ->offset operand applied to $pDst after each store
; $ldrop ->load mnemonic
; $strop ->store mnemonic
; work registers (all overwritten)
; $tReg -> loop counter; must not be the same register as $len, because $len
;          is read again for the remainder after $tReg has been written
; $dReg0..$dReg3 -> data regs; four values are loaded before any is stored,
;          so they have to be four different registers
; The condition flags are modified.
; NOTE(review): each loop relies on $ldrop/$strop leaving the condition flags
; untouched between the subs and the closing bgt -- confirm at the call sites.
MACRO
$mmbtl MEMCOPYMBTS_LG16 $pSrc, $pDst, $len, $inc0, $inc1, $tReg, $dReg0, $dReg1, $dReg2, $dReg3, $ldrop, $strop
; loop unrolled by 8
movs $tReg, $len, lsr #3 ; $tReg = $len / 8 = number of 8-transfer iterations
; no full group of 8: go straight to the remainder handling
beq $mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B8_EXIT
$mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B8
; first half of the iteration: load 4, then store 4
$ldrop $dReg0, [$pSrc], $inc0
$ldrop $dReg1, [$pSrc], $inc0
$ldrop $dReg2, [$pSrc], $inc0
$ldrop $dReg3, [$pSrc], $inc0
; counter is decremented here, in the middle of the body; the resulting
; flags are consumed by the bgt at the end of the iteration
subs $tReg, $tReg, #1
$strop $dReg0, [$pDst], $inc1
$strop $dReg1, [$pDst], $inc1
$strop $dReg2, [$pDst], $inc1
$strop $dReg3, [$pDst], $inc1
; second half of the iteration: load 4, then store 4
$ldrop $dReg0, [$pSrc], $inc0
$ldrop $dReg1, [$pSrc], $inc0
$ldrop $dReg2, [$pSrc], $inc0
$ldrop $dReg3, [$pSrc], $inc0
$strop $dReg0, [$pDst], $inc1
$strop $dReg1, [$pDst], $inc1
$strop $dReg2, [$pDst], $inc1
$strop $dReg3, [$pDst], $inc1
; repeat while the decremented counter is still > 0
bgt $mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B8
$mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B8_EXIT
; $tReg = $len AND 7 = transfers left over after the groups of 8
ands $tReg, $len, #7
beq $mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_EXIT
$mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B1
; remainder loop: one transfer per iteration
$ldrop $dReg0, [$pSrc], $inc0
subs $tReg, $tReg, #1
$strop $dReg0, [$pDst], $inc1
bgt $mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_B1
$mmbtl.COPY_IN_16BIT_HEIGHT_LOOP_EXIT
MEND
; MEMCOPYMBTS_LT16
; len < 16, use this method to avoid much overhead: fewer registers required, fewer branches
; The copy is skipped entirely when $len <= 0 (signed compare).
; input:
; $mmbts ->label prefix, prepended to every local label emitted by this macro
; $pSrc ->reg pointer to the source buffer, written back by every load
; $pDst ->reg pointer to the destination buffer, written back by every store
; $len ->reg buffer length (number of transfers); DESTROYED: it is shifted
;        right by one and then counted down as the loop counter
; $inc0 ->reg or immediate data(8bit?), address increment offset for pSrc
; $inc1 ->reg or immediate data(8bit?), address increment offset for pDst
; $ldrop ->load operation, can be ldr, ldrh, ldrb
; $strop ->store operation, can be str, strh, strb
; $ldrop2 ->load operation emitted once, right after "tst $len, #1"
; $strop2 ->store operation emitted once, right after "tst $len, #1"
;        NOTE(review): presumably the NE-conditional forms of $ldrop/$strop,
;        so that the single extra transfer happens only when $len is odd --
;        confirm at the call sites
; work registers
; $dReg0 -> data reg0
; $dReg1 -> data reg1, must use different register from $dReg0
; use 5 registers
; The condition flags are modified.
MACRO
$mmbts MEMCOPYMBTS_LT16 $pSrc, $pDst, $len, $inc0, $inc1, $dReg0, $dReg1, $ldrop, $strop, $ldrop2, $strop2
; loop unrolled by 2
; nothing to do when $len <= 0
cmp $len, #0
ble $mmbts.COPY_IN_16BIT_HEIGHT_LOOP_B2_EXIT
; test bit 0 of $len: Z is clear when $len is odd
tst $len, #1
; single transfer using the caller-supplied $ldrop2/$strop2 mnemonics; the
; flags from the tst above are still in effect for both instructions
; (assuming $ldrop2 does not modify them)
$ldrop2 $dReg0, [$pSrc], $inc0
$strop2 $dReg0, [$pDst], $inc1
movs $len, $len, lsr #1 ; $len = $len / 2 = number of 2-transfer iterations
; no pair left to copy
beq $mmbts.COPY_IN_16BIT_HEIGHT_LOOP_B2_EXIT
$mmbts.COPY_IN_16BIT_HEIGHT_LOOP_B2
; load 2, decrement the counter, store 2; the flags set by subs are
; consumed by the bgt below (assumes $strop leaves the flags untouched)
$ldrop $dReg0, [$pSrc], $inc0
$ldrop $dReg1, [$pSrc], $inc0
subs $len, $len, #1
$strop $dReg0, [$pDst], $inc1
$strop $dReg1, [$pDst], $inc1
; repeat while the decremented counter is still > 0
bgt $mmbts.COPY_IN_16BIT_HEIGHT_LOOP_B2
$mmbts.COPY_IN_16BIT_HEIGHT_LOOP_B2_EXIT
MEND
; closes the IF :LNOT: :DEF: __MEMCOPY_MACRO_INC_ include guard
ENDIF
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -