; File: copyblock.asm
; (A "font size:" label from the web code viewer followed here; it is not part of the source.)
; Listing title: path of the C++ file named by the assembler listing this
; source carries (see the "; File" comment inside the procedure).
TTL C:\Dokumente und Einstellungen\drops\Eigene Dateien\myprojects\test5\basic_prediction.cpp
; Linker-directive section: the strings below are stored verbatim in the
; object file's .drectve area (default-library requests for coredll.lib and
; corelibc.lib).
AREA |.drectve|, DRECTVE
DCB "-defaultlib:coredll.lib "
DCB "-defaultlib:corelibc.lib "
; Make the CopyBlock symbol visible to other object files.
EXPORT |CopyBlock| ; CopyBlock
; Procedure-data (.pdata) entry for CopyBlock: first word is the function's
; start address, second word is a packed descriptor.
AREA |.pdata|, PDATA
|$T224| DCD |CopyBlock|
; NOTE(review): presumably this follows the Windows CE ARM PDATA layout
; (bits 0-7 = prolog length, bits 8-29 = function length, both counted in
; instructions, bit 30 = 32-bit code). If so, 0x40000300 describes a
; 3-instruction function with no prolog, which does not match the body below
; (one-instruction stmdb prolog, far more than 3 instructions) -- TODO confirm
; the format and regenerate this word so stack unwinding works.
DCD 0x40000300
; Code section holding the function body.
AREA |.text|, CODE
;-----------------------------------------------------------------------
; CopyBlock -- copy a block of 8 rows x 8 bytes from src to dst.
;
; Syntax: ARM/Microsoft armasm, 32-bit ARM state.
; C view: presumably
;           void CopyBlock(const unsigned char *src, unsigned char *dst,
;                          unsigned int stride);
;         -- TODO confirm against the C/C++ declaration.
;
; In:     r0 = source pointer (first row)
;         r1 = destination pointer (first row)
;         r2 = stride in bytes; added to BOTH pointers after every row
; Out:    no result value; r0 and r1 are left pointing at the last (8th) row
; Clobb:  r0, r1, r3, flags.  r4-r8 are used as scratch but saved/restored
;         on the stack together with the return address.
;
; Path selection (widest access that is safe for every row):
;   src, dst and stride all multiples of 4 -> copy_full_words  (ldr/str)
;   src, dst and stride all multiples of 2 -> copy_half_words  (ldrh/strh)
;   otherwise                              -> copy_single_bytes (ldrb/strb)
; The stride takes part in the test because it is added to both pointers on
; every row: an aligned first row with an unaligned stride would otherwise
; execute ldr/ldrh on misaligned addresses from the second row onwards.
;
; Preload scheme: the source rows 1 and 2 are preloaded on entry; each of
; the six looped rows preloads the source row two rows ahead (rows 3..8), so
; the two final, unrolled rows need no preload.  Both the first and the last
; byte of a row (offset 7) are touched in case the row straddles a cache line.
; The preloads are emitted as raw opcodes: 0xf5dRf0NN is "pld [rR, #NN]" on
; ARMv5 and a never-executed (condition NV) instruction on ARMv4.
;-----------------------------------------------------------------------
|CopyBlock| PROC                            ; CopyBlock
; File C:\Dokumente und Einstellungen\drops\Eigene Dateien\myprojects\test5\basic_prediction.cpp
; Line 2
|$M222|
        stmdb   sp!, {r4 - r8, lr}          ; stmfd: save scratch regs + return address

        ;; Preload the first two source rows.
        DCD     0xf5d0f000                  ; pld [r0]      row 1, first byte
        DCD     0xf5d0f007                  ; pld [r0, #7]  row 1, last byte
        add     r8, r2, r0                  ; r8 = address of source row 2
        DCD     0xf5d8f000                  ; pld [r8]      row 2, first byte
        DCD     0xf5d8f007                  ; pld [r8, #7]  row 2, last byte
        add     r8, r2, r8                  ; r8 = address of source row 3 (next to preload)

        ;; Choose the copy width from the combined low bits of src, dst and
        ;; stride: a bit set in any of them is set in r3.
        orr     r3, r0, r1
        orr     r3, r3, r2
        tst     r3, #3                      ; everything divisible by four?
        beq     copy_full_words
        tst     r3, #1                      ; everything divisible by two?
        beq     copy_half_words
        ; otherwise fall through to the byte-wise copy

;;; ----- BYTE COPY -----
copy_single_bytes
        ; ======== rows 1..6 (looped, with preload) ========
        mov     r7, #6                      ; r7 = looped rows remaining
cbytes_loop
        ;; preload source row n+2
        DCD     0xf5d8f000                  ; pld [r8]      first byte
        DCD     0xf5d8f007                  ; pld [r8, #7]  last byte
        add     r8, r2, r8                  ; r8 = next source row to preload
        ; bytes 0..3
        ldrb    r3, [r0]
        ldrb    r4, [r0, #1]
        ldrb    r5, [r0, #2]
        ldrb    r6, [r0, #3]
        strb    r3, [r1]
        strb    r4, [r1, #1]
        strb    r5, [r1, #2]
        strb    r6, [r1, #3]
        ; bytes 4..7
        ldrb    r3, [r0, #4]
        ldrb    r4, [r0, #5]
        ldrb    r5, [r0, #6]
        ldrb    r6, [r0, #7]
        strb    r3, [r1, #4]
        strb    r4, [r1, #5]
        strb    r5, [r1, #6]
        strb    r6, [r1, #7]
        ; advance both pointers to the next row
        add     r0, r0, r2
        add     r1, r1, r2
        subs    r7, r7, #1
        bne     cbytes_loop

        ; ======== row 7 (unrolled, no preload) ========
        ldrb    r3, [r0]
        ldrb    r4, [r0, #1]
        ldrb    r5, [r0, #2]
        ldrb    r6, [r0, #3]
        strb    r3, [r1]
        strb    r4, [r1, #1]
        strb    r5, [r1, #2]
        strb    r6, [r1, #3]
        ldrb    r3, [r0, #4]
        ldrb    r4, [r0, #5]
        ldrb    r5, [r0, #6]
        ldrb    r6, [r0, #7]
        strb    r3, [r1, #4]
        strb    r4, [r1, #5]
        strb    r5, [r1, #6]
        strb    r6, [r1, #7]
        add     r0, r0, r2
        add     r1, r1, r2

        ; ======== row 8 (unrolled, no preload, no pointer advance) ========
        ldrb    r3, [r0]
        ldrb    r4, [r0, #1]
        ldrb    r5, [r0, #2]
        ldrb    r6, [r0, #3]
        strb    r3, [r1]
        strb    r4, [r1, #1]
        strb    r5, [r1, #2]
        strb    r6, [r1, #3]
        ldrb    r3, [r0, #4]
        ldrb    r4, [r0, #5]
        ldrb    r5, [r0, #6]
        ldrb    r6, [r0, #7]
        strb    r3, [r1, #4]
        strb    r4, [r1, #5]
        strb    r5, [r1, #6]
        strb    r6, [r1, #7]
        ; return
        ldmia   sp!, {r4 - r8, pc}          ; ldmfd: restore regs, load pc = return

;;; ----- HALF WORD COPY -----
copy_half_words
        ; ======== rows 1..6 (looped, with preload) ========
        mov     r7, #6                      ; r7 = looped rows remaining
chalf_loop
        ;; preload source row n+2
        DCD     0xf5d8f000                  ; pld [r8]      first byte
        DCD     0xf5d8f007                  ; pld [r8, #7]  last byte
        add     r8, r2, r8                  ; r8 = next source row to preload
        ;; copy the row as four halfwords
        ldrh    r3, [r0]
        ldrh    r4, [r0, #2]
        ldrh    r5, [r0, #4]
        ldrh    r6, [r0, #6]
        strh    r3, [r1]
        strh    r4, [r1, #2]
        strh    r5, [r1, #4]
        strh    r6, [r1, #6]
        ; advance both pointers to the next row
        add     r0, r0, r2
        add     r1, r1, r2
        subs    r7, r7, #1
        bne     chalf_loop

        ; ======== row 7 (unrolled, no preload) ========
        ldrh    r3, [r0]
        ldrh    r4, [r0, #2]
        ldrh    r5, [r0, #4]
        ldrh    r6, [r0, #6]
        strh    r3, [r1]
        strh    r4, [r1, #2]
        strh    r5, [r1, #4]
        strh    r6, [r1, #6]
        add     r0, r0, r2
        add     r1, r1, r2

        ; ======== row 8 (unrolled, no preload, no pointer advance) ========
        ldrh    r3, [r0]
        ldrh    r4, [r0, #2]
        ldrh    r5, [r0, #4]
        ldrh    r6, [r0, #6]
        strh    r3, [r1]
        strh    r4, [r1, #2]
        strh    r5, [r1, #4]
        strh    r6, [r1, #6]
        ; return
        ldmia   sp!, {r4 - r8, pc}          ; ldmfd: restore regs, load pc = return

;;; ----- FULL WORD COPY -----
copy_full_words
        ; ======== rows 1..6 (looped, with preload) ========
        mov     r7, #6                      ; r7 = looped rows remaining
cword_loop
        ;; preload source row n+2
        DCD     0xf5d8f000                  ; pld [r8]      first byte
        DCD     0xf5d8f007                  ; pld [r8, #7]  last byte
        add     r8, r2, r8                  ; r8 = next source row to preload
        ;; copy the row as two words
        ldr     r3, [r0]
        ldr     r4, [r0, #4]
        str     r3, [r1]
        str     r4, [r1, #4]
        ; advance both pointers to the next row
        add     r0, r0, r2
        add     r1, r1, r2
        subs    r7, r7, #1
        bne     cword_loop

        ; ======== row 7 (unrolled, no preload) ========
        ldr     r3, [r0]
        ldr     r4, [r0, #4]
        str     r3, [r1]
        str     r4, [r1, #4]
        add     r0, r0, r2
        add     r1, r1, r2

        ; ======== row 8 (unrolled, no preload, no pointer advance) ========
        ldr     r3, [r0]
        ldr     r4, [r0, #4]
        str     r3, [r1]
        str     r4, [r1, #4]
        ; return
        ldmia   sp!, {r4 - r8, pc}          ; ldmfd: restore regs, load pc = return
|$M223|
        ENDP                                ; |CopyBlock|, CopyBlock
END
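; --- Residue from the web code viewer (not part of the source) ---
; Keyboard shortcuts:
;   Copy code            Ctrl + C
;   Search code          Ctrl + F
;   Full-screen mode     F11
;   Toggle theme         Ctrl + Shift + D
;   Show shortcuts       ?
;   Increase font size   Ctrl + =
;   Decrease font size   Ctrl + -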