⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 copymblock.asm

📁 一个播放器 使用了evc 大家可以参考下 哦
💻 ASM
📖 第 1 页 / 共 2 页
字号:
	TTL	C:\Dokumente und Einstellungen\drops\Eigene Dateien\myprojects\test5\basic_prediction.cpp

	; Linker directives: default libraries to link against.
	AREA	|.drectve|, DRECTVE
	DCB	"-defaultlib:coredll.lib "
	DCB	"-defaultlib:corelibc.lib "

	EXPORT	|CopyMBlock|		; CopyMBlock: public entry point
	; NOTE(review): no reference to MessageBoxW was found in the reviewed
	; portion of this file -- confirm the import is still needed.
	IMPORT	|MessageBoxW|
	
	; Function-table entry for CopyMBlock: start address followed by a packed
	; descriptor word.
	; NOTE(review): in the Windows CE ARM function-table format the descriptor
	; packs prolog length (bits 0-7), function length (bits 8-29), a 32-bit
	; flag (bit 30) and an exception flag (bit 31). 0x40000300 would then
	; describe a function of length 3 with no prolog, which does not look like
	; it matches the hand-written routine -- confirm and regenerate if needed.
	AREA	|.pdata|, PDATA
|$T224|	DCD	|CopyMBlock|
	DCD	0x40000300

	AREA	|.text|, CODE

; CopyMBlock: entry, register save, initial source preload and alignment
; dispatch.
;   r0 = source pointer
;   r1 = destination pointer
;   r2 = stride in bytes (added to both pointers after every row)
; r3-r8 are used as scratch; r4-r8 and lr are pushed here and popped again
; by the return at the end of the byte and halfword paths.
; NOTE(review): every row copied is 16 bytes and the byte/halfword paths copy
; 16 rows -- presumably a 16x16 block; confirm against the C prototype.
|CopyMBlock| PROC				; CopyMBlock
; File C:\Dokumente und Einstellungen\drops\Eigene Dateien\myprojects\test5\basic_prediction.cpp
; Line 2
|$M222|

; save the scratch registers used below and the return address
	stmdb	sp!, {r4 - r8, lr}  ; stmfd
			
	;; optimization assumptions:
	;;  memory bandwidth is limited
	;;  advance loads as much as possible
	;;  minimize the number of stores (and take advantage of store coalescing on XScale and SA1110)
	
	;; first, preload the first two source rows
	;; 0xf5dRf000 is preload register R in armV5 and a noop in armV4 (load never);
	;; the instruction words are emitted as raw data with "&"
	& 0xf5d0f000		; pld [r0]      row 1 src, first byte
	& 0xf5d0f00f		; pld [r0,#15]  row 1 src, last byte
	add	r8,r2,r0	; r8 = src + stride (row 2)
	& 0xf5d8f000		; pld [r8]      row 2 src, first byte
	& 0xf5d8f00f		; pld [r8,#15]  row 2 src, last byte
	add	r8,r2,r8	; r8 = src + 2*stride: next src row to preload
		
; pick the widest copy that the alignment of both pointers allows

	ands      r3, r0, #3		; src 4-byte aligned?
	andeqs    r3, r1, #3		; ... and dst 4-byte aligned as well?
 	beq       copy_full_words 	; both aligned: copy 32-bit words
	
 	ands      r3, r0, #1		; src address odd?
 	bne       copy_single_bytes 	; yes: copy byte by byte

 	ands      r3, r1, #1		; dst address odd?
  	beq       copy_half_words 	; no, both are even: copy halfwords
	; dst is odd: fall through to copy_single_bytes
	
; copy_single_bytes: byte-wise copy of 16 rows of 16 bytes each.
; Reached when the source or the destination address is odd.
;   r0 = src row, r1 = dst row, r2 = stride,
;   r8 = src + 2*stride (address of the next source row to preload)
;   r3-r6 = data, r7 = loop counter
; Each group loads four bytes before storing them, so the loads are issued
; ahead of the stores.
copy_single_bytes
	
; ======== rows 1-14: looped, with preload ======== 
; bytes 0-3

	mov	r7,#14		; r7 = rows handled by the preloading loop

cbytes_loop		
	& 0xf5d8f000		; pld [r8]      src row n+2, first byte
	& 0xf5d8f00f		; pld [r8,#15]  src row n+2, last byte
	add	r8,r2,r8	; advance the preload pointer by one row

	ldrb       r3, [r0]
	ldrb       r4, [r0,#1]
	ldrb       r5, [r0,#2]
	ldrb       r6, [r0,#3]

	strb       r3, [r1]
	strb       r4, [r1,#1]
	strb       r5, [r1,#2]
	strb       r6, [r1,#3]

; bytes 4-7

	ldrb       r3, [r0,#4]
	ldrb       r4, [r0,#5]
	ldrb       r5, [r0,#6]
	ldrb       r6, [r0,#7]

	strb       r3, [r1,#4]
	strb       r4, [r1,#5]
	strb       r5, [r1,#6]
	strb       r6, [r1,#7]

; bytes 8-11

	ldrb       r3, [r0,#8]
	ldrb       r4, [r0,#9]
	ldrb       r5, [r0,#10]
	ldrb       r6, [r0,#11]

	strb       r3, [r1,#8]
	strb       r4, [r1,#9]
	strb       r5, [r1,#10]
	strb       r6, [r1,#11]

; bytes 12-15

	ldrb       r3, [r0,#12]
	ldrb       r4, [r0,#13]
	ldrb       r5, [r0,#14]
	ldrb       r6, [r0,#15]

	strb       r3, [r1,#12]
	strb       r4, [r1,#13]
	strb       r5, [r1,#14]
	strb       r6, [r1,#15]
	
; advance src and dst to the next row

	add       r0, r0, r2
	add       r1, r1, r2
	
	subs	r7,r7,#1	; one row done
	bne	cbytes_loop	; repeat until 14 rows are copied

;;; the loop above covers the first 14 rows (with preload);
;;; the last two rows are unrolled below without preload, because
;;; rows 15 and 16 were already preloaded by the loop

; ======== row 15 ========
	;; no preload
	
	ldrb       r3, [r0]
	ldrb       r4, [r0,#1]
	ldrb       r5, [r0,#2]
	ldrb       r6, [r0,#3]

	strb       r3, [r1]
	strb       r4, [r1,#1]
	strb       r5, [r1,#2]
	strb       r6, [r1,#3]

; bytes 4-7

	ldrb       r3, [r0,#4]
	ldrb       r4, [r0,#5]
	ldrb       r5, [r0,#6]
	ldrb       r6, [r0,#7]

	strb       r3, [r1,#4]
	strb       r4, [r1,#5]
	strb       r5, [r1,#6]
	strb       r6, [r1,#7]

; bytes 8-11

	ldrb       r3, [r0,#8]
	ldrb       r4, [r0,#9]
	ldrb       r5, [r0,#10]
	ldrb       r6, [r0,#11]

	strb       r3, [r1,#8]
	strb       r4, [r1,#9]
	strb       r5, [r1,#10]
	strb       r6, [r1,#11]

; bytes 12-15

	ldrb       r3, [r0,#12]
	ldrb       r4, [r0,#13]
	ldrb       r5, [r0,#14]
	ldrb       r6, [r0,#15]

	strb       r3, [r1,#12]
	strb       r4, [r1,#13]
	strb       r5, [r1,#14]
	strb       r6, [r1,#15]
	
; advance src and dst to the last row

	add       r0, r0, r2
	add       r1, r1, r2

; ======== row 16 ========
	;; no preload, and no stride add afterwards (last row)
	
	ldrb       r3, [r0]
	ldrb       r4, [r0,#1]
	ldrb       r5, [r0,#2]
	ldrb       r6, [r0,#3]

	strb       r3, [r1]
	strb       r4, [r1,#1]
	strb       r5, [r1,#2]
	strb       r6, [r1,#3]

; bytes 4-7

	ldrb       r3, [r0,#4]
	ldrb       r4, [r0,#5]
	ldrb       r5, [r0,#6]
	ldrb       r6, [r0,#7]

	strb       r3, [r1,#4]
	strb       r4, [r1,#5]
	strb       r5, [r1,#6]
	strb       r6, [r1,#7]

; bytes 8-11

	ldrb       r3, [r0,#8]
	ldrb       r4, [r0,#9]
	ldrb       r5, [r0,#10]
	ldrb       r6, [r0,#11]

	strb       r3, [r1,#8]
	strb       r4, [r1,#9]
	strb       r5, [r1,#10]
	strb       r6, [r1,#11]

; bytes 12-15

	ldrb       r3, [r0,#12]
	ldrb       r4, [r0,#13]
	ldrb       r5, [r0,#14]
	ldrb       r6, [r0,#15]

	strb       r3, [r1,#12]
	strb       r4, [r1,#13]
	strb       r5, [r1,#14]
	strb       r6, [r1,#15]
			
; return: restore r4-r8 and load the saved lr into pc
	ldmia     sp!, {r4 - r8, pc}  ; ldmfd

	

; copy_half_words: halfword-wise copy of 16 rows of 16 bytes each.
; Reached when both addresses are even but not both 4-byte aligned.
;   r0 = src row, r1 = dst row, r2 = stride,
;   r8 = src + 2*stride (address of the next source row to preload)
;   r3-r6 = data, r7 = loop counter
; Each group loads four halfwords before storing them, so the loads are
; issued ahead of the stores.
copy_half_words

; ======== rows 1-14: looped, with preload ======== 

	mov	r7,#14		; r7 = rows handled by the preloading loop

chalf_loop		
	& 0xf5d8f000		; pld [r8]      src row n+2, first byte
	& 0xf5d8f00f		; pld [r8,#15]  src row n+2, last byte
	add	r8,r2,r8	; advance the preload pointer by one row

; bytes 0-7 (halfwords 0-3)
	
	ldrh       r3, [r0]
	ldrh       r4, [r0,#2]
	ldrh       r5, [r0,#4]
	ldrh       r6, [r0,#6]

	strh       r3, [r1]
	strh       r4, [r1,#2]
	strh       r5, [r1,#4]
	strh       r6, [r1,#6]

; bytes 8-15 (halfwords 4-7)

	ldrh       r3, [r0,#8]
	ldrh       r4, [r0,#10]
	ldrh       r5, [r0,#12]
	ldrh       r6, [r0,#14]

	strh       r3, [r1,#8]
	strh       r4, [r1,#10]
	strh       r5, [r1,#12]
	strh       r6, [r1,#14]
	
; advance src and dst to the next row

	add       r0, r0, r2
	add       r1, r1, r2
	
	subs	r7,r7,#1	; one row done
	bne	chalf_loop	; repeat until 14 rows are copied

;;; the loop above covers the first 14 rows (with preload);
;;; the last two rows are unrolled below without preload, because
;;; rows 15 and 16 were already preloaded by the loop

; ======== row 15 ========
	;; no preload
	
; bytes 0-7 (halfwords 0-3)
	
	ldrh       r3, [r0]
	ldrh       r4, [r0,#2]
	ldrh       r5, [r0,#4]
	ldrh       r6, [r0,#6]

	strh       r3, [r1]
	strh       r4, [r1,#2]
	strh       r5, [r1,#4]
	strh       r6, [r1,#6]

; bytes 8-15 (halfwords 4-7)

	ldrh       r3, [r0,#8]
	ldrh       r4, [r0,#10]
	ldrh       r5, [r0,#12]
	ldrh       r6, [r0,#14]

	strh       r3, [r1,#8]
	strh       r4, [r1,#10]
	strh       r5, [r1,#12]
	strh       r6, [r1,#14]
	
; advance src and dst to the last row

	add       r0, r0, r2
	add       r1, r1, r2

; ======== row 16 ========
	;; no preload, and no stride add afterwards (last row)
	
	
; bytes 0-7 (halfwords 0-3)
	
	ldrh       r3, [r0]
	ldrh       r4, [r0,#2]
	ldrh       r5, [r0,#4]
	ldrh       r6, [r0,#6]

	strh       r3, [r1]
	strh       r4, [r1,#2]
	strh       r5, [r1,#4]
	strh       r6, [r1,#6]

; bytes 8-15 (halfwords 4-7)

	ldrh       r3, [r0,#8]
	ldrh       r4, [r0,#10]
	ldrh       r5, [r0,#12]
	ldrh       r6, [r0,#14]

	strh       r3, [r1,#8]
	strh       r4, [r1,#10]
	strh       r5, [r1,#12]
	strh       r6, [r1,#14]
			
; return: restore r4-r8 and load the saved lr into pc
	ldmia     sp!, {r4 - r8, pc}  ; ldmfd



;----- copy full words -----

copy_full_words

	;; registers:
	;; r0 src ptr
	;; r1 dst ptr
	;; r2 stride
	;; 
	;; r3-r8 available

	;; set up r8 to contain a src preload address
			
; ======== loop 1 ======== 
; copy 4 words

	;; load the words
	ldr       r3, [r0]
	ldr       r4, [r0,#4]
	ldr       r5, [r0,#8]
	ldr       r6, [r0,#12]

	;; preload the n+2 stride
	& 0xf5d8f000		; pld[r8] loop n+2 src beginning
	& 0xf5d8f00f		; pld[r8,#f] loop n+2 src end
	add	r8,r2,r8	; r8 has pointer to next src line
	
	; add stride to src (for loop n+1)
	add       r0, r0, r2

	str       r3, [r1]
	str       r4, [r1,#4]
	str       r5, [r1,#8]
	str       r6, [r1,#12]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -