⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utilsarm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
字号:
;/* ************************************************************************ *\
;**    INTEL Corporation Proprietary Information
;**
;**    This listing is supplied under the terms of a license
;**    agreement with INTEL Corporation and may not be copied
;**    nor disclosed except in accordance with the terms of
;**    that agreement.
;**
;**    Copyright (c) 2003 Intel Corporation.
;**    All Rights Reserved.
;**
;** ************************************************************************ **
;**	FILE: UtilsARM.s
;**	DESCRIPTION: Optimized vertex index utility routines (min/max index
;**	             scan, with optional 8 bit to 16 bit index conversion).
;**	
;**	AUTHOR: Cian Montgomery
;**	CREATED: July 31, 2003
;**
;\* ************************************************************************ */
;	INCLUDE HXFState.inc ; Definitions of the HXFState Structure

;** ************************************************************************ **
;**	CONSTANTS
;** ************************************************************************ **

;** ************************************************************************ **
;**	EXPORTS
;** ************************************************************************ **
	EXPORT	|HXFCountVertices|
	EXPORT	|HXFCountVertices_8To16|

;** ************************************************************************ **
;**	VARIABLES
;** ************************************************************************ **

;** ************************************************************************ **
;**	MACROS
;** ************************************************************************ **

;** ************************************************************************ **
;**	FUNCTIONS
;** ************************************************************************ **
	AREA	.text, CODE, READONLY

;** ************************************************************************ **
; Name:	HXFCountVertices_8To16
; Description:	Iterates an array of 8 bit indices, converts them to 16 bit
;		and returns the min and max indices in the array.
;		Nothing is read or written when in_NumIndices is 0.
; Requirements (only for in_NumIndices >= 4, i.e. the Wireless MMX path):
;		- FIXME potential issues if the Src array is not word aligned
;		  (four indices are fetched at a time with wldrw).
;		- Dst array must be DWORD aligned and padded to a multiple of
;		  8 bytes in size: a trailing group of 1-3 indices is still
;		  stored as a full DWORD.
;		- Lane order assumes little-endian data.
; Register Map:
;  r0 - out_pIndices - Shorts
;  r1 - in_pIndices - Bytes
;  r2 - in_NumIndices
;  r3 - out_pMinIndex
;  r4 - running min (small list path only, saved/restored)
;  r5 - running max (small list path only, saved/restored)
;  r12 - scratch
;  r14 - Return Address
; [sp] = out_pMaxIndex
;  wr0 - current group of four indices
;  wr6, wr7 - temporaries for the final lane reduction
;  wr8 - per-lane running min
;  wr9 - per-lane running max
; Prototype in C:	void HXFCountVertices_8To16(HUINT16* pIndices, 
;							const HUINT8* pSrcIndices, HUINT32 NumIndices,
;								HUINT32* out_pMinIndex, HUINT32* out_pMaxIndex);
;** ************************************************************************ **
|HXFCountVertices_8To16| PROC
	; Test For empty list (the count is in r2; r1 is the source pointer)
	cmp r2, #0
	moveq pc, lr

	; handle small sets of indices. (1-3)
	cmp r2, #4
	bge BEGIN_PROCESSING_8To16
	stmfd sp!, {r4-r5}	; out_pMaxIndex is now at [sp, #8]

	ldrb r4, [r1] ; Load first - initialize min and max
	strh r4, [r0] ; ... and emit it as a 16 bit index
	mov r5, r4

	cmp r2, #2
	blt SMALL_LIST_DONE_8To16
	ldrb r12, [r1, #1] ; second index
	strh r12, [r0, #2]

	; ldrb zero-extends, so signed compares are safe for 0..255
	cmp r12, r4		; -- Min
	movlt r4, r12

	cmp r12, r5		; -- Max
	movgt r5, r12
	
	cmp r2, #3
	blt SMALL_LIST_DONE_8To16
	ldrb r12, [r1, #2] ; third index: read from Src, write to Dst
	strh r12, [r0, #4]

	cmp r12, r4	 ; -- Min
	movlt r4, r12

	cmp r12, r5	 ; -- Max
	movgt r5, r12

SMALL_LIST_DONE_8To16
	; The stack slot holds the POINTER out_pMaxIndex; it sits above the
	; two registers pushed on entry to this path.
	ldr r12, [sp, #8]
	str r4, [r3]
	str r5, [r12]
	ldmfd sp!, {r4-r5}
	mov pc, lr

BEGIN_PROCESSING_8To16
	; Start some preloads of the source bytes
	pld [r1, #8]	 ; 2 words ahead
	pld [r1, #16]	 ; 4 words ahead

	; r2 >= 4 here, so the first four indices always exist.
	; Use them to seed both the Min and the Max vectors.
	wldrw wr8, [r1] ; first 4 byte indices
	add r1, r1, #4
	sub r2, r2, #4

	wunpckelub wr8, wr8 ; Convert to shorts
	wstrd wr8, [r0]		; Store the converted values
	add r0, r0, #8		; advance past the four shorts just written

	wmov wr9, wr8  ; initialize the Max with the initial state

	cmp r2, #4
	blt END_LOOP_8To16

	; Enter main loop - four indices per iteration
BEGIN_LOOP_8To16	
	pld [r1, #16]	 ; prefetch source 4 words ahead
	wldrw wr0, [r1]
	
	; update the loop counter
	; (flags from this cmp are consumed by the bge at the bottom; none of
	;  the instructions in between update the ARM condition flags)
	sub r2, r2, #4
	add r1, r1, #4
	cmp r2, #4

	wunpckelub wr0, wr0 ; Convert to shorts
	wstrd wr0, [r0]		; Store the converted values

	wmaxuh wr9, wr9, wr0
	wminuh wr8, wr8, wr0

	; Update the output pointer
	add r0, r0, #8

	; loop 
	bge BEGIN_LOOP_8To16
END_LOOP_8To16
	cmp r2, #0
	beq PROCESS_MIN_MAX_8To16

	; Process trailing partial group (r2 = 1..3 indices left)
	; POTENTIAL ENDIAN ISSUE
	; A whole word is read and a whole DWORD is written; see the
	; alignment/padding requirements in the function header.
 	wldrw wr0, [r1]
	
	wunpckelub wr0, wr0 ; Convert to shorts
	wstrd wr0, [r0]		; Store the converted values
	
	; Replicate the valid lanes over the invalid ones so that stale bytes
	; past the end of the list cannot influence the min/max.
	cmp r2, #3
	wshufheq wr0, wr0, #0xA4	 ; 10, 10, 01, 00
	cmp r2, #2
	wshufheq wr0, wr0, #0x54     ;  01, 01, 01, 00
	cmp r2, #1
	wshufheq wr0, wr0, #0x00     ; 00, 00, 00, 00

	wmaxuh wr9, wr9, wr0
	wminuh wr8, wr8, wr0
	
PROCESS_MIN_MAX_8To16	
	; Find the actual Max  and Min: fold the four lanes into lane 0.
	; tbcstb is used to move the shift count into a WMMX register.
	mov r12, #32
	tbcstb wr0, r12
	wsrld wr6, wr8, wr0	; lanes 2,3 -> lanes 0,1
	wsrld wr7, wr9, wr0
	wminuh wr8, wr8, wr6
	wmaxuh wr9, wr9, wr7

	mov r12, #16
	tbcstb wr0, r12
	wsrld wr6, wr8, wr0	; lane 1 -> lane 0
	wsrld wr7, wr9, wr0
	wminuh wr8, wr8, wr6
	wmaxuh wr9, wr9, wr7

	; Store the results. 
	textrmuh r12, wr8, #0 ; pull out the result 
	str r12, [r3]
	ldr r1, [sp]		; r1 is dead now: fetch the out_pMaxIndex pointer
	textrmuh r12, wr9, #0	; pull out the result 
	str r12, [r1]

	mov pc, lr
	ENDP
;** ************************************************************************ **



;** ************************************************************************ **
; Name:	HXFCountVertices
; Description:	Iterates an array of 16 bit indices and returns the min and 
;		max indices in the array.
;		Returns without writing anything when in_NumIndices is 0.
;		Lists of 1-3 indices are scanned with plain ARM loads; longer
;		lists are scanned four indices at a time with Wireless MMX.
;		NOTE(review): the lead-in below only tests address bits 1 and 2,
;		so in_pIndices is presumably halfword aligned - confirm with callers.
; Register Map:
;  r0 - in_pIndices
;  r1 - in_NumIndices
;  r2 - out_pMinIndex
;  r3 - out_pMaxIndex
;  r4 - running min (small list path only, saved/restored)
;  r5 - running max (small list path only, saved/restored)
;  r12 - scratch
;  r14 - Return Address
;  wr0 - current group of indices / shift count during the reduction
;  wr6, wr7 - temporaries for the final lane reduction
;  wr8 - per-lane running min
;  wr9 - per-lane running max
; Prototype in C:	void HXFCountVertices(const HUINT16* pIndices, HUINT32 NumIndices,
;								HUINT32* out_pMinIndex, HUINT32* out_pMaxIndex);
;** ************************************************************************ **
|HXFCountVertices| PROC

	; Test for empty list
	cmp r1, #0
	moveq pc, lr

	; handle small sets of indices. (1-3)
	cmp r1, #4
	bge BEGIN_PROCESSING
	stmfd sp!, {r4-r5}

	ldrh r4, [r0] ; Load first - initialize min and max
	mov r5, r4

	cmp r1, #2
	blt SMALL_LIST_DONE
	ldrh r12, [r0, #2] ; Load second index

	; ldrh zero-extends, so the signed lt/gt conditions are safe here
	cmp r12, r4		; -- Min
	movlt r4, r12

	cmp r12, r5		; -- Max
	movgt r5, r12

	cmp r1, #3
	blt SMALL_LIST_DONE
	ldrh r12, [r0, #4] ; Load third index

	cmp r12, r4	 ; -- Min
	movlt r4, r12

	cmp r12, r5	 ; -- Max
	movgt r5, r12

SMALL_LIST_DONE
	; Write the results and restore the registers saved above
	str r4, [r2]
	str r5, [r3]
	ldmfd sp!, {r4-r5}
	mov pc, lr

BEGIN_PROCESSING
	; Start some preloads
	mvn	r12, #0x07
	and r12, r0, r12 ; Compute dword aligned address
	pld [r12, #16]	 ; 2 ahead
	pld [r12, #24]	 ; 3 ahead
	pld [r12, #32]	 ; 4 ahead

	
	mov r12, #0 	; r12 = bit mask of lead-in loads done (1 = halfword, 2 = word)
	; Align to dword (8 Bytes)
	; No leading anything
	; Already aligned: seed the Min with a full dword and skip the lead-in.
	; The flags from tst stay valid down to the beq; nothing in between
	; sets them.
	tst r0, #0x07 ; - 0b111
	wldrdeq wr8, [r0] ; initialize the Min with the first 4 values	
	addeq r0, r0, #8
	subeq r1, r1, #4
	beq 	MAIN_LOOP

	; Check for leading Half word - 0b010
	; If address bit 1 is set, consume one index into wr8
	tst r0, #0x02 ; - 0b010
	wldrhne wr8, [r0] 
	addne r12, r12, #1
	addne r0, r0, #2
	subne r1, r1, #1

	; Check for leading Word
	; If address bit 2 is (now) set, consume two indices into wr0
	tst r0, #0x04 ; - 0b100

	wldrwne wr0, [r0]
	addne r12, r12, #2
	addne r0, r0, #4
	subne r1, r1, #2

	; do some fancy shuffles to get all valid data in to the register
	; POTENTIAL ENDIAN ISSUE
	; Goal: every lane of wr8 holds a real index from the lead-in.
	;  halfword loaded     -> replicate it into all four lanes
	;  no halfword loaded  -> duplicate the loaded word into both halves
	tst r12, #1
	wshufhne wr8, wr8, #0x0	 ; 
	wshufheq wr8, wr0, #0x44 ; word shuffle - dup lower word to upper word	

	; If a word was loaded, place it in the upper half of wr8 (the lower
	; half keeps what the shuffle above produced).
	tst r12, #2
	wunpckilwne	wr8, wr8, wr0

MAIN_LOOP	
	wmov wr9, wr8  ; initialize the Max with the initial state

	; Fewer than four indices left: skip straight to the tail handling
	cmp r1, #4
	blt END_LOOP

	; Enter main loop
	; Four indices per iteration
BEGIN_LOOP	
	pld [r0, #32]	 ; prefetch 4 ahead
	wldrd wr0, [r0]
	
	; update the loop counter
	; (the flags from this cmp are consumed by the bge below)
	sub r1, r1, #4
	add r0, r0, #8
	cmp r1, #4

	wmaxuh wr9, wr9, wr0
	wminuh wr8, wr8, wr0

	; loop 
	bge BEGIN_LOOP
END_LOOP
	cmp r1, #0
	beq PROCESS_MIN_MAX	

	; Process trailing partial dword indices
	; POTENTIAL ENDIAN ISSUE
	; FIXME - will I ever get an access violation in this case. 
	; NOTE(review): r0 appears to be dword aligned on every path reaching
	; this point, so the over-read stays inside one aligned dword - confirm
	; that this is safe on the target.
 	wldrd wr0, [r0]
	
	; do some fancy shuffles to get all valid data in to the register
	; r1 = 1..3 valid indices; overwrite the lanes beyond them with copies
	; of valid lanes so they cannot affect the result.
	cmp r1, #3
	wshufheq wr0, wr0, #0xA4	 ; 10, 10, 01, 00
	cmp r1, #2
	wshufheq wr0, wr0, #0x54     ;  01, 01, 01, 00
	cmp r1, #1
	wshufheq wr0, wr0, #0x00     ; 00, 00, 00, 00

	wmaxuh wr9, wr9, wr0
	wminuh wr8, wr8, wr0
	
PROCESS_MIN_MAX	

	; Find the actual Max  and Min
	; Fold the four lanes into lane 0: shift right by 32, combine, then
	; shift right by 16, combine. tbcstb puts the shift count into wr0.
	mov r12, #32
	tbcstb wr0, r12
	wsrld wr6, wr8, wr0
	wsrld wr7, wr9, wr0
	wminuh wr8, wr8, wr6
	wmaxuh wr9, wr9, wr7

	mov r12, #16
	tbcstb wr0, r12
	wsrld wr6, wr8, wr0
	wsrld wr7, wr9, wr0
	wminuh wr8, wr8, wr6
	wmaxuh wr9, wr9, wr7

	; Store the results. 
	; Halfword 0 is extracted without sign extension and stored as a word
	textrmuh r12, wr8, #0 ; pull out the result 
	str r12, [r2]
	textrmuh r12, wr9, #0	; pull out the result 
	str r12, [r3]

	mov pc, lr
	ENDP
;** ************************************************************************ **

	END

;/* ************************************************************************ *\
;** ************************************************************************ **	
;** EOF
;** ************************************************************************ **		
;\* ************************************************************************ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -