; utilsarm.s
;/* ************************************************************************ *\
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 2003 Intel Corporation.
;** All Rights Reserved.
;**
;** ************************************************************************ **
;** FILE: UtilsARM.s
;** DESCRIPTION: Optimized index-array min/max scanning routines.
;**
;** AUTHOR: Cian Montgomery
;** CREATED: July 31, 2003
;**
;\* ************************************************************************ */
; INCLUDE HXFState.inc ; Definitions of the HXFState Structure
;** ************************************************************************ **
;** CONSTANTS
;** ************************************************************************ **
;** ************************************************************************ **
;** EXPORTS
;** ************************************************************************ **
EXPORT |HXFCountVertices|
EXPORT |HXFCountVertices_8To16|
;** ************************************************************************ **
;** VARIABLES
;** ************************************************************************ **
;** ************************************************************************ **
;** MACROS
;** ************************************************************************ **
;** ************************************************************************ **
;** FUNCTIONS
;** ************************************************************************ **
AREA .text, CODE, READONLY
;** ************************************************************************ **
; Name: HXFCountVertices_8To16
; Description: Iterates an array of 8 bit indices, converts them to 16 bit
; and returns the min and max indices in the array
; FIXME potential issues if the Src Array is not word aligned
; Dst array must be DWORD Aligned and must be an even multiple
; of 8 in size
; Register Map:
; r0 - out_pIndices - Shorts
; r1 - in_pIndices - Bytes
; r2 - in_NumIndices
; r3 - out_pMinIndex
; r12 -
; r14 - Return Address
; [sp] = out_pMaxIndex
; Prototype in C: void HXFCountVertices_8To16(HUINT16* pIndices,
; const HUINT8* pSrcIndices, HUINT32 NumIndices,
; HUINT32* out_pMinIndex, HUINT32* out_pMaxIndex);
;** ************************************************************************ **
|HXFCountVertices_8To16| PROC
        ; Widens in_NumIndices (r2) 8-bit indices read from in_pIndices (r1)
        ; into 16-bit values written to out_pIndices (r0), and stores the
        ; smallest / largest index to *out_pMinIndex (r3) and to
        ; *out_pMaxIndex (pointer passed as the fifth argument at [sp]).
        ;
        ; Preconditions:
        ;   - r1 must be word aligned for lists of 4 or more indices
        ;     (the vector path reads the source with wldrw).
        ;   - r0 must be dword aligned and the destination buffer padded to
        ;     a multiple of 8 bytes (the vector path writes with wstrd,
        ;     four halfwords at a time, including for a trailing partial group).
        ; Clobbers: r0-r2, r12, wr0, wr6-wr9, flags.
        ;           r4/r5 are saved and restored on the small-list path.
        ; NOTE(review): the original routine was marked "not debugged";
        ; re-test on target hardware before relying on it.

        ; Nothing to do for an empty list (or a null source pointer).
        cmp     r2, #0
        cmpne   r1, #0
        moveq   pc, lr

        ; ---- Small lists (1-3 indices): scalar code -----------------------
        cmp     r2, #4
        bge     BEGIN_PROCESSING_8To16
        stmfd   sp!, {r4-r5}            ; r4 = running min, r5 = running max
        ldrb    r4, [r1]                ; first index seeds both min and max
        strh    r4, [r0]
        mov     r5, r4
        cmp     r2, #2
        blt     SMALL_LIST_DONE_8To16
        ldrb    r12, [r1, #1]           ; second index
        strh    r12, [r0, #2]
        cmp     r12, r4                 ; -- Min
        movlt   r4, r12
        cmp     r12, r5                 ; -- Max
        movgt   r5, r12
        cmp     r2, #3
        blt     SMALL_LIST_DONE_8To16
        ldrb    r12, [r1, #2]           ; third index
        strh    r12, [r0, #4]
        cmp     r12, r4                 ; -- Min
        movlt   r4, r12
        cmp     r12, r5                 ; -- Max
        movgt   r5, r12
SMALL_LIST_DONE_8To16
        ldr     r12, [sp, #8]           ; out_pMaxIndex: the stack argument now
                                        ; sits above the two saved registers
        str     r4, [r3]
        str     r5, [r12]
        ldmfd   sp!, {r4-r5}
        mov     pc, lr

        ; ---- Vector path (4 or more indices) ------------------------------
BEGIN_PROCESSING_8To16
        ; Start some preloads on the source stream
        pld     [r1, #8]                ; 2 words ahead
        pld     [r1, #16]               ; 4 words ahead

        ; Seed min (wr8) and max (wr9) with the first four indices so that
        ; every lane holds real data before the loop starts.
        wldrw   wr8, [r1]
        add     r1, r1, #4
        sub     r2, r2, #4
        wunpckelub wr8, wr8             ; Convert to shorts
        wstrd   wr8, [r0]               ; Store the converted values
        add     r0, r0, #8              ; step past the four shorts just written
        wmov    wr9, wr8                ; initialize the Max with the initial state
        cmp     r2, #4
        blt     END_LOOP_8To16

        ; Main loop: four indices per iteration
BEGIN_LOOP_8To16
        pld     [r0, #16]               ; preload hint on the destination, 2 dwords ahead
        wldrw   wr0, [r1]
        ; update the loop counter; the flags from this cmp are consumed by
        ; the bge at the bottom (nothing in between alters them)
        sub     r2, r2, #4
        add     r1, r1, #4
        cmp     r2, #4
        wunpckelub wr0, wr0             ; Convert to shorts
        wstrd   wr0, [r0]               ; Store the converted values
        wmaxuh  wr9, wr9, wr0
        wminuh  wr8, wr8, wr0
        ; Update the output pointer
        add     r0, r0, #8
        bge     BEGIN_LOOP_8To16

END_LOOP_8To16
        cmp     r2, #0
        beq     PROCESS_MIN_MAX_8To16
        ; Process the trailing 1-3 indices.
        ; POTENTIAL ENDIAN ISSUE
        ; A full word is read; with a word-aligned source the bytes past the
        ; last index lie in the same aligned word. A full dword is written,
        ; which is why the destination must be padded to a multiple of 8 bytes.
        wldrw   wr0, [r1]
        wunpckelub wr0, wr0             ; Convert to shorts
        wstrd   wr0, [r0]               ; Store the converted values
        ; Overwrite the lanes that hold no valid index with copies of valid
        ; ones so they cannot disturb the min/max.
        cmp     r2, #3
        wshufheq wr0, wr0, #0xA4        ; 10, 10, 01, 00
        cmp     r2, #2
        wshufheq wr0, wr0, #0x54        ; 01, 01, 01, 00
        cmp     r2, #1
        wshufheq wr0, wr0, #0x00        ; 00, 00, 00, 00
        wmaxuh  wr9, wr9, wr0
        wminuh  wr8, wr8, wr0

PROCESS_MIN_MAX_8To16
        ; Reduce the four lanes of wr8 / wr9 to a single min / max in lane 0.
        mov     r12, #32
        tbcstb  wr0, r12                ; shift count 32: fold lanes 2,3 onto 0,1
        wsrld   wr6, wr8, wr0
        wsrld   wr7, wr9, wr0
        wminuh  wr8, wr8, wr6
        wmaxuh  wr9, wr9, wr7
        mov     r12, #16
        tbcstb  wr0, r12                ; shift count 16: fold lane 1 onto 0
        wsrld   wr6, wr8, wr0
        wsrld   wr7, wr9, wr0
        wminuh  wr8, wr8, wr6
        wmaxuh  wr9, wr9, wr7
        ; Store the results.
        ldr     r2, [sp]                ; out_pMaxIndex (nothing was pushed on this path)
        textrmuh r12, wr8, #0           ; pull out the min
        str     r12, [r3]
        textrmuh r12, wr9, #0           ; pull out the max
        str     r12, [r2]
        mov     pc, lr
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFCountVertices
; Description: Iterates an array of 16 bit indices and returns the min and
; max indices in the array
; Register Map:
; r0 - in_pIndices
; r1 - in_NumIndices
; r2 - out_pMinIndex
; r3 - out_pMaxIndex
; r12 -
; r14 - Return Address
; Prototype in C: void HXFCountVertices(const HUINT16* pIndices, HUINT32 NumIndices,
; HUINT32* out_pMinIndex, HUINT32* out_pMaxIndex);
;** ************************************************************************ **
|HXFCountVertices| PROC
; Scans in_NumIndices (r1) 16-bit indices starting at in_pIndices (r0) and
; stores the smallest to *out_pMinIndex (r2) and the largest to
; *out_pMaxIndex (r3). Returns without writing either output when the
; count is zero.
; Registers written: r0, r1, r12, wr0, wr6-wr9, flags. r4/r5 are saved and
; restored on the small-list path.
; NOTE(review): the alignment code below only tests address bits 1 and 2, so
; it presumably expects r0 to be at least halfword aligned - confirm callers.
cmp r1, #0 ; empty list: return, outputs untouched
moveq pc, lr
; handle small sets of indices. (1-3)
cmp r1, #4
bge BEGIN_PROCESSING
stmfd sp!, {r4-r5} ; r4 = running min, r5 = running max
ldrh r4, [r0] ; Load first - initialize min and max
mov r5, r4
cmp r1, #2
blt SMALL_LIST_DONE
ldrh r12, [r0, #2] ; load second index
cmp r12, r4 ; -- Min
movlt r4, r12
cmp r12, r5 ; -- Max
movgt r5, r12
cmp r1, #3
blt SMALL_LIST_DONE
ldrh r12, [r0, #4] ; load third index
cmp r12, r4 ; -- Min
movlt r4, r12
cmp r12, r5 ; -- Max
movgt r5, r12
SMALL_LIST_DONE
; Write the scalar results and restore the saved registers.
str r4, [r2]
str r5, [r3]
ldmfd sp!, {r4-r5}
mov pc, lr
BEGIN_PROCESSING
; Vector path, taken for 4 or more indices.
; Start some preloads
mvn r12, #0x07
and r12, r0, r12 ; Compute dword aligned address
pld [r12, #16] ; 2 ahead
pld [r12, #24] ; 3 ahead
pld [r12, #32] ; 4 ahead
mov r12, #0 ; r12 now counts the leading indices consumed while aligning
; Align to dword (8 Bytes)
; No leading anything: source already dword aligned, so the first four
; indices seed the min directly and the alignment code is skipped.
; The add/sub below do not set flags, so every "eq" instruction up to and
; including the branch sees the result of this tst.
tst r0, #0x07 ; - 0b111
wldrdeq wr8, [r0] ; initialize the Min with the first 4 values
addeq r0, r0, #8
subeq r1, r1, #4
beq MAIN_LOOP
; Check for leading Half word - 0b010
tst r0, #0x02 ; - 0b010
wldrhne wr8, [r0] ; one leading index goes into wr8
addne r12, r12, #1
addne r0, r0, #2
subne r1, r1, #1
; Check for leading Word
tst r0, #0x04 ; - 0b100
wldrwne wr0, [r0] ; two leading indices go into wr0
addne r12, r12, #2
addne r0, r0, #4
subne r1, r1, #2
; do some fancy shuffles to get all valid data in to the register
; POTENTIAL ENDIAN ISSUE
; r12 bit 0 = a leading halfword was loaded, bit 1 = a leading word was loaded.
tst r12, #1
wshufhne wr8, wr8, #0x0 ; leading halfword: replicate lane 0 into all four lanes
wshufheq wr8, wr0, #0x44 ; word shuffle - dup lower word to upper word
tst r12, #2
wunpckilwne wr8, wr8, wr0 ; leading word: combine the low words of wr8 and wr0
MAIN_LOOP
wmov wr9, wr8 ; initialize the Max with the initial state
cmp r1, #4
blt END_LOOP ; fewer than four indices left: skip the loop
; Enter main loop
BEGIN_LOOP
; Four indices per iteration: wr8 accumulates per-lane minima, wr9 maxima.
pld [r0, #32] ; prefetch 4 ahead
wldrd wr0, [r0]
; update the loop counter; the flags from the cmp are consumed by the bge
; at the bottom of the loop
sub r1, r1, #4
add r0, r0, #8
cmp r1, #4
wmaxuh wr9, wr9, wr0
wminuh wr8, wr8, wr0
; loop
bge BEGIN_LOOP
END_LOOP
cmp r1, #0
beq PROCESS_MIN_MAX ; no leftover indices
; Process trailing partial dword indices
; POTENTIAL ENDIAN ISSUE
; FIXME - will I ever get an access violation in this case.
; A full dword is loaded although only r1 (1-3) indices remain.
wldrd wr0, [r0]
; do some fancy shuffles to get all valid data in to the register:
; lanes without a valid index are overwritten with copies of valid lanes
cmp r1, #3
wshufheq wr0, wr0, #0xA4 ; 10, 10, 01, 00
cmp r1, #2
wshufheq wr0, wr0, #0x54 ; 01, 01, 01, 00
cmp r1, #1
wshufheq wr0, wr0, #0x00 ; 00, 00, 00, 00
wmaxuh wr9, wr9, wr0
wminuh wr8, wr8, wr0
PROCESS_MIN_MAX
; Find the actual Max and Min: reduce the four lanes of wr8 / wr9 to lane 0
mov r12, #32
tbcstb wr0, r12 ; shift count 32 in wr0
wsrld wr6, wr8, wr0 ; bring the upper two lanes down onto the lower two
wsrld wr7, wr9, wr0
wminuh wr8, wr8, wr6
wmaxuh wr9, wr9, wr7
mov r12, #16
tbcstb wr0, r12 ; shift count 16 in wr0
wsrld wr6, wr8, wr0 ; bring lane 1 down onto lane 0
wsrld wr7, wr9, wr0
wminuh wr8, wr8, wr6
wmaxuh wr9, wr9, wr7
; Store the results.
textrmuh r12, wr8, #0 ; pull out the result (min, lane 0)
str r12, [r2]
textrmuh r12, wr9, #0 ; pull out the result (max, lane 0)
str r12, [r3]
mov pc, lr
ENDP
;** ************************************************************************ **
END
;/* ************************************************************************ *\
;** ************************************************************************ **
;** EOF
;** ************************************************************************ **
;\* ************************************************************************ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -