; File: xllp_wmmx_regs.s
; * Xllp_Store_All_WMMX_Regs *
; * *
; *******************************
;
;
;
; Stores all WMMX Registers as listed below
; Saving:
; CP0, R0 - 15
; CP1, R2, R3, R8 - 11
;
; NOTE: This routine was written to optimize both the read performance and
; the register usage. It was designed against the EAS stated instruction
; latencies for tmrc and tmrrc instructions in hopes of keeping the
; connection between the coprocessor and core maximized with respect to bandwidth.
; As such, the registers are not saved in a completely linear fashion; they are
; interleaved. As a result, the restore must read the save area back in the
; same interleaved order in which it was written.
;
;******************************************************************************
Xllp_Store_All_WMMX_Regs FUNCTION
        ; Purpose: copy the WMMX state read below into the buffer addressed by r0.
        ; In:      r0 = base of the save area; 38 words (152 bytes) are written.
        ; Out:     none. r0-r11 and r14 are stacked on entry and reloaded on exit.
        ;
        ; Save-area layout (byte offsets, in the order written):
        ;     0 wC2     4 wC3     8 wR0    16 wR1    24 wR2    32 wC8
        ;    36 + 8*(n-3) wRn for n = 3..15   (wR3 at 36 ... wR15 at 132)
        ;   140 wC9   144 wC10  148 wC11
        ; Each wRn occupies two words; the first tmrrc destination register
        ; (the low half, per the wR0 comment below) is stored first.
        ;
        ; The coprocessor reads (tmrc/tmrrc) are deliberately interleaved with the
        ; stores of previously read values. Do not reorder them without
        ; re-checking the layout against Xllp_Restore_All_WMMX_Regs.
        stmdb sp!, {r0 - r11, r14}      ; Store registers to the stack so we don't munge anything
        ;Save pointer comes in R0, move to R10 so r0-r9 are free as transfer registers
        mov r10, r0
        tmrc r0, wC2                    ;CP1, Reg2 -> Core, R0
        tmrc r1, wC3                    ;CP1, Reg3 -> Core, R1
        tmrrc r2, r3, wR0               ;CP0, Reg0 -> Core, R2 (Lo) R3 (High)
        ;Now store to location referenced by R10, post-increment by 4 bytes
        str r0, [r10],#4                ;Store wC2
        str r1, [r10],#4                ;Store wC3
        tmrrc r4, r5, wR1
        str r2, [r10], #4               ;Now store wR0
        str r3, [r10], #4
        tmrrc r0, r1, wR2
        str r4, [r10], #4               ;Store wR1
        str r5, [r10], #4
        tmrc r2, wC8                    ;CP1, Reg8 -> Core, R2
        tmrrc r6, r7, wR3
        str r0, [r10], #4               ;Store wR2
        str r1, [r10], #4
        tmrrc r4, r5, wR4
        str r2, [r10], #4               ;Store wC8 (r2 was loaded by the tmrc above)
        tmrrc r8, r9, wR5
        str r6, [r10], #4               ;Store wR3
        str r7, [r10], #4
        tmrrc r2, r3, wR6
        str r4, [r10], #4               ;Store wR4
        str r5, [r10], #4
        tmrrc r0, r1, wR7
        str r8, [r10], #4               ;Store wR5
        str r9, [r10], #4
        tmrrc r6, r7, wR8
        str r2, [r10], #4               ;Store wR6
        str r3, [r10], #4
        tmrrc r4, r5, wR9
        str r0, [r10], #4               ;Store wR7
        str r1, [r10], #4
        tmrrc r8, r9, wR10
        str r6, [r10], #4               ;Store wR8
        str r7, [r10], #4
        tmrrc r2, r3, wR11
        str r4, [r10], #4               ;Store wR9
        str r5, [r10], #4
        tmrrc r0, r1, wR12
        str r8, [r10], #4               ;Store wR10
        str r9, [r10], #4
        tmrrc r6, r7, wR13
        str r2, [r10], #4               ;Store wR11
        str r3, [r10], #4
        tmrrc r4, r5, wR14
        str r0, [r10], #4               ;Store wR12
        str r1, [r10], #4
        tmrrc r8, r9, wR15
        str r6, [r10], #4               ;Store wR13
        str r7, [r10], #4
        tmrc r2, wC9
        str r4, [r10], #4               ;Store wR14
        str r5, [r10], #4
        tmrc r0, wC10
        str r8, [r10], #4               ;Store wR15
        str r9, [r10], #4
        tmrc r1, wC11
        str r2, [r10], #4               ;Store wC9
        str r0, [r10], #4               ;Store wC10
        str r1, [r10], #4               ;Store wC11
        ; Now clear the control MUP & CUP bits (Control Update Bits)
        ; These are WRITE 1 TO CLEAR!
        mov r3, #0x3                    ;Set the 2 lowest bits == 1
        tmcr wC1, r3                    ;Now Clear the CUP & MUP bits
        ;CPWAIT r0
        ; NOTE(review): the MOV below writes r2 rather than r0; it still reads r0,
        ; so it still waits on the MRC result. r0 and r2 are both reloaded below.
        MRC P15, 0, r0, C2, C0, 0       ; arbitrary read of CP15
        MOV r2, r0                      ; wait for it (forward dependency)
        SUB PC, PC, #4                  ; branch to next instruction
        ldmia sp!, {r0 - r11, r14}      ;Now restore the registers we stacked
        IF Interworking :LOR: Thumbing
        bx lr                           ; return
        ELSE
        mov pc, lr                      ; return
        ENDIF                           ; IF Interworking :LOR: Thumbing
        ENDFUNC
;******************************************************************************
;
; *******************************
; * *
; * Xllp_Restore_All_WMMX_Regs *
; * *
; *******************************
;
;
;
; Restores all WMMX Registers saved by the above Store function
; Restoring:
; CP0, R0 - 15
; CP1, R2, R3, R8 - 11
;
; NOTE: This routine was written to optimize both the read performance and
; the register usage. It was designed against the EAS stated instruction
; latencies for tmcr and tmcrr instructions in hopes of keeping the
; connection between the coprocessor and core maximized with respect to bandwidth.
; As such, the registers are not saved in a completely linear fashion; they are
; interleaved. Due to the order saved, the restore order is also not sequential.
;
;******************************************************************************
Xllp_Restore_All_WMMX_Regs FUNCTION
        ; Purpose: reload the WMMX registers from the buffer addressed by r0.
        ; In:      r0 = base of the save area; 38 words (152 bytes) are read.
        ; Out:     none. r0-r11 and r14 are stacked on entry and reloaded on exit.
        ;
        ; The buffer is consumed front to back in this order, which is the order
        ; Xllp_Store_All_WMMX_Regs writes it:
        ;   wC2, wC3, wR0, wR1, wR2, wC8, wR3 .. wR15, wC9, wC10, wC11
        ; Each wRn is two words; the first word loaded is passed as the first
        ; tmcrr source register.
        ;
        ; Loads are interleaved with the coprocessor writes (tmcr/tmcrr) of values
        ; loaded earlier, so each write trails its loads by a few instructions.
        stmdb sp!, {r0 - r11, r14}      ;Store registers to the stack so we don't munge anything
        ;Save pointer comes in R0, move to R10 so r0-r7 are free as transfer registers
        mov r10, r0
        ldr r0, [r10], #4               ;Load wC2
        ldr r1, [r10], #4               ;Load wC3
        ldr r2, [r10], #4               ;Load wR0
        ldr r3, [r10], #4
        tmcr wC2, r0
        tmcr wC3, r1
        ldr r0, [r10], #4               ;Load wR1
        ldr r1, [r10], #4
        tmcrr wR0, r2, r3
        ldr r2, [r10], #4               ;Load wR2
        ldr r3, [r10], #4
        tmcrr wR1, r0, r1
        ldr r0, [r10], #4               ;Load wC8
        ldr r4, [r10], #4               ;Load wR3
        ldr r5, [r10], #4
        tmcrr wR2, r2, r3
        ldr r6, [r10], #4               ;Load wR4
        ldr r7, [r10], #4
        tmcr wC8, r0
        ldr r0, [r10], #4               ;Load wR5
        ldr r1, [r10], #4
        tmcrr wR3, r4, r5
        ldr r2, [r10], #4               ;Load wR6
        ldr r3, [r10], #4
        tmcrr wR4, r6, r7
        ldr r4, [r10], #4               ;Load wR7
        ldr r5, [r10], #4
        tmcrr wR5, r0, r1
        ldr r0, [r10], #4               ;Load wR8
        ldr r1, [r10], #4
        tmcrr wR6, r2, r3
        ldr r2, [r10], #4               ;Load wR9
        ldr r3, [r10], #4
        tmcrr wR7, r4, r5
        ldr r4, [r10], #4               ;Load wR10
        ldr r5, [r10], #4
        tmcrr wR8, r0, r1
        ldr r0, [r10], #4               ;Load wR11
        ldr r1, [r10], #4
        tmcrr wR9, r2, r3
        ldr r2, [r10], #4               ;Load wR12
        ldr r3, [r10], #4
        tmcrr wR10, r4, r5
        ldr r4, [r10], #4               ;Load wR13
        ldr r5, [r10], #4
        tmcrr wR11, r0, r1
        ldr r0, [r10], #4               ;Load wR14
        ldr r1, [r10], #4
        tmcrr wR12, r2, r3
        ldr r2, [r10], #4               ;Load wR15
        ldr r3, [r10], #4
        tmcrr wR13, r4, r5
        ldr r4, [r10], #4               ;Load wC9
        tmcrr wR14, r0, r1
        ldr r5, [r10], #4               ;Load wC10
        tmcrr wR15, r2, r3
        ldr r0, [r10], #4               ;Load wC11
        tmcr wC9, r4
        tmcr wC10, r5
        tmcr wC11, r0
        ; Now clear the control MUP & CUP bits (Control Update Bits)
        ; These are WRITE 1 TO CLEAR!
        mov r1, #0x3                    ;Set the 2 lowest bits == 1
        tmcr wC1, r1                    ;Now Clear the CUP & MUP bits
        ;CPWAIT r2
        MRC P15, 0, r2, C2, C0, 0       ; arbitrary read of CP15
        MOV r2, r2                      ; wait for it (forward dependency)
        SUB PC, PC, #4                  ; branch to next instruction
        ldmia sp!, {r0 - r11, r14}      ;Now restore the registers we stacked
        IF Interworking :LOR: Thumbing
        bx lr                           ; return
        ELSE
        mov pc, lr                      ; return
        ENDIF                           ; IF Interworking :LOR: Thumbing
        ENDFUNC
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -