📄 nios_math1.s
字号:
.include "nios.s"
.ifdef __nios32__
.if __nios_use_fast_mul__
.text
#
# __mulsi3 / __mulhi3 -- integer multiply helper.
# Both symbols label the same entry point. Exactly one of the three
# conditionally assembled bodies that follow supplies the code.
#
# In:    %o0 = X, %o1 = Y
# Out:   %o0 = X * Y
# Clobb: %g0 in every body
#        %g1, %g2 in the MUL body and in the MSTEP body
#        %o1 in the MUL body (half-word swapped) and in the
#        unrolled body (shifted left)
# Every body returns with JMP %o7 and writes %o0 in the delay slot.
#
.globl __mulsi3
.globl __mulhi3
.type __mulsi3,@function
.type __mulhi3,@function
__mulsi3:
__mulhi3:
# SWAP emitted as a raw half-word: base opcode 0x7d00 plus the
# register number (%g0..%g2 are 0..2, %o1 is 9).
# NOTE(review): SWAP presumably exchanges the two half-words of the
# register, judging by how the multiply bodies use it -- confirm
# against the Nios ISA reference.
.MACRO SWAP_G0
.HALF 0x7d00+0
.ENDM
.MACRO SWAP_G1
.HALF 0x7d00+1
.ENDM
.MACRO SWAP_G2
.HALF 0x7d00+2
.ENDM
.MACRO SWAP_O1
.HALF 0x7d00+9
.ENDM
.if __nios_use_multiply__
#
# Multiplication with Nios
# MUL instruction
# (Not recognized by assembler, so
# it is here macro'd)
#
# _MUL emits the MUL opcode as a raw half-word: base 0x7ea0 plus the
# number of the source register given in rSrc.
.MACRO _MUL rSrc
.HALF 0x7ea0+\rSrc
.ENDM
# Convenience wrappers for the two argument registers:
# register number 8 is %o0, register number 9 is %o1.
.MACRO MUL_O0
_MUL 8
.ENDM
.MACRO MUL_O1
_MUL 9
.ENDM
; Sources are in %o0 and %o1, called X and Y below.
; The result is built as
;   xLo . yLo + ((xHi . yLo + xLo . yHi) << 16)
; The xHi . yHi term is never computed.
; NOTE(review): the annotations assume MUL multiplies the low half-words
; of %g0 and of its operand, and that SWAP exchanges the half-words of a
; register -- confirm against the Nios ISA reference.
; Start by multiplying X-low and Y-low
MOV %g0,%o0
MUL_O1 ; %g0 = xLo . yLo
MOV %g1,%g0 ; %g1 = xLo . yLo
MOV %g0,%o0
SWAP_G0 ; bring xHi into the low half of %g0
MUL_O1 ; %g0 = xHi . yLo
MOV %g2,%g0 ; %g2 = xHi . yLo
MOV %g0,%o0
SWAP_O1 ; bring yHi into the low half of %o1 (this modifies %o1)
MUL_O1 ; %g0 = xLo . yHi
ADD %g0,%g2 ; %g0 = xLo . yHi + xHi . yLo
MOVHI %g0,0 ; throw away the bits that would land above bit 31 after the shift
SWAP_G0 ; %g0 = (xLo . yHi + xHi . yLo) << 16
ADD %g0,%g1 ; %g0 = ((xLo . yHi + xHi . yLo) << 16) + xLo . yLo
JMP %o7
MOV %o0,%g0 ; (delay slot) %o0 = ((xLo . yHi + xHi . yLo) << 16) + xLo . yLo
.endif
.if (__nios_use_mstep__) && (!__nios_use_multiply__)
#
# Multiplication with Nios
# MSTEP instruction
# (Not recognized by assembler, so
# it is here macro'd)
#
.MACRO MSTEP_R1
.HALF 0x7e80+1
.ENDM
.MACRO MSTEP_R1_16x
;
; Sixteen consecutive MSTEP_R1 steps:
; multiply high bits of %g0 with low bits of %g1.
; Precondition: low bits of %g0 and high bits of %g1 are already ZERO.
;
.rept 16
MSTEP_R1
.endr
.ENDM
; Sources are in %o0 and %o1, called X and Y below.
; The result is built as
;   ((Xh * Yl + Xl * Yh) << 16) + Xl * Yl
; from three runs of MSTEP_R1_16x. Each run needs one factor in the
; high half of %g0 (low half zero) and the other factor in the low
; half of %g1 (high half zero).
; Start by multiplying X-high and Y-low
MOV %g0,%o0
PFX %hi(0xffff)
ANDN %g0,%lo(0xffff) ; clear low bits of X
MOV %g1,%o1
MOVHI %g1,0 ; clear high bits of Y
MSTEP_R1_16x
MOV %g2,%g0 ; g2 := Xh * Yl
; Multiply X-low and Y-high
MOV %g0,%o1
PFX %hi(0xffff)
ANDN %g0,%lo(0xffff) ; clear low bits of Y (%g0 holds Y here)
MOV %g1,%o0
MOVHI %g1,0 ; clear high bits of X (%g1 holds X here)
MSTEP_R1_16x
ADD %g2,%g0 ; g2 := Xh * Yl + Xl * Yh
MOVHI %g2,0 ; keep only the low 16 bits of the sum
SWAP_G2 ; g2 := (Xh * Yl + Xl * Yh) << 16
; Multiply X-low and Y-low
MOV %g0,%o0
MOVHI %g0,0 ; clear high bits of X
SWAP_G0 ; move X-low into the high half of %g0, low half is now zero
MOV %g1,%o1
; NOTE(review): EXT16s with selector 0 presumably leaves Y-low in %g1
; with the high bits zero, as MSTEP_R1_16x requires (the other two
; runs use MOVHI %g1,0 for this) -- confirm it does not sign-extend.
EXT16s %g1,0
MSTEP_R1_16x
ADD %g2,%g0 ; g2 := ((Xh * Yl + Xl * Yh) << 16) + Xl * Yl
JMP %o7
MOV %o0,%g2 ; (delay slot) %o0 = result
.endif
.if (!__nios_use_mstep__) && (!__nios_use_multiply__)
#
# Unrolled loop multiplication
# routine. For full 32 bit multiplications,
# this is faster than GNU's default one.
#
.MACRO ZSTEP bit
; One shift-and-add step for the multiplier bit selected by the
; macro argument.
; NOTE(review): SKP0 presumably skips the following ADD when the tested
; bit of %o0 is 0, so %o1 is added into %g0 only for set bits -- confirm
; against the Nios ISA reference. The ADD must stay directly after SKP0.
SKP0 %o0,\bit
ADD %g0,%o1
LSLI %o1,1 ; shift %o1 left by one, ready for the next higher bit
.ENDM
; Shift-and-add multiply: %g0 accumulates the result while one ZSTEP
; is expanded for each bit of %o0, from bit 0 up to bit 31.
MOVI %g0,0 ; accumulator starts at zero
.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
ZSTEP \idx
.endr
; All 32 bits, bit 31 included, have been processed at this point.
JMP %o7
MOV %o0,%g0 ; (delay slot) return the accumulated value in %o0
.endif
.Lfe1:
# __mulsi3 and __mulhi3 label the same code and are both declared as
# functions, so both symbols get the same size.
.size __mulsi3,.Lfe1-__mulsi3
.size __mulhi3,.Lfe1-__mulhi3
.endif
.endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -