📄 mathsm.h
字号:
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d ; fix up final remainder
ADC $q, $n, $n ; insert final answer bit
MOV $r, $q, LSR #16 ; extract reminder
BIC $q, $q, $r, LSL #16 ; extract quotient
MEND
; Signed 32/16 = 16 remainder 16
; as above but signed
; the sign temp register must be distinct from all the rest
MACRO
SDIV_32d16_16r16 $q, $r, $n, $d, $sign
; Signed wrapper around UDIV_32d16_16r16.
; $q = quotient, $r = remainder, $n = numerator, $d = denominator,
; $sign = scratch register that carries the result signs across the divide.
; The operand signs are folded into $sign, the unsigned divide is run, and
; then $q and $r are negated as required. $n and $d are modified.
; NOTE(review): DISTINCT is defined outside this view - presumably an
; assembly-time check that its arguments are different registers; confirm.
DISTINCT $sign, $q
DISTINCT $sign, $r
DISTINCT $sign, $n
DISTINCT $sign, $d
ANDS $sign, $d, #1<<31 ; sign = bit 31 of d only; N = sign of denominator
RSBPL $d, $d, #0 ; d was >= 0: negate it, so d = -|d| either way
EORS $sign, $sign, $n, ASR#32 ; ASR #32 = 32 copies of n's sign bit, also left in C
RSBCS $n, $n, #0 ; C set => n was negative: make numerator +ve
; sign bit 31 = sign of quotient (sign of d EOR sign of n)
; sign bit 30 = sign of remainder (sign of n)
; NOTE(review): the trailing "-" is presumably the same "divisor already
; negated" flag that UDIV_64d32_32r32 takes; the head of UDIV_32d16_16r16
; is not visible here - confirm.
UDIV_32d16_16r16 $q, $r, $n, $d, -
MOVS $sign, $sign, LSL#1 ; C = old bit 31 (quotient sign), N = old bit 30 (remainder sign)
RSBCS $q, $q, #0 ; negate quotient if the operand signs differed
RSBMI $r, $r, #0 ; negate remainder if the numerator was negative
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 32/32 DIVISION - 3 cycles per answer bit ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unsigned 32/32 = 32 with remainder 32
;
; n=numerator d=denominator
; q=quotient r=remainder
;
; ie n/d = q + r/d or n=q*d+r and 0<=r<d
; if d=0 then it returns q=0, r=n (so n=q*d+r !)
; registers must be distinct
; n and d are corrupted
;
; This algorithm is from the ARM C library and works the same way
; as standard long division. The divisor is shifted up as far as it will
; go without exceeding the numerator, then shifted back down one bit at a
; time; at each step it is subtracted from the remainder if it fits.
MACRO
UDIV_32d32_32r32 $q,$r,$n,$d
; Unsigned divide: on exit $q = quotient and $r = remainder of $n / $d.
; $n is overwritten with a copy of the divisor and $d is rescaled while
; the divide runs. Uses numeric local labels 00-08 (00 appears twice; each
; %BT00 binds to the nearest one above it).
MOV $q, #0 ; zero the quotient
MOV $r, $n ; remainder starts out as the whole numerator
MOVS $n, $d ; keep the unscaled divisor in n; Z set if d=0
BEQ %FT08 ; divide by 0: exit with q=0, r=numerator
; Scaling loop: move d up one byte at a time while d <= r>>8.
00
CMP $d, $r, LSR #8
MOVLS $d, $d, LSL #8 ; d <= r>>8, so d<<8 is still <= r (flags unchanged)
BLO %BT00 ; d was strictly below r>>8: another byte may fit
; Pick the entry point into the unrolled chain below. Each BHI is taken
; with C set. For targets 01-06 the preceding, untaken BHI has already
; shown r >= d<<k for the step being entered, so the ADC there inserts a 1
; and the SUBCS subtracts without needing a fresh CMP. Target 07 sits in
; front of its CMP because nothing is known about r versus d yet.
CMP $d, $r, LSR #1
BHI %FT07
CMP $d, $r, LSR #2
BHI %FT06
CMP $d, $r, LSR #3
BHI %FT05
CMP $d, $r, LSR #4
BHI %FT04
CMP $d, $r, LSR #5
BHI %FT03
CMP $d, $r, LSR #6
BHI %FT02
CMP $d, $r, LSR #7
BHI %FT01
; Top of each 8-bit round of the divide proper.
00
; The MOVHI is skipped on fall-through (the BHI just above was not taken,
; so HI is false). It takes effect when re-entered from the BNE at the
; bottom, where d is still scaled above the saved divisor in n.
MOVHI $d, $d, LSR #8 ; next round: bring d down by one byte
CMP $r, $d, LSL #7 ; C = (r >= d<<7)
ADC $q, $q, $q ; shift that answer bit into q
SUBCS $r, $r, $d, LSL #7 ; and take d<<7 off the remainder if it went
CMP $r, $d, LSL #6 ; C = (r >= d<<6), consumed after label 01
01
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #6
CMP $r, $d, LSL #5
02
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #5
CMP $r, $d, LSL #4
03
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #4
CMP $r, $d, LSL #3
04
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #3
CMP $r, $d, LSL #2
05
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #2
CMP $r, $d, LSL #1
06
ADC $q, $q, $q
SUBCS $r, $r, $d, LSL #1
07
CMP $r, $d ; last bit of this round: C = (r >= d)
ADC $q, $q, $q
SUBCS $r, $r, $d
CMP $d, $n ; is d back down to the unscaled divisor?
BNE %BT00 ; no: do another 8 answer bits
08
MEND
; signed 32/32 with remainder 32
;
; n=numerator d=denominator
; q=quotient r=remainder
; sign = an extra scratch register to store the signs in.
;
; ie n/d = q + r/d or n=q*d+r
; q is rounded towards zero and r has the same sign as n
; hence -3/2 = -1 remainder -1.
; 3/-2 = -1 remainder 1
; -3/-2 = 1 remainder -1.
; if d=0 then it returns q=0, r=n (so n=q*d+r !)
; registers must be distinct
MACRO
SDIV_32d32_32r32 $q, $r, $n, $d, $sign
; Signed wrapper around UDIV_32d32_32r32.
; $q = quotient, $r = remainder, $n = numerator, $d = denominator,
; $sign = scratch register that carries the result signs across the divide.
; Both operands are made non-negative, divided unsigned, and the results
; are then negated according to the two sign bits saved in $sign.
ANDS $sign, $d, #1<<31 ; sign = bit 31 of d only; N = sign of d
RSBMI $d, $d, #0 ; d was negative: negate so d is +ve
EORS $sign, $sign, $n, ASR#32 ; b31 = sign(d) EOR sign(n), b30 = sign(n); C = sign(n)
RSBCS $n, $n, #0 ; C set => n was negative: negate so n is +ve
UDIV_32d32_32r32 $q, $r, $n, $d ; do the divide
MOVS $sign, $sign, LSL#1 ; C = old b31 (quotient sign), N = old b30 (remainder sign)
RSBCS $q, $q, #0 ; negate quotient if the operand signs differed
RSBMI $r, $r, #0 ; negate remainder if n was negative
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 64/32 DIVISION - 3 cycles per answer bit ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unsigned 64/32 = 32 remainder 32
; On entry $nh = numerator high, $nl = numerator low, $d = denominator
; On exit $q = quotient, $r = remainder
; Assumes that numerator<(denominator<<32)
; Top bit of $d must be 0 (ie denominator < 2^31)
; Pass "-" as $flag if $d already holds the negated divisor; otherwise the macro negates $d itself
; nl can equal q
; nh can equal r
MACRO
UDIV_64d32_32r32 $q, $r, $nl, $nh, $d, $flag
; Unsigned 64/32 divide, fully unrolled into 32 subtract-and-restore steps.
; $nh:$nl = numerator, $d = divisor, $q = quotient, $r = remainder.
; The steps work with the NEGATED divisor in $d, so "subtract the divisor"
; is done with an add. $q doubles as the shift register for the low
; numerator word: each ADCS $q,$q,$q shifts an answer bit in at the bottom
; and the next numerator bit out into C.
; The IF below negates $d unless the caller passed "-" as $flag.
IF "$flag"<>"-"
RSB $d, $d, #0 ; negate divisor
ENDIF
ADDS $q, $nl, $nl ; q = nl<<1; top numerator bit of nl lands in C
ADCS $r, $d, $nh, LSL #1 ; rem = 2*nh + C - divisor; C set if it went
RSBCC $r, $d, $r ; if it failed add divisor back on (r - (-divisor))
ADCS $q, $q, $q ; insert answer bit and get numerator bit
; Remaining 31 steps, generated at assembly time.
; k is an assembler variable; its declaration is not in this macro.
k SETA 1
WHILE k<32
ADCS $r, $d, $r, LSL #1 ; rem = 2*rem + C - divisor
RSBCC $r, $d, $r ; if it failed add divisor back on
ADCS $q, $q, $q ; insert answer bit and get numerator bit
k SETA k+1
WEND
MEND
; signed 64/32 = 32 with remainder 32
; As above but signed and requires an extra register for the sign
MACRO
SDIV_64d32_32r32 $q, $r, $nl, $nh, $d, $sign
; Signed wrapper around UDIV_64d32_32r32.
; $nh:$nl = dividend, $d = divisor, $q = quotient, $r = remainder,
; $sign = scratch register that carries the result signs across the divide.
; The divisor is forced negative (the form the unsigned macro works with,
; signalled by the "-" flag below) and the dividend is made non-negative.
; Uses numeric local label 01.
MOVS $sign, $d ; copy divisor; N = its sign
RSBPL $d, $d, #0 ; d was >= 0: negate, so d = -|d| either way
MOV $sign, $sign, LSR #1 ; divisor sign moves to bit 30, bit 31 is cleared
EORS $sign, $sign, $nh, ASR #1 ; fold in dividend sign (present in bits 31 and 30 of nh ASR #1)
; $sign bit 31 = sign of dividend (= sign of remainder), also left in N
; bit 30 = sign of dividend EOR sign of divisor (= sign of quotient)
BPL %FT01 ; dividend already non-negative
RSBS $nl, $nl, #0 ; 64-bit negate: low word first, borrow in C
RSC $nh, $nh, #0 ; then high word with the borrow
01 ; numerator now +ve
UDIV_64d32_32r32 $q, $r, $nl, $nh, $d, -
MOVS $sign, $sign, LSL #1 ; C = old bit 31 (remainder sign), N = old bit 30 (quotient sign)
RSBMI $q, $q, #0 ; set sign of quotient
RSBCS $r, $r, #0 ; set sign of remainder
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 64/64 DIVISION ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unsigned 64/64 = 64 with remainder 64
;
; n=numerator d=denominator (each split into high and low part)
; q=quotient r=remainder
;
; ie n/d = q + r/d or n=q*d+r and 0<=r<d
; if d=0 then it returns q=0, r=n (so n=q*d+r !)
; registers must be distinct
; n and d are corrupted
; t is a temporary register
;
; Routine is not unrolled since the speedup isn't great.
; Can unroll if you like.
MACRO
UDIV_64d64_64r64 $ql,$qh,$rl,$rh,$nl,$nh,$dl,$dh,$t
; Unsigned 64/64 divide. Each value is a high/low register pair:
; $qh:$ql = quotient, $rh:$rl = remainder, $nh:$nl = numerator,
; $dh:$dl = divisor, $t = shift counter.
; Phase 1 doubles d until it overflows or exceeds r, counting the
; successful doublings in t. Phase 2 then runs t+1 compare/subtract steps,
; halving d after each one. $nh:$nl are reused as scratch for the trial
; difference, so the numerator is lost. Uses numeric local labels
; 00, 01, 02 and 08.
MOV $ql,#0 ; zero the quotient
MOV $qh,#0
MOV $rh,$nh ; set the remainder to the current value
MOV $rl,$nl
TEQ $dh,#0
TEQEQ $dl,#0 ; Z set only if both halves of d are zero
BEQ %F08 ; divide by 0: exit with q=0, r=numerator
MOVS $t,#0 ; count number of shifts
; first loop gets $d as large as possible
00
ADDS $dl, $dl, $dl
ADCS $dh, $dh, $dh ; double d
BCS %F01 ; overflowed: C holds the bit shifted out of the top
CMP $dh, $rh
CMPEQ $dl, $rl ; 64-bit unsigned compare of d with r
ADDLS $t, $t, #1 ; d <= r: done an extra shift
BLS %B00
ADDS $t, $t, #0 ; d > r: clear carry so a 0 is rotated back in below
01 ; overflow case arrives here with C set
MOVS $dh, $dh, RRX ; undo the last doubling: C enters bit 31, old bit 0 goes to C
MOV $dl, $dl, RRX ; shift back down again
02 ; now main loop
SUBS $nl, $rl, $dl
SBCS $nh, $rh, $dh ; n = r - d and C set if r>=d
MOVCS $rh, $nh
MOVCS $rl, $nl ; r=r-d if this goes
ADCS $ql, $ql, $ql ; answer bit (C) into the low word, top bit out to C
ADC $qh, $qh, $qh ; shift next bit into the answer
MOVS $dh, $dh, LSR#1 ; halve d: bit 0 of dh drops into C
MOV $dl, $dl, RRX ; shift down d
SUBS $t, $t, #1
BGE %B02 ; loop runs (t+1) times in total
08
MEND
; signed 64/64 with remainder 64
;
; n=numerator d=denominator (each has a high and low part)
; q=quotient r=remainder
; sign = an extra scratch register to store the signs in.
;
; ie n/d = q + r/d or n=q*d+r
; q is rounded towards zero and r has the same sign as n
; hence -3/2 = -1 remainder -1.
; 3/-2 = -1 remainder 1
; -3/-2 = 1 remainder -1.
; if d=0 then it returns q=0, r=n (so n=q*d+r !)
; registers must be distinct
MACRO
SDIV_64d64_64r64 $ql,$qh,$rl,$rh,$nl,$nh,$dl,$dh,$t,$sign
; Signed wrapper around UDIV_64d64_64r64.
; Each value is a high/low register pair: q = quotient, r = remainder,
; n = numerator, d = denominator. $t is the scratch register passed on to
; the unsigned divide and $sign carries the result signs across it.
; Every negation here is a two-instruction RSBS/RSC pair whose first
; instruction rewrites the flags, so branches are used to skip the pairs.
; Uses numeric local labels 00-03.
ANDS $sign, $dh, #1<<31 ; sign = bit 31 of dh only; N = sign of d
BPL %F00
RSBS $dl, $dl, #0 ; ensure d +ve (low word first, borrow in C)
RSC $dh, $dh, #0
00
EORS $sign, $sign, $nh, ASR#32 ; b31 = sign(d) EOR sign(n), b30 = sign(n); C = sign(n)
BCC %F01
RSBS $nl, $nl, #0 ; ensure n +ve
RSC $nh, $nh, #0
01
UDIV_64d64_64r64 $ql,$qh,$rl,$rh,$nl,$nh,$dl,$dh,$t ; do the divide
MOVS $sign, $sign, LSL#1 ; C = old b31: quotient sign
BCC %F02
RSBS $ql, $ql, #0 ; negate quotient
RSC $qh, $qh, #0
02
MOVS $sign, $sign, LSL#1 ; C = original b30: sign of n, hence of remainder
BCC %F03
RSBS $rl, $rl, #0 ; negate remainder
RSC $rh, $rh, #0
03
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; INTEGER SQUARE ROOT ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; take the integer square root of a 32 bit unsigned number
; produces a 16 bit answer (not rounded)
; On entry:
; n = input number (32 bit)
; t = temporary register
; On exit:
; q = square root
; r = remainder (n=q*q+r) (r<=2*q so may be 17 bits)
; Internally:
; r = current remainder ( = n-q*q )
; q = current root estimate. At the start of stage k this is of the form
; 01000...00000qqq..qqq
; where there are k q-bits, giving the top k bits of the 16 bit root
; t = 3<<30 the constant required to keep q in the indicated form
;
; Code size: 50 instructions (excluding return)
; Time: 50 cycles (excluding return)
;
; n may be any one of q,r or t but q,r,t must be distinct
MACRO
SQR_32_16r17 $q, $r, $n, $t
; Integer square root of the 32-bit unsigned value in $n.
; $q = root, $r = remainder, $t = scratch (holds the constant 3<<30).
; Fully unrolled: stage 0 is written out by hand and stages 1..15 are
; generated by the WHILE loop, each stage deciding one answer bit from the
; top down. While running, $q keeps a 01 marker in bits 31:30 with the
; answer bits found so far in its low bits; the marker is cleared at the end.
; NOTE(review): DISTINCT is defined outside this view - presumably an
; assembly-time check that the listed registers differ; confirm.
DISTINCT $q, $r, $t
SUBS $r, $n, #1<<30 ; take off first estimate ((1<<15)^2)
ADDCC $r, $r, #1<<30 ; add back on if it didn't go (flags untouched)
MOV $t, #3<<30 ; initialise a constant in t
ADC $q, $t, #1<<31 ; perform k=0 stage: q = 1<<30 plus first answer bit (C)
; stages k=1..15 (k is an assembler variable; its declaration is not shown here)
k SETA 1
WHILE k<16
CMP $r, $q, ROR #(2*k) ; try setting bit (15-k) of answer
SUBCS $r, $r, $q, ROR #(2*k) ; update remainder if it goes
ADC $q, $t, $q, LSL#1 ; insert next bit of answer; adding t restores the 01 marker
k SETA (k+1)
WEND
BIC $q, $q, #3<<30 ; clear the marker bits to extract the answer
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; INTEGER CUBE ROOT ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Very Fast integer cube root
; Author: Dominic Symes
;
; 64 cycles total (62 excluding constant setup)
; 6 cycles/bit of answer
;
; Details:
;
; Let n = original number to be cube rooted
; Let q(k) = Cube root of n with bits 0 to k-1 cleared (ie the estimate)
; Let r(k) = n - q(k)^3 (ie the remainder after working out bit k)
; Let b(k) = bit k of the answer (ie q(k) = [0..0b(11)b(10)..b(k)0..0])
; Let s(k) = 3*q(k)*q(k) + 3*q(k)*2^(k-1) + 2^(2k-2)
;
; Then it is easily shown that given q(k+1),r(k+1),s(k+1) then b(k)=1
; if and only if r(k+1) >= 2^k * s(k+1)
;
; if b(k)=0 then r(k) = r(k+1)
; if b(k)=1 then r(k) = r(k+1) - 2^k * s(k+1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -