📄 mathsm.h
字号:
;
; Hence the task is to maintain s(k) in the fewest number of cycles.
; By substitution:
;
; if b(k)=0 then s(k) = s(k+1) - 3*2^(k-1)*( q(k+1) + 2^(k-1) )
; if b(k)=1 then s(k) = s(k+1) + 3*2^(k-1)*( 3*q(k+1) + 5*2^(k-1) )
;
; NB q(k) is a multiple of 2^k
; s(k) is a multiple of 2^(2k-2)
;
; Let Q(k) = (q(k) + 2^(k-2)) ROR #k = [010..0b(11)b(10)..b(k)]
; Let S(k) = s(k) ROR #(2*k) = [xx0..0x...x]
;
; We need two constants in registers:
;
; c1 = NOT(1<<29)
; c2 = 3<<30
; Main step macro (to find b(k))
; On entry: Q1=Q(k+1) R=r(k+1) S=S(k+1) Q=scratch
; On exit: Q1=scratch R=r(k) S=S(k) Q=Q(k)
; We use the shorthand P=2^(k-1) and PP = P*P = 2^(2k-2)
MACRO
CBR_STEP $k, $Q, $Q1, $R, $S, $c1, $c2
; Q = r(k+1) - 2^k * s(k+1) = R - (S ROL (3*k+2))
SUBS $Q, $R, $S, ROR #(30-3*$k)
; now carry=b(k) so update remainder
MOVCS $R, $Q
; now set Q = Q(k)
; c2 = [ 1 1 0 ................. 0 ]
; Q1 LSL #1 = [ 1 0 0 ......... q(k+1)>>k ]
; carry = [ 0 0 0 .............. b(k) ]
; SUM = [ 0 1 0 ........... q(k)>>k ]
; = Q(k)
ADC $Q, $c2, $Q1, LSL#1
; now effectively add P*(q(k+1)+P)) to S
; Q1 = [ 0 1 0 ....................... q(k+1) >> (k+1) ]
; S ROL #2 = [ 0 0 0 .......................... s(k+1) >> 2k ]
; SUM = [ 0 1 0 ............. { s(k+1)+P*q(k+1) } >> 2k ]
ADD $S, $Q1, $S, ROR #30
; now if b(k)=1 effectively set Q1=-2*Q1 + magic constant
; c1 = [ 1 1 0 1 1 ................................ -1 ]
; Q1 LSL #1 = [ 1 0 0 0 0 .............. { 2q(k+1) } >> (k+1) ]
; EOR = [ 0 1 0 1 1 .......... { -2q(k+1)-4P } >> (k+1) ]
EORCS $Q1, $c1, $Q1, LSL #1
; now finally set S = S(k)
; if b(k)=0 then
; S = [ 0 1 0 0 0 ......... { s(k+1)+P*q(k+1) } >> 2k ]
; Q1 ROL#2 = [ 0 0 0 0 0 .......... { 4*q(k+1)+4P } >> (k+1) ]
; DIFF = [ 0 1 0 0 0 ... { s(k+1)-3*P*q(k+1)-4PP } >> 2k ]
; = { s(k+1)-3*P*q(k+1)-4PP + PP } ROR 2k
; = s(k) ROR 2k
; if b(k)=1 then
; S = [ 0 1 0 0 0 ......... { s(k+1)+P*q(k+1) } >> 2k ]
; Q1 ROL#2 = [ 0 1 1 1 1 ...... { -8q(k+1)-16P+4P } >> (k+1) ]
; DIFF = [ 1 1 0 0 0 .. { s(k+1)+9*P*q(k+1)+12PP } >> 2k ]
; = { s(k+1)+9*P*q(k+1) + 12PP + 3PP } ROR 2k
; = s(k) ROR 2k
SUB $S, $S, $Q1, ROR #30
; finished
MEND
; Integer cube root
;
; q = register to hold the result (may equal any of the other registers)
; r = register holding the input value
;
; q0, q1, s, c1, c2 = temporary registers, all distinct and
; distinct from r.
;
; The algorithm calculates the largest integer q such that
; q*q*q <= r. The remainder is not currently returned but could
; be extracted with the addition of some more instructions.
; q has at most 11 bits.
;
; cycle count = 2 + 5 + 9*6 + 3 = 64 cycles
MACRO
CBR_32_11 $q, $r, $q0, $q1, $s, $c1, $c2
; initialise constants
MVN $c1, #1<<29
MOV $c2, #3<<30
; first case (k=10, trying to set bit 10) optimised
SUBS $q0, $r, #1<<30 ; can we set bit 10?
MOVCS $r, $q0 ; if so update remainder
ADC $q0, $c2, #1<<31 ; insert answer bit
MOVCC $s, #(1<<30) ; S(10) if b(10)=0
MOVCS $s, #(3<<30)+4 ; S(10) if b(10)=1
; general cases
CBR_STEP 9, $q1, $q0, $r, $s, $c1, $c2 ; calc b(9)
CBR_STEP 8, $q0, $q1, $r, $s, $c1, $c2 ; calc b(8)
CBR_STEP 7, $q1, $q0, $r, $s, $c1, $c2 ; calc b(7)
CBR_STEP 6, $q0, $q1, $r, $s, $c1, $c2 ; calc b(6)
CBR_STEP 5, $q1, $q0, $r, $s, $c1, $c2 ; calc b(5)
CBR_STEP 4, $q0, $q1, $r, $s, $c1, $c2 ; calc b(4)
CBR_STEP 3, $q1, $q0, $r, $s, $c1, $c2 ; calc b(3)
CBR_STEP 2, $q0, $q1, $r, $s, $c1, $c2 ; calc b(2)
CBR_STEP 1, $q1, $q0, $r, $s, $c1, $c2 ; calc b(1)
; special case for final bit
CMP $r, $s, ROR #30 ; 0 or 1?
ADC $q0, $q1, $q1 ; insert answer bit
BIC $q, $q0, #3<<30 ; extract answer
MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; FIXED POINT MACROS ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The following macros deal with fixed point numbers with binary
; point before bit $bit (which is specified in the macro). In other
; words the 32 bit integer x is used to represent the number
; x*2^(-$bit).
; The overflow action for each operation should be specified.
; $topbit gives the number of bits the answer should fit into (with top
; sign bit if signed) without overflow. This will usually be 32.
;
; Ie, format of number:
; 32 $topbit $bit 0
; unsigned: 000..000 xxx...xxx yyy...yyy
; signed: sss..sss sxx...xxx yyy...yyy
;
; where x is the integer part, s the sign bit and y the fraction part
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; FIXED POINT DIVISION ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Unsigned fixed point division with 32 bit answer
;
; Calculates q=n/d where n and d are both integers
; (or both fixed point with the same number of bits after the binary point)
;
; The answer in q is given with fixed point at position $bit (a constant)
; The answer must fit into $topbit bits (usually 32)
; Branches to the label $overflow if there is a divide by 0 or
; an overflow in the division (answer goes outside the given number of bits)
;
; q, n, d must be distinct registers
; n and d are corrupted
MACRO
UDIVF_32d32_32 $q, $n, $d, $bit, $topbit, $overflow
IF "$overflow"<>""
CMP $d, $n, LSR#($topbit-$bit) ; check 2^(topbit-bit) > n/d
BLS $overflow ; if not then overflow/div 0
ENDIF
MOV $q, $n, LSL#$bit ; low of n<<bit
MOV $n, $n, LSR#(32-$bit) ; high of n<<bit
UDIV_64d32_32r32 $q, $n, $q, $n, $d ; do the divide (3 cycles/bit)
MEND
; Signed fixed point divide with 32 bit answer
; Same as unsigned but needs an extra register to store the sign
MACRO
SDIVF_32d32_32 $q, $n, $d, $bit, $topbit, $sign, $overflow
EORS $sign, $n, $d, ASR#32 ; get sign of quotient in b31
RSBCS $d, $d, #0 ; make denominator +ve
MOVS $n, $n
RSBMI $n, $n, #0 ; make numerator +ve
UDIVF_32d32_32 $q, $n, $d, $bit, $topbit-1, $overflow
TST $sign, #1<<31
RSBNE $q, $q, #0 ; get sign of quotient
MEND
;==========================================================================
; Fixed point Cosine macro.
;
; This takes the Cosine of the value passed (assumed to be in radians).
;
; If "$RANGE"=1 the parameter is first range reduced to -PI/2 < x < PI/2
; and then the Cosine is calculated, else:
;
; If "$RANGE"=0 then the parameter is assumed to already be range
; reduced (and will give dodgy result if it isn't in the range).
;
;
;==========================================================================
MACRO
ARMCOS $z, $x, $t0, $t1, $PREC, $RANGE
CMP $x, #0 ; if x < 0
RSBLT $x, $x, #0 ; x = -x
; First part involves reducing the argument to the range
; -PI/2 < f=f(x) < PI/2
; Only do this bit if $RANGE is set, otherwise assume is
; argument is range checked.
MOV $z, #(0x19<<$PREC) >> 3 ; Fabricate PI to 18 bits
ADD $z, $z, #(0x22<<$PREC) >> 11
ADD $x, $x, $z, ASR #1 ; n = x = (PI/2) + x
IF $RANGE>0
ADD $t1, $x, $x, ASR #1 ; n = n/PI
ADD $t1, $t1, $t1, ASR #15 ; (only for high precision)
ADD $t1, $t1, $t1, ASR #6
SUB $t1, $t1, $x, ASR #2
SUB $t0, $t1, $x, ASR #12
ADD $t0, $t0, #(1<<($PREC+1)) ; Round to nearest integer, and
MOV $t0, $t0, ASR #($PREC+2) ; get back to original precision.
ELSE
; No range checking - argument should be |x| < PI/2
MOV $t0, #0
ENDIF
TST $t0, #0x001 ; If n is odd then negate result
; (keep flags till later)
; Calculate the range reduced x
MUL $t1, $z, $t0 ; x = x - (n*PI)
SUB $x, $x, $t1
; ADD $x, $x, $z, ASR #1 ; x += PI/2 (for cosine)
; x is now in range -PI/2 < x < PI/2 so can now calculate sin(x)
MUL $t1, $x, $x ; g = (f/2)**2
MOV $t1, $t1, ASR #$PREC+2
; r = ((((R3*g) + R2)*g) + R1)*g
ADD $z, $t1, $t1, ASR #1 ; Assume R3*g = approx = g*(13/1024)
ADD $z, $z, $t1, ASR #3
MOV $z, $z, ASR #7
MOV $t0, #(0x88<<$PREC)>>8 ; Assume R2 = approx = 0x8888 >> 18
ORR $t0, $t0, $t0, LSR #8
IF $PREC > 17
ORR $t0, $t0, $t0, LSR #16 ; This op for > 18 bit precision only
ENDIF
RSB $z, $z, $t0, LSR #2
ORR $t0, $t0, $t0, LSR #2 ; Assume R3 = approx = 0xAAAA >> 16
MUL $z, $t1, $z ; r = r * g
RSB $z, $t0, $z, ASR #$PREC ; r = r - 0.6666662674
MUL $z, $t1, $z ; r = r * g
MOV $z, $z, ASR #$PREC
MUL $z, $x, $z ; r = x + r * x
ADD $z, $x, $z, ASR #$PREC
; If we have done the range checking then correct the answers sign
RSBNE $z, $z, #0
MEND
;===============================================================================
MACRO
ARMSIN $z, $x, $t0, $t1, $PREC, $RANGE
TST $x, $x, LSL #1 ; if (x<0) c=1 else c=0
; CMP $x, #0 ; if x < 0
; MOVLT $sgn, #1 ; sgn=-1
; RSBLT $x, $x, #0 ; x = -x
RSBCS $x, $x, #0 ; x = -x
; First part involves reducing the argument to the range
; -PI/2 < f=f(x) < PI/2
; Only do this bit if $RANGE is set, otherwise assume is
; argument is range checked.
MOV $z, #(0x19<<$PREC) >> 3 ; Fabricate PI to 18 bits
ADD $z, $z, #(0x22<<$PREC) >> 11
IF $RANGE>0
; ADD $t0, $x, $z, ASR #1 ; n = (PI/2) + x
; If RANGE=2 then will work for large n, else only work for small n
IF $RANGE=2
_UMULL $t0, $t1, $a1, $t0 ; n = n/PI
MOV $t0, $t0, LSR #$PREC ; Normalise and get 32 LSBits,
ORR $t0, $t0, $t1, LSL #(32-$PREC) ; assuming it doesn't overflow this
ELSE
ADD $t1, $x, $x, ASR #1 ; n = x/PI
ADD $t1, $t1, $t1, ASR #15 ; (only for high precision)
ADD $t1, $t1, $t1, ASR #6
SUB $t1, $t1, $x, ASR #2
SUB $t0, $t1, $x, ASR #12
; MOV $t0, $t0, ASR #2 ; Get back to original precision.
ENDIF
ADD $t0, $t0, #(1<<($PREC+1)) ; Round to nearest integer
MOV $t0, $t0, ASR #($PREC+2)
; TST $t0, #0x001 ; Is N odd (save in flags for later)
; Calculate the range reduced x
; MUL $t1, $z, $t0 ; x = x - (n*PI)
; SUB $x, $x, $t1, ASR #$PREC
ELSE
; No range checking - argument should be |x| < 2-sqrt(3)
MOV $t0, #0
ENDIF
; Want to perform: If (x<0) sgn=-1 else sgn=1
; If n is odd then sgn=-sgn
; Flags are already set to CS if (x<0), toggle C-flag if n is odd
TST $t0, #0x001 ; If n is odd then negate result
CMPNE $z, $z, RRX ; (toggle C-flag if odd)
MUL $t1, $z, $t0 ; x = x - (n*PI)
SUB $x, $x, $t1
; ADD $x, $x, $z, ASR #1 ; x += PI/2 (for cosine)
; x is now in range -PI/2 < x < PI/2 so can now calculate sin(x)
MUL $t1, $x, $x ; g = (f/2)**2
MOV $t1, $t1, ASR #$PREC+2
; r = ((((R3*g) + R2)*g) + R1)*g
ADD $z, $t1, $t1, ASR #1 ; Assume R3*g = approx = g*(13/1024)
ADD $z, $z, $t1, ASR #3
MOV $z, $z, ASR #7
MOV $t0, #(0x88<<$PREC)>>8 ; Assume R2 = approx = 0x8888 >> 18
ORR $t0, $t0, $t0, LSR #8
IF $PREC > 17
ORR $t0, $t0, $t0, LSR #16 ; This op for > 18 bit precision only
ENDIF
RSB $z, $z, $t0, LSR #2
ORR $t0, $t0, $t0, LSR #2 ; Assume R3 = approx = 0xAAAA >> 16
MUL $z, $t1, $z ; r = r * g
RSB $z, $t0, $z, ASR #$PREC ; r = r - 0.6666662674
MUL $z, $t1, $z ; r = r * g
MOV $z, $z, ASR #$PREC
MUL $z, $x, $z ; r = x + r * x
ADD $z, $x, $z, ASR #$PREC
; If we have done the range checking then correct the answers sign
RSBCS $z, $z, #0
MEND
;;;;;;;
; END ;
;;;;;;;
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -